Source code for glue.ligolw.table

# Copyright (C) 2006--2016  Kipp Cannon
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.


#
# =============================================================================
#
#                                   Preamble
#
# =============================================================================
#


"""
While the ligolw module provides classes and parser support for reading and
writing LIGO Light Weight XML documents, this module supplements that code
with classes and parsers that add intelligence to the in-RAM document
representation.

In particular, the document tree associated with a Table element is
enhanced.  During parsing, the Stream element in this module converts the
character data contained within it into a list of objects.  The list
contains one object for each row of the table, and the objects' attributes
are the names of the table's columns.  When the document is written out
again, the Stream element serializes the row objects back into character
data.

The Table element exports a list-like interface to the rows.  The Column
elements also provide list-like access to the values in the corresponding
columns of the table.
"""


import copy
import itertools
import re
from six.moves import zip
import sys
from xml.sax.saxutils import escape as xmlescape
from xml.sax.xmlreader import AttributesImpl


from glue import git_version
from . import ligolw
from . import tokenizer
from . import types as ligolwtypes


__author__ = "Kipp Cannon <kipp.cannon@ligo.org>"
__version__ = "git id %s" % git_version.id
__date__ = git_version.date


#
# =============================================================================
#
#                                  Utilities
#
# =============================================================================
#


def get_table(xmldoc, name):
	"""
	Scan xmldoc for a Table element named name.  Raises ValueError if
	not exactly 1 such table is found.

	NOTE:  if a Table sub-class has its .tableName attribute set, then
	its .get_table() class method can be used instead.  This is true
	for all Table classes in the glue.ligolw.lsctables module, and it
	is recommended to always use the .get_table() class method of those
	classes to retrieve those standard tables instead of calling this
	function and passing the .tableName attribute.  The example below
	shows both techniques.

	Example:

	>>> from . import ligolw, lsctables
	>>> xmldoc = ligolw.Document()
	>>> xmldoc.appendChild(ligolw.LIGO_LW()).appendChild(lsctables.New(lsctables.SnglInspiralTable))
	[]
	>>> # find table with this function
	>>> sngl_inspiral_table = get_table(xmldoc, lsctables.SnglInspiralTable.tableName)
	>>> # find table with .get_table() class method (preferred)
	>>> sngl_inspiral_table = lsctables.SnglInspiralTable.get_table(xmldoc)

	See also the .get_table() class method of the Table class.
	"""
	tables = Table.getTablesByName(xmldoc, name)
	if len(tables) != 1:
		raise ValueError("document must contain exactly one %s table" % Table.TableName(name))
	return tables[0]


def reassign_ids(elem):
	"""
	Recurses over all Table elements below elem whose next_id
	attributes are not None, and uses the .get_next_id() method of each
	of those Tables to generate and assign new IDs to their rows.  The
	modifications are recorded, and finally all ID attributes in all
	rows of all tables are updated to fix cross references to the
	modified IDs.

	This function is used by ligolw_add to assign new IDs to rows when
	merging documents in order to make sure there are no ID collisions.
	Using this function in this way requires the .get_next_id() methods
	of all Table elements to yield unused IDs, otherwise collisions
	will result anyway.  See the .sync_next_id() method of the Table
	class for a way to initialize the .next_id attributes so that
	collisions will not occur.

	Example:

	>>> from . import ligolw, lsctables
	>>> xmldoc = ligolw.Document()
	>>> xmldoc.appendChild(ligolw.LIGO_LW()).appendChild(lsctables.New(lsctables.SnglInspiralTable))
	[]
	>>> reassign_ids(xmldoc)
	"""
	mapping = {}
	for tbl in elem.getElementsByTagName(ligolw.Table.tagName):
		if tbl.next_id is not None:
			tbl.updateKeyMapping(mapping)
	for tbl in elem.getElementsByTagName(ligolw.Table.tagName):
		tbl.applyKeyMapping(mapping)


#
# =============================================================================
#
#                                Column Element
#
# =============================================================================
#


class Column(ligolw.Column):
	"""
	High-level column element that provides list-like access to the
	values in a column.

	Example:

	>>> from xml.sax.xmlreader import AttributesImpl
	>>> import sys
	>>> tbl = Table(AttributesImpl({u"Name": u"test"}))
	>>> col = tbl.appendChild(Column(AttributesImpl({u"Name": u"test:snr", u"Type": u"real_8"})))
	>>> tbl.appendChild(TableStream(AttributesImpl({u"Name": u"test"})))	# doctest: +ELLIPSIS
	<glue.ligolw.table.TableStream object at ...>
	>>> tbl._update_column_info()
	>>> assert col.Name == 'snr'
	>>> assert col.Type == 'real_8'
	>>> # append 3 rows (with nothing in them)
	>>> tbl.append(tbl.RowType())
	>>> tbl.append(tbl.RowType())
	>>> tbl.append(tbl.RowType())
	>>> # assign values to the rows, in order, in this column
	>>> col[:] = [8.0, 10.0, 12.0]
	>>> col[:]
	[8.0, 10.0, 12.0]
	>>> col.asarray()	# doctest: +NORMALIZE_WHITESPACE
	array([  8.,  10.,  12.])
	>>> tbl.write(sys.stdout)	# doctest: +NORMALIZE_WHITESPACE
	<Table Name="test">
		<Column Name="test:snr" Type="real_8"/>
		<Stream Name="test">
			8,
			10,
			12
		</Stream>
	</Table>
	>>> col.index(10)
	1
	>>> 12 in col
	True
	>>> col[0] = 9.
	>>> col[1] = 9.
	>>> col[2] = 9.
	>>> tbl.write(sys.stdout)		# doctest: +NORMALIZE_WHITESPACE
	<Table Name="test">
		<Column Name="test:snr" Type="real_8"/>
		<Stream Name="test">
			9,
			9,
			9
		</Stream>
	</Table>
	>>> col.count(9)
	3

	NOTE:  the .Name attribute returns the stripped "Name" attribute of
	the element, e.g. with the table suffix removed, but when assigning
	to the .Name attribute the value provided is stored without
	modification, i.e. there is no attempt to reattach the table's name
	to the string.  The calling code is responsible for doing the
	correct manipulations.  Therefore, the assignment operation below

	>>> assert col.Name, col.getAttribute("Name") == ('snr', 'test:snr')
	>>> col.Name = col.Name
	>>> assert col.Name, col.getAttribute("Name") == ('snr', 'snr')

	does not preserve the value of the "Name" attribute (though it does
	preserve the stripped form reported by the .Name property).  This
	asymmetry is necessary because the correct table name string to
	reattach to the attribute's value cannot always be known, e.g., if
	the Column object is not part of an XML tree and does not have a
	parent node.
	"""
	# FIXME: the pattern should be
	#
	# r"(?:\A[a-z0-9_]+:|\A)(?P<FullName>(?:[a-z0-9_]+:|\A)(?P<Name>[a-z0-9_]+))\Z"
	#
	# but people are putting upper case letters in names!!!!!  Someone
	# is going to get the beats.  There is a reason for requiring names
	# to be all lower case:  SQL table and column names are case
	# insensitive, therefore (i) when converting a document to SQL the
	# columns "Rho" and "rho" would become indistinguishable and so it
	# would be impossible to convert a document with columns having
	# names like this into an SQL database;  and (ii) even if that
	# degeneracy is not encountered the case cannot be preserved and so
	# when converting back to XML the correct capitalization is lost.
	# Requiring names to be all lower-case creates the same
	# degeneracies in XML representations that exist in SQL
	# representations ensuring compatibility and defines the correct
	# case to restore the names to when converting to XML.  Other rules
	# can be imagined that would work as well, this is the one that got
	# chosen.
	class ColumnName(ligolw.LLWNameAttr):
		dec_pattern = re.compile(r"(?:\A\w+:|\A)(?P<FullName>(?:(?P<Table>\w+):|\A)(?P<Name>\w+))\Z")
		enc_pattern = u"%s"

	Name = ligolw.attributeproxy(u"Name", enc = ColumnName.enc, dec = ColumnName)

	def __len__(self):
		"""
		The number of values in this column.
		"""
		return len(self.parentNode)

	def __getitem__(self, i):
		"""
		Retrieve the value in this column in row i.
		"""
		if isinstance(i, slice):
			return [getattr(r, self.Name) for r in self.parentNode[i]]
		else:
			return getattr(self.parentNode[i], self.Name)

	def __setitem__(self, i, value):
		"""
		Set the value in this column in row i.  i may be a slice.

		NOTE:  Unlike normal Python lists, the length of the Column
		cannot be changed as it is tied to the number of rows in
		the Table.  Therefore, if i is a slice, value should be an
		iterable with exactly the correct number of items.  No
		check is performed to ensure that this is true:  if value
		contains too many items the extras will be ignored, and if
		value contains too few items only as many rows will be
		updated as there are items.
		"""
		if isinstance(i, slice):
			for r, val in zip(self.parentNode[i], value):
				setattr(r, self.Name, val)
		else:
			setattr(self.parentNode[i], self.Name, value)

	def __delitem__(self, *args):
		raise NotImplementedError

	def __iter__(self):
		"""
		Return an iterator object for iterating over values in this
		column.
		"""
		for row in self.parentNode:
			yield getattr(row, self.Name)

	def count(self, value):
		"""
		Return the number of rows with this column equal to value.
		"""
		return sum(x == value for x in self)

	def index(self, value):
		"""
		Return the smallest index of the row(s) with this column
		equal to value.
		"""
		for i, x in enumerate(self):
			if x == value:
				return i
		raise ValueError(value)

	def __contains__(self, value):
		"""
		Returns True or False if there is or is not, respectively,
		a row containing val in this column.
		"""
		return value in iter(self)

	def asarray(self):
		"""
		Construct a numpy array from this column.  Note that this
		creates a copy of the data, so modifications made to the
		array will *not* be recorded in the original document.
		"""
		# most codes don't use this feature, this is the only place
		# numpy is used here, and importing numpy can be
		# time-consuming, so we derfer the import until needed.
		import numpy
		try:
			dtype = ligolwtypes.ToNumPyType[self.Type]
		except KeyError as e:
			raise TypeError("cannot determine numpy dtype for Column '%s': %s" % (self.getAttribute("Name"), e))
		return numpy.fromiter(self, dtype = dtype)

	@classmethod
	def getColumnsByName(cls, elem, name):
		"""
		Return a list of Column elements named name under elem.
		"""
		name = cls.ColumnName(name)
		return elem.getElements(lambda e: (e.tagName == cls.tagName) and (e.Name == name))


#
# =============================================================================
#
#                                Stream Element
#
# =============================================================================
#


#
# A subclass of tokenizer.RowBuilder that interns strings.
#


class InterningRowBuilder(tokenizer.RowBuilder):
	"""
	This subclass of the tokenizer.RowBuilder class respects the
	"interning" hints provided by table definitions, and attempts to
	replace the values of row attributes associated with interned
	columns with references to shared instances of those values.  This
	results in a reduction in memory use which is small for most
	documents, but can be subtantial when dealing with tables
	containing large volumes of repeated information.

	Example:

	>>> class Row(object):
	...	pass
	...
	>>> # 3rd arg is optional list of attributes to intern
	>>> rows = InterningRowBuilder(Row, ["name", "age"], ("name",))
	>>> l = list(rows.append(["Dick", 20., "Jane", 75., "Dick", 22.]))
	>>> l[0].name
	'Dick'
	>>> l[2].name
	'Dick'
	>>> l[2].name is l[0].name
	True

	Note that Python naturally interns short strings, so this example
	would return True regardless;  it is intended only to demonstrate
	the use of the class.

	The values are stored in a dictionary that is shared between all
	instances of this class, and which survives forever.  Nothing is
	ever naturally "uninterned", so the string dictionary grows without
	bound as more documents are processed.  This can be a problem in
	some use cases, and the work-around is to run

	>>> InterningRowBuilder.strings.clear()

	to reset the dictionary at appropriate points in the application.
	Typically this would be done immediately after each document is
	loaded.
	"""
	strings = {}
	def append(self, tokens):
		interns = self.interns
		setdefault = self.strings.setdefault
		for row in super(InterningRowBuilder, self).append(tokens):
			for col in interns:
				val = getattr(row, col)
				setattr(row, col, setdefault(val, val))
			yield row


#
# Stream class
#


class TableStream(ligolw.Stream):
	"""
	High-level Stream element for use inside Tables.  This element
	knows how to parse the delimited character stream into row objects
	that it appends into the list-like parent element, and knows how to
	turn the parent's rows back into a character stream.
	"""
	#
	# Select the RowBuilder class to use when parsing tables.
	#

	RowBuilder = tokenizer.RowBuilder

	def config(self, parentNode):
		# some initialization that requires access to the
		# parentNode, and so cannot be done inside the __init__()
		# function.
		loadcolumns = set(parentNode.columnnames)
		if parentNode.loadcolumns is not None:
			# FIXME:  convert loadcolumns attributes to sets to
			# avoid the conversion.
			loadcolumns &= set(parentNode.loadcolumns)
		self._tokenizer = tokenizer.Tokenizer(self.Delimiter)
		self._tokenizer.set_types([(pytype if colname in loadcolumns else None) for pytype, colname in zip(parentNode.columnpytypes, parentNode.columnnames)])
		columnnames = [name for name in parentNode.columnnames if name in loadcolumns]
		# FIXME:  convert interncolumns attributes to sets to
		# simplify computing the intersection
		interncolumns = [name for name in (parentNode.interncolumns or set()) if name in columnnames]
		self._rowbuilder = self.RowBuilder(parentNode.RowType, columnnames, interncolumns)
		return self

	def appendData(self, content):
		# tokenize buffer, pack into row objects, and append to
		# table
		appendfunc = self.parentNode.append
		for row in self._rowbuilder.append(self._tokenizer.append(content)):
			appendfunc(row)

	def endElement(self):
		# stream tokenizer uses delimiter to identify end of each
		# token, so add a final delimiter to induce the last token
		# to get parsed but only if there's something other than
		# whitespace left in the tokenizer's buffer.  the writing
		# code will have put a final delimiter into the stream if
		# the final token was pure whitespace in order to
		# unambiguously indicate that token's presence
		if not self._tokenizer.data.isspace():
			self.appendData(self.Delimiter)
		# now we're done with these
		del self._tokenizer
		del self._rowbuilder
		# call parent's _end_of_rows() hook.
		self.parentNode._end_of_rows()

	def write(self, fileobj = sys.stdout, indent = u""):
		# retrieve the .write() method of the file object to avoid
		# doing the attribute lookup in loops
		w = fileobj.write
		# loop over parent's rows.  This is complicated because we
		# need to not put a delimiter at the end of the last row
		# unless it ends with a null token
		w(self.start_tag(indent))
		rowdumper = tokenizer.RowDumper(self.parentNode.columnnames, [ligolwtypes.FormatFunc[coltype] for coltype in self.parentNode.columntypes], self.Delimiter)
		rowdumper.dump(self.parentNode)
		try:
			line = next(rowdumper)
		except StopIteration:
			# table is empty
			pass
		else:
			# write first row
			newline = u"\n" + indent + ligolw.Indent
			w(newline)
			# the xmlescape() call replaces things like "<"
			# with "&lt;" so that the string will not confuse
			# an XML parser when the file is read.  turning
			# "&lt;" back into "<" during file reading is
			# handled by the XML parser, so there is no code
			# in Glue related to that.
			w(xmlescape(line))
			# now add delimiter and write the remaining rows
			newline = rowdumper.delimiter + newline
			for line in rowdumper:
				w(newline)
				w(xmlescape(line))
			if rowdumper.tokens and rowdumper.tokens[-1] == u"":
				# the last token of the last row was null:
				# add a final delimiter to indicate that a
				# token is present
				w(rowdumper.delimiter)
		w(u"\n" + self.end_tag(indent) + u"\n")


#
# =============================================================================
#
#                                Table Element
#
# =============================================================================
#


class Table(ligolw.Table, list):
	"""
	High-level Table element that knows about its columns and rows.
	"""
	class TableName(ligolw.LLWNameAttr):
		dec_pattern = re.compile(r"(?:\A[a-z0-9_]+:|\A)(?P<Name>[a-z0-9_]+):table\Z")
		enc_pattern = u"%s:table"

	Name = ligolw.attributeproxy(u"Name", enc = TableName.enc, dec = TableName)

	validcolumns = None
	loadcolumns = None
	interncolumns = None
	constraints = None
	how_to_index = None
	next_id = None

	class RowType(object):
		"""
		Helpful parent class for row objects.  Also used as the
		default row class by Table instances.  Provides an
		__init__() method that accepts keyword arguments from which
		the object's attributes are initialized.

		Example:

		>>> x = Table.RowType(a = 0.0, b = "test", c = True)
		>>> x.a
		0.0
		>>> x.b
		'test'
		>>> x.c
		True

		Also provides .__getstate__() and .__setstate__() methods
		to allow row objects to be pickled (otherwise, because they
		all use __slots__ to reduce their memory footprint, they
		aren't pickleable).
		"""
		def __init__(self, **kwargs):
			for key, value in kwargs.items():
				setattr(self, key, value)

		def __getstate__(self):
			return dict((key, getattr(self, key)) for key in self.__slots__ if hasattr(self, key))

		def __setstate__(self, state):
			self.__init__(**state)


	def __init__(self, *args):
		"""
		Initialize
		"""
		super(Table, self).__init__(*args)
		self.columnnames = []
		self.columntypes = []
		self.columnpytypes = []


	#
	# Table retrieval
	#


	@classmethod
	def getTablesByName(cls, elem, name):
		"""
		Return a list of Table elements named name under elem.
		"""
		name = cls.TableName(name)
		return elem.getElements(lambda e: (e.tagName == cls.tagName) and (e.Name == name))

	@classmethod
	def get_table(cls, xmldoc):
		"""
		Equivalent to the module-level function get_table(), but
		uses the .tableName attribute of this class to provide the
		name of the table to search for.  The Table parent class
		does not provide a .tableName attribute, but sub-classes,
		especially those in lsctables.py, do provide a value for
		that attribute, and in those cases this class method
		provides a cleaner way to retrieve them.

		Example:

		>>> from . import ligolw, lsctables
		>>> xmldoc = ligolw.Document()
		>>> xmldoc.appendChild(ligolw.LIGO_LW()).appendChild(lsctables.New(lsctables.SnglInspiralTable))
		[]
		>>> sngl_inspiral_table = lsctables.SnglInspiralTable.get_table(xmldoc)
		"""
		return get_table(xmldoc, cls.tableName)

	def copy(self):
		"""
		Construct and return a new Table document subtree whose
		structure is the same as this table, that is it has the
		same columns etc..  The rows are not copied.  Note that a
		fair amount of metadata is shared between the original and
		new tables.  In particular, a copy of the Table object
		itself is created (but with no rows), and copies of the
		child nodes are created.  All other object references are
		shared between the two instances, such as the RowType
		attribute on the Table object.
		"""
		new = copy.copy(self)
		new.childNodes = []	# got reference to original list
		for elem in self.childNodes:
			new.appendChild(copy.copy(elem))
		del new[:]
		new._end_of_columns()
		new._end_of_rows()
		return new


	@classmethod
	def CheckElement(cls, elem):
		"""
		Return True if element is a Table element whose Name
		attribute matches the .tableName attribute of this class ;
		return False otherwise.  See also .CheckProperties().
		"""
		return cls.CheckProperties(elem.tagName, elem.attributes)


	@classmethod
	def CheckProperties(cls, tagname, attrs):
		"""
		Return True if tagname and attrs are the XML tag name and
		element attributes, respectively, of a Table element whose
		Name attribute matches the .tableName attribute of this
		class;  return False otherwise.  The Table parent class
		does not provide a .tableName attribute, but sub-classes,
		especially those in lsctables.py, do provide a value for
		that attribute.  See also .CheckElement()

		Example:

		>>> from . import lsctables
		>>> lsctables.ProcessTable.CheckProperties(u"Table", {u"Name": u"process:table"})
		True
		"""
		return tagname == cls.tagName and cls.TableName(attrs[u"Name"]) == cls.tableName


	#
	# Column access
	#

	def getColumnByName(self, name):
		"""
		Retrieve and return the Column child element named name.
		The comparison is done using the stripped names.  Raises
		KeyError if this table has no column by that name.

		Example:

		>>> from . import lsctables
		>>> tbl = lsctables.New(lsctables.SnglInspiralTable)
		>>> col = tbl.getColumnByName("mass1")
		"""
		try:
			col, = Column.getColumnsByName(self, name)
		except ValueError:
			# did not find exactly 1 matching child
			raise KeyError(name)
		return col


	def appendColumn(self, name):
		"""
		Append a Column element named "name" to the table.  Returns
		the new child.  Raises ValueError if the table already has
		a column by that name, and KeyError if the validcolumns
		attribute of this table does not contain an entry for a
		column by that name.

		Note that the name string is assumed to be "pre-stripped",
		that is it is the significant portion of the elements Name
		attribute.  The Column element's Name attribute will be
		constructed by pre-pending the stripped Table element's
		name and a colon.

		Example:

		>>> from . import lsctables
		>>> process_table = lsctables.New(lsctables.ProcessTable, [])
		>>> col = process_table.appendColumn("program")
		>>> assert col.getAttribute("Name") == 'process:program'
		>>> assert col.Name == 'program'
		"""
		try:
			self.getColumnByName(name)
			# if we get here the table already has that column
			raise ValueError("duplicate Column '%s'" % name)
		except KeyError:
			pass
		column = Column(AttributesImpl({u"Name": "%s:%s" % (self.Name, name), u"Type": self.validcolumns[name]}))
		streams = self.getElementsByTagName(ligolw.Stream.tagName)
		if streams:
			self.insertBefore(column, streams[0])
		else:
			self.appendChild(column)
		return column


	#
	# Row access
	#

	def appendRow(self, *args, **kwargs):
		"""
		Create and append a new row to this table, then return it

		All positional and keyword arguments are passed to the RowType
		constructor for this table.
		"""
		row = self.RowType(*args, **kwargs)
		self.append(row)
		return row


	#
	# Element methods
	#

	def _update_column_info(self):
		"""
		Used for validation during parsing, and additional
		book-keeping.  For internal use only.
		"""
		del self.columnnames[:]
		del self.columntypes[:]
		del self.columnpytypes[:]
		for child in self.getElementsByTagName(ligolw.Column.tagName):
			if self.validcolumns is not None:
				try:
					if self.validcolumns[child.Name] != child.Type:
						raise ligolw.ElementError("invalid type '%s' for Column '%s' in Table '%s', expected type '%s'" % (child.Type, child.getAttribute("Name"), self.getAttribute("Name"), self.validcolumns[child.Name]))
				except KeyError:
					raise ligolw.ElementError("invalid Column '%s' for Table '%s'" % (child.getAttribute("Name"), self.getAttribute("Name")))
			if child.Name in self.columnnames:
				raise ligolw.ElementError("duplicate Column '%s' in Table '%s'" % (child.getAttribute("Name"), self.getAttribute("Name")))
			self.columnnames.append(child.Name)
			self.columntypes.append(child.Type)
			try:
				self.columnpytypes.append(ligolwtypes.ToPyType[child.Type])
			except KeyError:
				raise ligolw.ElementError("unrecognized Type '%s' for Column '%s' in Table '%s'" % (child.Type, child.getAttribute("Name"), self.getAttribute("Name")))

	def _verifyChildren(self, i):
		"""
		Used for validation during parsing, and additional
		book-keeping.  For internal use only.
		"""
		super(Table, self)._verifyChildren(i)
		child = self.childNodes[i]
		if child.tagName == ligolw.Column.tagName:
			self._update_column_info()
		elif child.tagName == ligolw.Stream.tagName:
			# require agreement of non-stripped strings
			if child.getAttribute("Name") != self.getAttribute("Name"):
				raise ligolw.ElementError("Stream name '%s' does not match Table name '%s'" % (child.getAttribute("Name"), self.getAttribute("Name")))

	def _end_of_columns(self):
		"""
		Called during parsing to indicate that the last Column
		child element has been added.  Subclasses can override this
		to perform any special action that should occur following
		the addition of the last Column element.
		"""
		pass

	def _end_of_rows(self):
		"""
		Called during parsing to indicate that the last row has
		been added.  Subclasses can override this to perform any
		special action that should occur following the addition of
		the last row.
		"""
		pass

	def removeChild(self, child):
		"""
		Remove a child from this element.  The child element is
		returned, and it's parentNode element is reset.
		"""
		super(Table, self).removeChild(child)
		if child.tagName == ligolw.Column.tagName:
			self._update_column_info()
		return child

	def unlink(self):
		"""
		Break internal references within the document tree rooted
		on this element to promote garbage collection.
		"""
		super(Table, self).unlink()
		del self[:]

	def endElement(self):
		# Table elements are allowed to contain 0 Stream children,
		# but _end_of_columns() and _end_of_rows() hooks must be
		# called regardless, so we do that here if needed.
		if self.childNodes[-1].tagName != ligolw.Stream.tagName:
			self._end_of_columns()
			self._end_of_rows()

	#
	# Row ID manipulation
	#

	@classmethod
	def get_next_id(cls):
		"""
		Returns the current value of the next_id class attribute,
		and increments the next_id class attribute by 1.  Raises
		ValueError if the table does not have an ID generator
		associated with it.
		"""
		# = None if no ID generator
		id = cls.next_id
		cls.next_id += 1
		return id

	@classmethod
	def set_next_id(cls, id):
		"""
		Sets the value of the next_id class attribute.  This is a
		convenience function to help prevent accidentally assigning
		a value to an instance attribute instead of the class
		attribute.
		"""
		cls.next_id = id

	@classmethod
	def reset_next_id(cls):
		"""
		If the current value of the next_id class attribute is not
		None then set it to 0, otherwise it is left unmodified.

		Example:

		>>> from . import lsctables
		>>> for cls in lsctables.TableByName.values(): cls.reset_next_id()
		"""
		if cls.next_id is not None:
			cls.set_next_id(type(cls.next_id)(0))

	def sync_next_id(self):
		"""
		Determines the highest-numbered ID in this table, and sets
		the table's .next_id attribute to the next highest ID in
		sequence.  If the .next_id attribute is already set to a
		value greater than the highest value found, then it is left
		unmodified.  The return value is the ID identified by this
		method.  If the table's .next_id attribute is None, then
		this function is a no-op.

		Note that tables of the same name typically share a common
		.next_id attribute (it is a class attribute, not an
		attribute of each instance) so that IDs can be generated
		that are unique across all tables in the document.  Running
		sync_next_id() on all the tables in a document that are of
		the same type will have the effect of setting the ID to the
		next ID higher than any ID in any of those tables.

		Example:

		>>> from . import lsctables
		>>> tbl = lsctables.New(lsctables.ProcessTable)
		>>> print(tbl.sync_next_id())
		process:process_id:0
		"""
		if self.next_id is not None:
			if len(self):
				n = max(self.getColumnByName(self.next_id.column_name)) + 1
			else:
				n = type(self.next_id)(0)
			if n > self.next_id:
				self.set_next_id(n)
		return self.next_id

	def updateKeyMapping(self, mapping):
		"""
		Used as the first half of the row key reassignment
		algorithm.  Accepts a dictionary mapping old key --> new
		key.  Iterates over the rows in this table, using the
		table's next_id attribute to assign a new ID to each row,
		recording the changes in the mapping.  Returns the mapping.
		Raises ValueError if the table's next_id attribute is None.
		"""
		if self.next_id is None:
			raise ValueError(self)
		try:
			column = self.getColumnByName(self.next_id.column_name)
		except KeyError:
			# table is missing its ID column, this is a no-op
			return mapping
		for i, old in enumerate(column):
			if old is None:
				raise ValueError("null row ID encountered in Table '%s', row %d" % (self.getAttribute("Name"), i))
			if old in mapping:
				column[i] = mapping[old]
			else:
				column[i] = mapping[old] = self.get_next_id()
		return mapping

	def applyKeyMapping(self, mapping):
		"""
		Used as the second half of the key reassignment algorithm.
		Loops over each row in the table, replacing references to
		old row keys with the new values from the mapping.
		"""
		for coltype, colname in zip(self.columntypes, self.columnnames):
			if coltype in ligolwtypes.IDTypes and (self.next_id is None or colname != self.next_id.column_name):
				column = self.getColumnByName(colname)
				for i, old in enumerate(column):
					try:
						column[i] = mapping[old]
					except KeyError:
						pass


#
# =============================================================================
#
#                               Content Handler
#
# =============================================================================
#


#
# Override portions of a ligolw.LIGOLWContentHandler class
#


def use_in(ContentHandler):
	"""
	Modify ContentHandler, a sub-class of
	glue.ligolw.LIGOLWContentHandler, to cause it to use the Table,
	Column, and Stream classes defined in this module when parsing XML
	documents.

	Example:

	>>> from glue.ligolw import ligolw
	>>> class LIGOLWContentHandler(ligolw.LIGOLWContentHandler):
	...	pass
	...
	>>> use_in(LIGOLWContentHandler)
	<class 'glue.ligolw.table.LIGOLWContentHandler'>
	"""
	def startColumn(self, parent, attrs):
		return Column(attrs)

	def startStream(self, parent, attrs, __orig_startStream = ContentHandler.startStream):
		if parent.tagName == ligolw.Table.tagName:
			parent._end_of_columns()
			return TableStream(attrs).config(parent)
		return __orig_startStream(self, parent, attrs)

	def startTable(self, parent, attrs):
		return Table(attrs)

	ContentHandler.startColumn = startColumn
	ContentHandler.startStream = startStream
	ContentHandler.startTable = startTable

	return ContentHandler