# Natural Language Toolkit: Text Trees # # Copyright (C) 2001-2022 NLTK Project # Author: Edward Loper # Steven Bird # Peter Ljunglöf # Tom Aarsen <> # URL: # For license information, see LICENSE.TXT import warnings from abc import ABCMeta, abstractmethod from nltk.tree.tree import Tree from nltk.util import slice_bounds ###################################################################### ## Parented trees ###################################################################### class AbstractParentedTree(Tree, metaclass=ABCMeta): """ An abstract base class for a ``Tree`` that automatically maintains pointers to parent nodes. These parent pointers are updated whenever any change is made to a tree's structure. Two subclasses are currently defined: - ``ParentedTree`` is used for tree structures where each subtree has at most one parent. This class should be used in cases where there is no"sharing" of subtrees. - ``MultiParentedTree`` is used for tree structures where a subtree may have zero or more parents. This class should be used in cases where subtrees may be shared. Subclassing =========== The ``AbstractParentedTree`` class redefines all operations that modify a tree's structure to call two methods, which are used by subclasses to update parent information: - ``_setparent()`` is called whenever a new child is added. - ``_delparent()`` is called whenever a child is removed. """ def __init__(self, node, children=None): super().__init__(node, children) # If children is None, the tree is read from node, and # all parents will be set during parsing. if children is not None: # Otherwise we have to set the parent of the children. # Iterate over self, and *not* children, because children # might be an iterator. for i, child in enumerate(self): if isinstance(child, Tree): self._setparent(child, i, dry_run=True) for i, child in enumerate(self): if isinstance(child, Tree): self._setparent(child, i) # //////////////////////////////////////////////////////////// # Parent management # //////////////////////////////////////////////////////////// @abstractmethod def _setparent(self, child, index, dry_run=False): """ Update the parent pointer of ``child`` to point to ``self``. This method is only called if the type of ``child`` is ``Tree``; i.e., it is not called when adding a leaf to a tree. This method is always called before the child is actually added to the child list of ``self``. :type child: Tree :type index: int :param index: The index of ``child`` in ``self``. :raise TypeError: If ``child`` is a tree with an impropriate type. Typically, if ``child`` is a tree, then its type needs to match the type of ``self``. This prevents mixing of different tree types (single-parented, multi-parented, and non-parented). :param dry_run: If true, the don't actually set the child's parent pointer; just check for any error conditions, and raise an exception if one is found. """ @abstractmethod def _delparent(self, child, index): """ Update the parent pointer of ``child`` to not point to self. This method is only called if the type of ``child`` is ``Tree``; i.e., it is not called when removing a leaf from a tree. This method is always called before the child is actually removed from the child list of ``self``. :type child: Tree :type index: int :param index: The index of ``child`` in ``self``. """ # //////////////////////////////////////////////////////////// # Methods that add/remove children # //////////////////////////////////////////////////////////// # Every method that adds or removes a child must make # appropriate calls to _setparent() and _delparent(). def __delitem__(self, index): # del ptree[start:stop] if isinstance(index, slice): start, stop, step = slice_bounds(self, index, allow_step=True) # Clear all the children pointers. for i in range(start, stop, step): if isinstance(self[i], Tree): self._delparent(self[i], i) # Delete the children from our child list. super().__delitem__(index) # del ptree[i] elif isinstance(index, int): if index < 0: index += len(self) if index < 0: raise IndexError("index out of range") # Clear the child's parent pointer. if isinstance(self[index], Tree): self._delparent(self[index], index) # Remove the child from our child list. super().__delitem__(index) elif isinstance(index, (list, tuple)): # del ptree[()] if len(index) == 0: raise IndexError("The tree position () may not be deleted.") # del ptree[(i,)] elif len(index) == 1: del self[index[0]] # del ptree[i1, i2, i3] else: del self[index[0]][index[1:]] else: raise TypeError( "%s indices must be integers, not %s" % (type(self).__name__, type(index).__name__) ) def __setitem__(self, index, value): # ptree[start:stop] = value if isinstance(index, slice): start, stop, step = slice_bounds(self, index, allow_step=True) # make a copy of value, in case it's an iterator if not isinstance(value, (list, tuple)): value = list(value) # Check for any error conditions, so we can avoid ending # up in an inconsistent state if an error does occur. for i, child in enumerate(value): if isinstance(child, Tree): self._setparent(child, start + i * step, dry_run=True) # clear the child pointers of all parents we're removing for i in range(start, stop, step): if isinstance(self[i], Tree): self._delparent(self[i], i) # set the child pointers of the new children. We do this # after clearing *all* child pointers, in case we're e.g. # reversing the elements in a tree. for i, child in enumerate(value): if isinstance(child, Tree): self._setparent(child, start + i * step) # finally, update the content of the child list itself. super().__setitem__(index, value) # ptree[i] = value elif isinstance(index, int): if index < 0: index += len(self) if index < 0: raise IndexError("index out of range") # if the value is not changing, do nothing. if value is self[index]: return # Set the new child's parent pointer. if isinstance(value, Tree): self._setparent(value, index) # Remove the old child's parent pointer if isinstance(self[index], Tree): self._delparent(self[index], index) # Update our child list. super().__setitem__(index, value) elif isinstance(index, (list, tuple)): # ptree[()] = value if len(index) == 0: raise IndexError("The tree position () may not be assigned to.") # ptree[(i,)] = value elif len(index) == 1: self[index[0]] = value # ptree[i1, i2, i3] = value else: self[index[0]][index[1:]] = value else: raise TypeError( "%s indices must be integers, not %s" % (type(self).__name__, type(index).__name__) ) def append(self, child): if isinstance(child, Tree): self._setparent(child, len(self)) super().append(child) def extend(self, children): for child in children: if isinstance(child, Tree): self._setparent(child, len(self)) super().append(child) def insert(self, index, child): # Handle negative indexes. Note that if index < -len(self), # we do *not* raise an IndexError, unlike __getitem__. This # is done for consistency with list.__getitem__ and list.index. if index < 0: index += len(self) if index < 0: index = 0 # Set the child's parent, and update our child list. if isinstance(child, Tree): self._setparent(child, index) super().insert(index, child) def pop(self, index=-1): if index < 0: index += len(self) if index < 0: raise IndexError("index out of range") if isinstance(self[index], Tree): self._delparent(self[index], index) return super().pop(index) # n.b.: like `list`, this is done by equality, not identity! # To remove a specific child, use del ptree[i]. def remove(self, child): index = self.index(child) if isinstance(self[index], Tree): self._delparent(self[index], index) super().remove(child) # We need to implement __getslice__ and friends, even though # they're deprecated, because otherwise list.__getslice__ will get # called (since we're subclassing from list). Just delegate to # __getitem__ etc., but use max(0, start) and max(0, stop) because # because negative indices are already handled *before* # __getslice__ is called; and we don't want to double-count them. if hasattr(list, "__getslice__"): def __getslice__(self, start, stop): return self.__getitem__(slice(max(0, start), max(0, stop))) def __delslice__(self, start, stop): return self.__delitem__(slice(max(0, start), max(0, stop))) def __setslice__(self, start, stop, value): return self.__setitem__(slice(max(0, start), max(0, stop)), value) def __getnewargs__(self): """Method used by the pickle module when un-pickling. This method provides the arguments passed to ``__new__`` upon un-pickling. Without this method, ParentedTree instances cannot be pickled and unpickled in Python 3.7+ onwards. :return: Tuple of arguments for ``__new__``, i.e. the label and the children of this node. :rtype: Tuple[Any, List[AbstractParentedTree]] """ return (self._label, list(self)) class ParentedTree(AbstractParentedTree): """ A ``Tree`` that automatically maintains parent pointers for single-parented trees. The following are methods for querying the structure of a parented tree: ``parent``, ``parent_index``, ``left_sibling``, ``right_sibling``, ``root``, ``treeposition``. Each ``ParentedTree`` may have at most one parent. In particular, subtrees may not be shared. Any attempt to reuse a single ``ParentedTree`` as a child of more than one parent (or as multiple children of the same parent) will cause a ``ValueError`` exception to be raised. ``ParentedTrees`` should never be used in the same tree as ``Trees`` or ``MultiParentedTrees``. Mixing tree implementations may result in incorrect parent pointers and in ``TypeError`` exceptions. """ def __init__(self, node, children=None): self._parent = None """The parent of this Tree, or None if it has no parent.""" super().__init__(node, children) if children is None: # If children is None, the tree is read from node. # After parsing, the parent of the immediate children # will point to an intermediate tree, not self. # We fix this by brute force: for i, child in enumerate(self): if isinstance(child, Tree): child._parent = None self._setparent(child, i) def _frozen_class(self): from nltk.tree.immutable import ImmutableParentedTree return ImmutableParentedTree def copy(self, deep=False): if not deep: warnings.warn( f"{self.__class__.__name__} objects do not support shallow copies. Defaulting to a deep copy." ) return super().copy(deep=True) # ///////////////////////////////////////////////////////////////// # Methods # ///////////////////////////////////////////////////////////////// def parent(self): """The parent of this tree, or None if it has no parent.""" return self._parent def parent_index(self): """ The index of this tree in its parent. I.e., ``ptree.parent()[ptree.parent_index()] is ptree``. Note that ``ptree.parent_index()`` is not necessarily equal to ``ptree.parent.index(ptree)``, since the ``index()`` method returns the first child that is equal to its argument. """ if self._parent is None: return None for i, child in enumerate(self._parent): if child is self: return i assert False, "expected to find self in self._parent!" def left_sibling(self): """The left sibling of this tree, or None if it has none.""" parent_index = self.parent_index() if self._parent and parent_index > 0: return self._parent[parent_index - 1] return None # no left sibling def right_sibling(self): """The right sibling of this tree, or None if it has none.""" parent_index = self.parent_index() if self._parent and parent_index < (len(self._parent) - 1): return self._parent[parent_index + 1] return None # no right sibling def root(self): """ The root of this tree. I.e., the unique ancestor of this tree whose parent is None. If ``ptree.parent()`` is None, then ``ptree`` is its own root. """ root = self while root.parent() is not None: root = root.parent() return root def treeposition(self): """ The tree position of this tree, relative to the root of the tree. I.e., ``ptree.root[ptree.treeposition] is ptree``. """ if self.parent() is None: return () else: return self.parent().treeposition() + (self.parent_index(),) # ///////////////////////////////////////////////////////////////// # Parent Management # ///////////////////////////////////////////////////////////////// def _delparent(self, child, index): # Sanity checks assert isinstance(child, ParentedTree) assert self[index] is child assert child._parent is self # Delete child's parent pointer. child._parent = None def _setparent(self, child, index, dry_run=False): # If the child's type is incorrect, then complain. if not isinstance(child, ParentedTree): raise TypeError("Can not insert a non-ParentedTree into a ParentedTree") # If child already has a parent, then complain. if hasattr(child, "_parent") and child._parent is not None: raise ValueError("Can not insert a subtree that already has a parent.") # Set child's parent pointer & index. if not dry_run: child._parent = self class MultiParentedTree(AbstractParentedTree): """ A ``Tree`` that automatically maintains parent pointers for multi-parented trees. The following are methods for querying the structure of a multi-parented tree: ``parents()``, ``parent_indices()``, ``left_siblings()``, ``right_siblings()``, ``roots``, ``treepositions``. Each ``MultiParentedTree`` may have zero or more parents. In particular, subtrees may be shared. If a single ``MultiParentedTree`` is used as multiple children of the same parent, then that parent will appear multiple times in its ``parents()`` method. ``MultiParentedTrees`` should never be used in the same tree as ``Trees`` or ``ParentedTrees``. Mixing tree implementations may result in incorrect parent pointers and in ``TypeError`` exceptions. """ def __init__(self, node, children=None): self._parents = [] """A list of this tree's parents. This list should not contain duplicates, even if a parent contains this tree multiple times.""" super().__init__(node, children) if children is None: # If children is None, the tree is read from node. # After parsing, the parent(s) of the immediate children # will point to an intermediate tree, not self. # We fix this by brute force: for i, child in enumerate(self): if isinstance(child, Tree): child._parents = [] self._setparent(child, i) def _frozen_class(self): from nltk.tree.immutable import ImmutableMultiParentedTree return ImmutableMultiParentedTree # ///////////////////////////////////////////////////////////////// # Methods # ///////////////////////////////////////////////////////////////// def parents(self): """ The set of parents of this tree. If this tree has no parents, then ``parents`` is the empty set. To check if a tree is used as multiple children of the same parent, use the ``parent_indices()`` method. :type: list(MultiParentedTree) """ return list(self._parents) def left_siblings(self): """ A list of all left siblings of this tree, in any of its parent trees. A tree may be its own left sibling if it is used as multiple contiguous children of the same parent. A tree may appear multiple times in this list if it is the left sibling of this tree with respect to multiple parents. :type: list(MultiParentedTree) """ return [ parent[index - 1] for (parent, index) in self._get_parent_indices() if index > 0 ] def right_siblings(self): """ A list of all right siblings of this tree, in any of its parent trees. A tree may be its own right sibling if it is used as multiple contiguous children of the same parent. A tree may appear multiple times in this list if it is the right sibling of this tree with respect to multiple parents. :type: list(MultiParentedTree) """ return [ parent[index + 1] for (parent, index) in self._get_parent_indices() if index < (len(parent) - 1) ] def _get_parent_indices(self): return [ (parent, index) for parent in self._parents for index, child in enumerate(parent) if child is self ] def roots(self): """ The set of all roots of this tree. This set is formed by tracing all possible parent paths until trees with no parents are found. :type: list(MultiParentedTree) """ return list(self._get_roots_helper({}).values()) def _get_roots_helper(self, result): if self._parents: for parent in self._parents: parent._get_roots_helper(result) else: result[id(self)] = self return result def parent_indices(self, parent): """ Return a list of the indices where this tree occurs as a child of ``parent``. If this child does not occur as a child of ``parent``, then the empty list is returned. The following is always true:: for parent_index in ptree.parent_indices(parent): parent[parent_index] is ptree """ if parent not in self._parents: return [] else: return [index for (index, child) in enumerate(parent) if child is self] def treepositions(self, root): """ Return a list of all tree positions that can be used to reach this multi-parented tree starting from ``root``. I.e., the following is always true:: for treepos in ptree.treepositions(root): root[treepos] is ptree """ if self is root: return [()] else: return [ treepos + (index,) for parent in self._parents for treepos in parent.treepositions(root) for (index, child) in enumerate(parent) if child is self ] # ///////////////////////////////////////////////////////////////// # Parent Management # ///////////////////////////////////////////////////////////////// def _delparent(self, child, index): # Sanity checks assert isinstance(child, MultiParentedTree) assert self[index] is child assert len([p for p in child._parents if p is self]) == 1 # If the only copy of child in self is at index, then delete # self from child's parent list. for i, c in enumerate(self): if c is child and i != index: break else: child._parents.remove(self) def _setparent(self, child, index, dry_run=False): # If the child's type is incorrect, then complain. if not isinstance(child, MultiParentedTree): raise TypeError( "Can not insert a non-MultiParentedTree into a MultiParentedTree" ) # Add self as a parent pointer if it's not already listed. if not dry_run: for parent in child._parents: if parent is self: break else: child._parents.append(self) __all__ = [ "ParentedTree", "MultiParentedTree", ]