"""
Legacy module - don't use in new code!

html5lib now has its own proper implementation.

This module implements a tree builder for html5lib that generates lxml
html element trees.  This module uses camelCase as it follows the
html5lib style guide.
"""

from html5lib.treebuilders import _base, etree as etree_builders
from lxml import html, etree


class DocumentType:
    """Plain record of the values handed to ``TreeBuilder.insertDoctype``."""

    def __init__(self, name, publicId, systemId):
        self.name = name
        self.publicId = publicId
        self.systemId = systemId


class Document:
    """Document (and fragment) node that wraps an lxml element tree."""

    def __init__(self):
        # Filled in by TreeBuilder.insertRoot() once the root element exists.
        self._elementTree = None
        self.childNodes = []

    def appendChild(self, element):
        """Add ``element``'s lxml node as the next sibling of the tree root."""
        self._elementTree.getroot().addnext(element._element)


class TreeBuilder(_base.TreeBuilder):
    """html5lib tree builder whose elements are ``lxml.html`` elements."""

    documentClass = Document
    doctypeClass = DocumentType
    elementClass = None
    commentClass = None
    fragmentClass = Document

    def __init__(self, *args, **kwargs):
        """Bind the lxml-backed element/comment classes, then run base init."""
        html_builder = etree_builders.getETreeModule(html, fullTree=False)
        etree_builder = etree_builders.getETreeModule(etree, fullTree=False)
        # Assigned before the base initialiser runs, as in the original order.
        self.elementClass = html_builder.Element
        self.commentClass = etree_builder.Comment
        _base.TreeBuilder.__init__(self, *args, **kwargs)

    def reset(self):
        """Reset the base builder and this builder's pre-root bookkeeping."""
        _base.TreeBuilder.reset(self)
        self.rootInserted = False
        self.initialComments = []
        self.doctype = None

    def getDocument(self):
        """Return the lxml element tree stored on the current document."""
        return self.document._elementTree

    def getFragment(self):
        """Return the content of the first open element as a flat list.

        The list holds, in order: the element's leading text (if any), its
        child nodes, and its tail text (if any).
        """
        fragment = []
        element = self.openElements[0]._element
        if element.text:
            fragment.append(element.text)
        # Iterating an lxml element yields its children; this replaces the
        # deprecated ``getchildren()`` call.
        fragment.extend(element)
        if element.tail:
            fragment.append(element.tail)
        return fragment

    def insertDoctype(self, name, publicId, systemId):
        """Remember the doctype; it is serialised later by ``insertRoot``."""
        self.doctype = self.doctypeClass(name, publicId, systemId)

    def insertComment(self, data, parent=None):
        """Insert a comment, buffering it if no root element exists yet."""
        if not self.rootInserted:
            # Comments seen before the root are attached in insertRoot().
            self.initialComments.append(data)
        else:
            _base.TreeBuilder.insertComment(self, data, parent)

    def insertRoot(self, name):
        """Create the document and its root element.

        A minimal HTML document (optional doctype plus an empty ``<html>``
        element) is parsed with ``lxml.html`` to obtain the root node.
        Buffered initial comments are then placed before the root, and the
        root is registered in ``document.childNodes`` and ``openElements``.

        ``name`` is only used to construct the wrapper element object; the
        wrapper's underlying lxml node is replaced by the parsed root.
        """
        buf = []
        doctype = self.doctype
        if doctype and doctype.name:
            buf.append('<!DOCTYPE %s' % doctype.name)
            if doctype.publicId is not None or doctype.systemId is not None:
                # ``or ''`` keeps a missing identifier from being rendered
                # as the literal text "None".
                buf.append(' PUBLIC "%s" "%s"' % (doctype.publicId or '',
                                                  doctype.systemId or ''))
            buf.append('>')
        # The parser rejects an empty document, so a root element is always
        # emitted.
        buf.append('<html></html>')
        root = html.fromstring(''.join(buf))

        # Append the initial comments:
        for comment in self.initialComments:
            root.addprevious(etree.Comment(comment))

        # Create the root document and add the ElementTree to it
        self.document = self.documentClass()
        self.document._elementTree = root.getroottree()

        # Add the root element to the internal child/open data structures
        root_element = self.elementClass(name)
        root_element._element = root
        self.document.childNodes.append(root_element)
        self.openElements.append(root_element)

        self.rootInserted = True