summaryrefslogtreecommitdiff
path: root/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treebuilders/etree_lxml.py
diff options
context:
space:
mode:
Diffstat (limited to 'venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treebuilders/etree_lxml.py')
-rw-r--r--venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treebuilders/etree_lxml.py366
1 files changed, 0 insertions, 366 deletions
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treebuilders/etree_lxml.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treebuilders/etree_lxml.py
deleted file mode 100644
index 66a9ba3..0000000
--- a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treebuilders/etree_lxml.py
+++ /dev/null
@@ -1,366 +0,0 @@
1"""Module for supporting the lxml.etree library. The idea here is to use as much
2of the native library as possible, without using fragile hacks like custom element
3names that break between releases. The downside of this is that we cannot represent
4all possible trees; specifically the following are known to cause problems:
5
6Text or comments as siblings of the root element
7Doctypes with no name
8
9When any of these things occur, we emit a DataLossWarning
10"""
11
12from __future__ import absolute_import, division, unicode_literals
13# pylint:disable=protected-access
14
15import warnings
16import re
17import sys
18
19from . import base
20from ..constants import DataLossWarning
21from .. import constants
22from . import etree as etree_builders
23from .. import _ihatexml
24
25import lxml.etree as etree
26
27
# Module-level switch read by TreeBuilder.getDocument: when true the whole
# lxml ElementTree is returned, otherwise only its root element.
fullTree = True
# Matches names written as "{namespace}local"; group 1 is the namespace and
# group 2 the local part.
tag_regexp = re.compile("{([^}]*)}(.*)")

# The ``tag`` value carried by a throwaway lxml comment node; the serializers
# compare ``element.tag`` against it to recognise comment nodes.
comment_type = etree.Comment("asd").tag
32
33
class DocumentType(object):
    """Plain record holding the name, public id and system id of a doctype."""

    def __init__(self, name, publicId, systemId):
        # The three values are stored unmodified under the same names.
        self.name, self.publicId, self.systemId = name, publicId, systemId
39
40
class Document(object):
    """Document node that wraps an lxml ElementTree.

    ``_elementTree`` starts out as ``None`` and must be assigned before
    ``appendChild`` is used; ``_childNodes`` is the list exposed through the
    read-only ``childNodes`` property.
    """

    def __init__(self):
        self._elementTree = None
        self._childNodes = []

    def appendChild(self, element):
        """Add ``element`` after the last top-level node of the tree.

        ``element`` is a wrapper object whose ``_element`` attribute is the
        underlying lxml node.  Raises ``AttributeError`` if ``_elementTree``
        has not been set yet.
        """
        root = self._elementTree.getroot()
        last = root
        # Advance to the final following sibling of the root.  Calling
        # ``addnext`` on the root itself would place every new node directly
        # after the root, so repeated appends would come out in reverse order.
        for last in root.itersiblings():
            pass
        last.addnext(element._element)

    def _getChildNodes(self):
        """Return the list of child wrappers recorded for this document."""
        return self._childNodes

    childNodes = property(_getChildNodes)
53
54
def testSerializer(element):
    """Return an indented, one-node-per-line text dump of ``element``.

    ``element`` may be a whole tree (any object with ``getroot``), a string,
    an iterable fragment of nodes and strings, a comment node or an element.
    Every line except the ``#document`` / ``#document-fragment`` headers
    starts with ``|`` followed by the indentation for its nesting level, and
    the lines are joined with newlines.
    """
    lines = []
    emit = lines.append
    nameFilter = _ihatexml.InfosetFilter(preventDoubleDashComments=True)

    def quoted(depth, text):
        # Text is shown wrapped in double quotes at the requested depth.
        return "|%s\"%s\"" % (' ' * depth, text)

    def serializeElement(node, indent=0):
        pad = ' ' * indent
        deeper = indent + 2

        # --- Objects without a tag: trees, plain text and fragments -------
        if not hasattr(node, "tag"):
            if hasattr(node, "getroot"):
                emit("#document")
                info = node.docinfo
                if info.internalDTD:
                    if info.public_id or info.system_url:
                        doctype = """<!DOCTYPE %s "%s" "%s">""" % (
                            info.root_name, info.public_id, info.system_url)
                    else:
                        doctype = "<!DOCTYPE %s>" % info.root_name
                    emit("|%s%s" % (' ' * deeper, doctype))
                # Rewind to the first node preceding the root, then dump
                # every top-level node from there in document order.
                current = node.getroot()
                earlier = current.getprevious()
                while earlier is not None:
                    current, earlier = earlier, earlier.getprevious()
                while current is not None:
                    serializeElement(current, deeper)
                    current = current.getnext()
            elif isinstance(node, (str, bytes)):
                # A text item inside a fragment.
                assert isinstance(node, str) or sys.version_info[0] == 2
                emit(quoted(indent, node))
            else:
                emit("#document-fragment")
                for item in node:
                    serializeElement(item, deeper)
            return

        # --- Comment nodes -------------------------------------------------
        if node.tag == comment_type:
            emit("|%s<!-- %s -->" % (pad, node.text))
            if hasattr(node, "tail") and node.tail:
                emit(quoted(indent, node.tail))
            return

        # --- Ordinary elements ---------------------------------------------
        assert isinstance(node, etree._Element)
        match = etree_builders.tag_regexp.match(node.tag)
        if match is None:
            emit("|%s<%s>" % (pad, nameFilter.fromXmlName(node.tag)))
        else:
            ns, local = match.group(1), match.group(2)
            prefix = constants.prefixes[ns]
            emit("|%s<%s %s>" % (pad, prefix, nameFilter.fromXmlName(local)))

        if hasattr(node, "attrib"):
            shown = []
            for raw, value in node.attrib.items():
                match = tag_regexp.match(raw)
                if match is None:
                    label = nameFilter.fromXmlName(raw)
                else:
                    ns, local = match.groups()
                    local = nameFilter.fromXmlName(local)
                    label = "%s %s" % (constants.prefixes[ns], local)
                shown.append((label, value))

            # Attributes are listed in sorted order, one per line.
            shown.sort()
            for label, value in shown:
                emit('|%s%s="%s"' % (' ' * deeper, label, value))

        if node.text:
            emit(quoted(deeper, node.text))
        for child in node:
            serializeElement(child, deeper)
        # Tail text is printed at the element's own depth.
        if hasattr(node, "tail") and node.tail:
            emit(quoted(indent, node.tail))

    serializeElement(element, 0)

    return "\n".join(lines)
132
133
def tostring(element):
    """Serialize an element and its child nodes to a string.

    ``element`` may also be a tree object (one without a ``tag``), in which
    case its doctype, if it has an internal DTD, is written first and the
    root element follows.  No escaping is applied to names, attribute values
    or text.
    """
    pieces = []
    emit = pieces.append

    def serializeElement(node):
        if not hasattr(node, "tag"):
            # Tree object: optional doctype, then the root element.
            info = node.docinfo
            if info.internalDTD:
                emit(info.doctype or "<!DOCTYPE %s>" % info.root_name)
            serializeElement(node.getroot())

        elif node.tag == comment_type:
            emit("<!--%s-->" % (node.text,))

        else:
            # Anything else is treated as an ordinary element.
            if node.attrib:
                rendered = " ".join("%s=\"%s\"" % pair
                                    for pair in node.attrib.items())
                emit("<%s %s>" % (node.tag, rendered))
            else:
                emit("<%s>" % (node.tag,))

            if node.text:
                emit(node.text)
            for child in node:
                serializeElement(child)
            emit("</%s>" % (node.tag,))

        # Trailing text applies to every node kind that carries a tail.
        if hasattr(node, "tail") and node.tail:
            emit(node.tail)

    serializeElement(element)

    return "".join(pieces)
173
174
class TreeBuilder(base.TreeBuilder):
    """Tree builder that stores the parsed document in an lxml.etree tree.

    The element and comment classes are created per instance in ``__init__``
    so that they can close over the instance's ``InfosetFilter``, which is
    used to coerce names, text and comments before they are handed to lxml.
    """
    documentClass = Document
    doctypeClass = DocumentType
    elementClass = None
    commentClass = None
    fragmentClass = Document
    implementation = etree

    def __init__(self, namespaceHTMLElements, fullTree=False):
        """Create the per-instance node classes and initialise the base class.

        ``fullTree`` is forwarded to ``etree_builders.getETreeModule`` only;
        ``getDocument`` consults the module-level ``fullTree`` flag instead.
        """
        builder = etree_builders.getETreeModule(etree, fullTree=fullTree)
        infosetFilter = self.infosetFilter = _ihatexml.InfosetFilter(preventDoubleDashComments=True)
        self.namespaceHTMLElements = namespaceHTMLElements

        def xmlAttributeName(key):
            # Tuple keys carry the namespace at index 2 and the local name at
            # index 1; they become "{namespace}local".  Other keys are just
            # coerced.
            if isinstance(key, tuple):
                return "{%s}%s" % (key[2], infosetFilter.coerceAttribute(key[1]))
            return infosetFilter.coerceAttribute(key)

        class Attributes(dict):
            """dict that copies every item it is given onto the lxml element."""

            def __init__(self, element, value=None):
                if value is None:
                    value = {}
                self._element = element
                dict.__init__(self, value)  # pylint:disable=non-parent-init-called
                # dict.__init__ does not go through __setitem__, so copy the
                # initial items across explicitly.
                for key, item in self.items():
                    self._element._element.attrib[xmlAttributeName(key)] = item

            def __setitem__(self, key, value):
                dict.__setitem__(self, key, value)
                self._element._element.attrib[xmlAttributeName(key)] = value

        class Element(builder.Element):
            def __init__(self, name, namespace):
                name = infosetFilter.coerceElement(name)
                builder.Element.__init__(self, name, namespace=namespace)
                self._attributes = Attributes(self)

            def _setName(self, name):
                self._name = infosetFilter.coerceElement(name)
                self._element.tag = self._getETreeTag(
                    self._name, self._namespace)

            def _getName(self):
                # Callers see the name with the coercion reversed.
                return infosetFilter.fromXmlName(self._name)

            name = property(_getName, _setName)

            def _getAttributes(self):
                return self._attributes

            def _setAttributes(self, attributes):
                self._attributes = Attributes(self, attributes)

            attributes = property(_getAttributes, _setAttributes)

            def insertText(self, data, insertBefore=None):
                data = infosetFilter.coerceCharacters(data)
                builder.Element.insertText(self, data, insertBefore)

            def appendChild(self, child):
                builder.Element.appendChild(self, child)

        class Comment(builder.Comment):
            def __init__(self, data):
                data = infosetFilter.coerceComment(data)
                builder.Comment.__init__(self, data)

            def _setData(self, data):
                data = infosetFilter.coerceComment(data)
                self._element.text = data

            def _getData(self):
                return self._element.text

            data = property(_getData, _setData)

        self.elementClass = Element
        self.commentClass = Comment
        base.TreeBuilder.__init__(self, namespaceHTMLElements)

    def reset(self):
        """Reset the base builder and return to the pre-root state."""
        base.TreeBuilder.reset(self)
        # Until insertRoot runs, comments are only collected.
        self.insertComment = self.insertCommentInitial
        self.initial_comments = []
        self.doctype = None

    def testSerializer(self, element):
        """Return the module-level ``testSerializer`` dump of ``element``."""
        return testSerializer(element)

    def getDocument(self):
        """Return the ElementTree, or its root when ``fullTree`` is false.

        The flag read here is the module-level ``fullTree``, not the
        constructor argument of the same name.
        """
        if fullTree:
            return self.document._elementTree
        else:
            return self.document._elementTree.getroot()

    def getFragment(self):
        """Return the first open element's content as a flat list.

        The list holds the element's text (if any), its child nodes, and its
        tail text (if any), in that order.
        """
        fragment = []
        element = self.openElements[0]._element
        if element.text:
            fragment.append(element.text)
        fragment.extend(list(element))
        if element.tail:
            fragment.append(element.tail)
        return fragment

    def insertDoctype(self, token):
        """Record the doctype described by ``token`` for use by insertRoot.

        A doctype with an empty name is dropped, and a name that has to be
        coerced is stored in coerced form; both cases emit a
        ``DataLossWarning``.
        """
        name = token["name"]
        publicId = token["publicId"]
        systemId = token["systemId"]

        if not name:
            warnings.warn("lxml cannot represent empty doctype", DataLossWarning)
            self.doctype = None
        else:
            coercedName = self.infosetFilter.coerceElement(name)
            if coercedName != name:
                warnings.warn("lxml cannot represent non-xml doctype", DataLossWarning)

            doctype = self.doctypeClass(coercedName, publicId, systemId)
            self.doctype = doctype

    def insertCommentInitial(self, data, parent=None):
        """Store a comment seen before the root; insertRoot attaches it."""
        assert parent is None or parent is self.document
        assert self.document._elementTree is None
        self.initial_comments.append(data)

    def insertCommentMain(self, data, parent=None):
        """Insert a comment once the root exists, via the base class.

        Emits a ``DataLossWarning`` when the comment targets the document and
        the root's last child is already a comment.
        """
        if parent == self.document:
            root = self.document._elementTree.getroot()
            # Check the length first: indexing [-1] on a root with no
            # children raises IndexError.
            if len(root) and root[-1].tag == comment_type:
                warnings.warn("lxml cannot represent adjacent comments beyond the root elements", DataLossWarning)
        super(TreeBuilder, self).insertComment(data, parent)

    def insertRoot(self, token):
        """Create the lxml tree and root element described by ``token``.

        The stored doctype (if any) is rendered to markup and parsed together
        with a placeholder root element, which is then renamed.  Afterwards
        the collected initial comments are placed before the root, the root
        is registered as a child node and open element, and comment insertion
        switches to ``insertCommentMain``.
        """
        # Because of the way libxml2 works, it doesn't seem to be possible to
        # alter information like the doctype after the tree has been parsed.
        # Therefore we need to use the built-in parser to create our initial
        # tree, after which we can add elements like normal
        docStr = ""
        if self.doctype:
            assert self.doctype.name
            docStr += "<!DOCTYPE %s" % self.doctype.name
            if (self.doctype.publicId is not None or
                    self.doctype.systemId is not None):
                docStr += (' PUBLIC "%s" ' %
                           (self.infosetFilter.coercePubid(self.doctype.publicId or "")))
                if self.doctype.systemId:
                    sysid = self.doctype.systemId
                    if "'" in sysid and '"' in sysid:
                        warnings.warn("DOCTYPE system cannot contain single and double quotes", DataLossWarning)
                        sysid = sysid.replace("'", 'U00027')
                    # Use whichever quote character the identifier itself
                    # does not contain.
                    if "'" in sysid:
                        docStr += '"%s"' % sysid
                    else:
                        docStr += "'%s'" % sysid
                else:
                    docStr += "''"
            docStr += ">"
            if self.doctype.name != token["name"]:
                warnings.warn("lxml cannot represent doctype with a different name to the root element", DataLossWarning)
        docStr += "<THIS_SHOULD_NEVER_APPEAR_PUBLICLY/>"
        root = etree.fromstring(docStr)

        # Append the initial comments:
        for comment_token in self.initial_comments:
            comment = self.commentClass(comment_token["data"])
            root.addprevious(comment._element)

        # Create the root document and add the ElementTree to it
        self.document = self.documentClass()
        self.document._elementTree = root.getroottree()

        # Give the root element the right name
        name = token["name"]
        namespace = token.get("namespace", self.defaultNamespace)
        if namespace is None:
            etree_tag = name
        else:
            etree_tag = "{%s}%s" % (namespace, name)
        root.tag = etree_tag

        # Add the root element to the internal child/open data structures
        root_element = self.elementClass(name, namespace)
        root_element._element = root
        self.document._childNodes.append(root_element)
        self.openElements.append(root_element)

        # Reset to the default insert comment function
        self.insertComment = self.insertCommentMain