summary refs log tree commit diff
path: root/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/etree_lxml.py
diff options
context:
space:
mode:
Diffstat (limited to 'venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/etree_lxml.py')
-rw-r--r-- venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/etree_lxml.py 213
1 files changed, 213 insertions, 0 deletions
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/etree_lxml.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/etree_lxml.py
new file mode 100644
index 0000000..f6f395a
--- /dev/null
+++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/etree_lxml.py
@@ -0,0 +1,213 @@
1from __future__ import absolute_import, division, unicode_literals
2from pip._vendor.six import text_type
3
4from lxml import etree
5from ..treebuilders.etree import tag_regexp
6
7from . import base
8
9from .. import _ihatexml
10
11
def ensure_str(s):
    """Return ``s`` as a text string, passing ``None`` through untouched.

    A value that is already a text string (``text_type``) is returned as-is.
    Anything else is treated as a byte string and decoded as strict ASCII.

    :arg s: ``None``, a text string, or an object exposing ``decode``

    :returns: ``None`` when ``s`` is ``None``, otherwise the text form of ``s``

    :raises UnicodeDecodeError: if a byte string holds non-ASCII bytes
    """
    if s is None:
        return None
    elif isinstance(s, text_type):
        return s
    else:
        # Strict decoding: non-ASCII input is an error, never silently mangled.
        return s.decode("ascii", "strict")
19
20
class Root(object):
    """Synthetic top-level node wrapped around an lxml tree or element.

    ``children`` holds, in order, an optional ``Doctype`` (only when the tree
    exposes ``docinfo`` with a truthy ``internalDTD``) followed by every
    top-level sibling of the root element, from the first one to the last.
    """

    def __init__(self, et):
        """Collect the doctype (if any) and all top-level nodes of ``et``.

        :arg et: an object offering ``getroot()`` (tree-like) or, failing
            that, a node offering ``getprevious()`` / ``getnext()``
        """
        self.elementtree = et
        self.children = []

        # Objects without ``docinfo`` (or with incomplete docinfo) simply get
        # no doctype child; the AttributeError is deliberately ignored.
        try:
            if et.docinfo.internalDTD:
                self.children.append(Doctype(self,
                                             ensure_str(et.docinfo.root_name),
                                             ensure_str(et.docinfo.public_id),
                                             ensure_str(et.docinfo.system_url)))
        except AttributeError:
            pass

        # Accept either a tree (has getroot) or a bare node.
        try:
            node = et.getroot()
        except AttributeError:
            node = et

        # Rewind to the first top-level sibling, then gather every sibling
        # in document order.
        while node.getprevious() is not None:
            node = node.getprevious()
        while node is not None:
            self.children.append(node)
            node = node.getnext()

        self.text = None
        self.tail = None

    def __getitem__(self, key):
        """Return the child (or slice of children) at ``key``."""
        return self.children[key]

    def getnext(self):
        """Return ``None``: the root has no following sibling."""
        return None

    def __len__(self):
        """Return 1, regardless of how many children were collected.

        NOTE(review): presumably this keeps the root truthy for the
        ``len(node) or node.text`` check in ``TreeWalker.getFirstChild``
        (``text`` is always ``None`` here) -- confirm before changing.
        """
        return 1
57
58
class Doctype(object):
    """Plain record standing in for a document's doctype declaration."""

    def __init__(self, root_node, name, public_id, system_id):
        """Store the doctype fields and a back-reference to the root.

        :arg root_node: the object whose ``children`` list contains this
            doctype
        :arg name: doctype name
        :arg public_id: public identifier (may be ``None``)
        :arg system_id: system identifier (may be ``None``)
        """
        self.root_node = root_node
        self.name = name
        self.public_id = public_id
        self.system_id = system_id

        self.text = None
        self.tail = None

    def getnext(self):
        """Return the node stored right after the doctype in the root.

        ``Root.__init__`` appends the doctype before any other child, so the
        following sibling is always looked up at index 1.
        """
        return self.root_node.children[1]
71
72
class FragmentRoot(Root):
    """Root node for a fragment given as a plain list of top-level items.

    Unlike ``Root``, no tree is inspected (``Root.__init__`` is intentionally
    not called): every supplied item is wrapped in a ``FragmentWrapper`` that
    points back at this root.
    """

    def __init__(self, children):
        """Wrap each item of ``children`` and store the wrappers in order.

        :arg children: iterable of the fragment's top-level items
        """
        self.children = [FragmentWrapper(self, child) for child in children]
        self.text = self.tail = None

    def getnext(self):
        """Return ``None``: the fragment root has no following sibling."""
        return None
80
81
class FragmentWrapper(object):
    """Proxy around one top-level fragment item.

    The wrapper supplies sibling/parent navigation relative to its
    ``FragmentRoot`` and forwards everything else (unknown attributes,
    indexing, length, truthiness, string conversion) to the wrapped object.
    """

    def __init__(self, fragment_root, obj):
        """Wrap ``obj`` as a child of ``fragment_root``.

        :arg fragment_root: object whose ``children`` list holds the wrappers
        :arg obj: the wrapped item; ``text`` / ``tail`` are copied from it
            (through ``ensure_str``) when it has those attributes, and are
            ``None`` otherwise
        """
        self.root_node = fragment_root
        self.obj = obj
        if hasattr(self.obj, 'text'):
            self.text = ensure_str(self.obj.text)
        else:
            self.text = None
        if hasattr(self.obj, 'tail'):
            self.tail = ensure_str(self.obj.tail)
        else:
            self.tail = None

    def __getattr__(self, name):
        """Forward any attribute not found on the wrapper to ``obj``."""
        return getattr(self.obj, name)

    def getnext(self):
        """Return the next wrapper in the root's children, or ``None``."""
        siblings = self.root_node.children
        idx = siblings.index(self)
        if idx < len(siblings) - 1:
            return siblings[idx + 1]
        else:
            return None

    def __getitem__(self, key):
        """Index into the wrapped object."""
        return self.obj[key]

    def __bool__(self):
        """Report the truthiness of the wrapped object."""
        return bool(self.obj)

    def getparent(self):
        """Return ``None``: wrappers never report a parent."""
        return None

    def __str__(self):
        """Return ``str()`` of the wrapped object."""
        return str(self.obj)

    def __unicode__(self):
        """Return ``str()`` of the wrapped object."""
        return str(self.obj)

    def __len__(self):
        """Return the length of the wrapped object."""
        return len(self.obj)
123
124
class TreeWalker(base.NonRecursiveTreeWalker):
    """Tree walker over an lxml tree/element or a list of fragment items.

    Text is not a node of its own in the underlying tree, so text nodes are
    represented as ``(node, "text")`` or ``(node, "tail")`` tuples.
    """

    def __init__(self, tree):
        """Wrap ``tree`` in a synthetic root and initialise the walker.

        :arg tree: a list of fragment items (wrapped in ``FragmentRoot``) or
            anything accepted by ``Root``
        """
        # pylint:disable=redefined-variable-type
        if isinstance(tree, list):
            self.fragmentChildren = set(tree)
            tree = FragmentRoot(tree)
        else:
            self.fragmentChildren = set()
            tree = Root(tree)
        base.NonRecursiveTreeWalker.__init__(self, tree)
        self.filter = _ihatexml.InfosetFilter()

    def getNodeDetails(self, node):
        """Return the details tuple describing ``node``.

        The first item is one of the ``base`` node-type constants; the
        remaining items depend on the type (text content, doctype ids, or
        namespace / name / attributes / has-children for elements).
        """
        if isinstance(node, tuple):  # Text node
            node, key = node
            assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key
            return base.TEXT, ensure_str(getattr(node, key))

        elif isinstance(node, Root):
            return (base.DOCUMENT,)

        elif isinstance(node, Doctype):
            return base.DOCTYPE, node.name, node.public_id, node.system_id

        elif isinstance(node, FragmentWrapper) and not hasattr(node, "tag"):
            # A wrapped item without a ``tag`` is reported as text.
            return base.TEXT, ensure_str(node.obj)

        elif node.tag == etree.Comment:
            return base.COMMENT, ensure_str(node.text)

        elif node.tag == etree.Entity:
            return base.ENTITY, ensure_str(node.text)[1:-1]  # strip &;

        else:
            # This is assumed to be an ordinary element
            # NOTE(review): tag_regexp presumably splits "{namespace}name"
            # into (namespace, name) -- confirm in treebuilders.etree.
            match = tag_regexp.match(ensure_str(node.tag))
            if match:
                namespace, tag = match.groups()
            else:
                namespace = None
                tag = ensure_str(node.tag)
            attrs = {}
            # The loop never mutates node.attrib, so no defensive copy is
            # needed.
            for name, value in node.attrib.items():
                name = ensure_str(name)
                value = ensure_str(value)
                match = tag_regexp.match(name)
                if match:
                    attrs[(match.group(1), match.group(2))] = value
                else:
                    attrs[(None, name)] = value
            # The last item is truthy when the element has child elements
            # or leading text.
            return (base.ELEMENT, namespace, self.filter.fromXmlName(tag),
                    attrs, len(node) > 0 or node.text)

    def getFirstChild(self, node):
        """Return the first child of ``node``: its text, else ``node[0]``."""
        assert not isinstance(node, tuple), "Text nodes have no children"

        assert len(node) or node.text, "Node has no children"
        if node.text:
            return (node, "text")
        else:
            return node[0]

    def getNextSibling(self, node):
        """Return the node following ``node`` at the same level, or ``None``."""
        if isinstance(node, tuple):  # Text node
            node, key = node
            assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key
            if key == "text":
                # XXX: we cannot use a "bool(node) and node[0] or None" construct here
                # because node[0] might evaluate to False if it has no child element
                if len(node):
                    return node[0]
                else:
                    return None
            else:  # tail
                return node.getnext()

        # A node's tail text comes before its next sibling.
        return (node, "tail") if node.tail else node.getnext()

    def getParentNode(self, node):
        """Return the parent of ``node``, or ``None`` at the top of a fragment."""
        if isinstance(node, tuple):  # Text node
            node, key = node
            assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key
            if key == "text":
                # Leading text belongs to the element that carries it.
                return node
            # else: fallback to "normal" processing
        elif node in self.fragmentChildren:
            return None

        return node.getparent()