diff options
Diffstat (limited to 'venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers')
6 files changed, 861 insertions, 0 deletions
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/__init__.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/__init__.py new file mode 100644 index 0000000..31a173d --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/__init__.py | |||
@@ -0,0 +1,154 @@ | |||
1 | """A collection of modules for iterating through different kinds of | ||
2 | tree, generating tokens identical to those produced by the tokenizer | ||
3 | module. | ||
4 | |||
5 | To create a tree walker for a new type of tree, you need to | ||
6 | implement a tree walker object (called TreeWalker by convention) that | ||
7 | implements a 'serialize' method taking a tree as sole argument and | ||
8 | returning an iterator generating tokens. | ||
9 | """ | ||
10 | |||
11 | from __future__ import absolute_import, division, unicode_literals | ||
12 | |||
13 | from .. import constants | ||
14 | from .._utils import default_etree | ||
15 | |||
16 | __all__ = ["getTreeWalker", "pprint"] | ||
17 | |||
18 | treeWalkerCache = {} | ||
19 | |||
20 | |||
def getTreeWalker(treeType, implementation=None, **kwargs):
    """Look up the TreeWalker class for one of the built-in tree types

    :arg str treeType: the name of the tree type required (case-insensitive).
        Supported values are:

        * "dom": The xml.dom.minidom DOM implementation
        * "etree": A generic walker for tree implementations exposing an
          elementtree-like interface (known to work with ElementTree,
          cElementTree and lxml.etree).
        * "lxml": Optimized walker for lxml.etree
        * "genshi": a Genshi stream

    :arg implementation: A module implementing the tree type e.g.
        xml.etree.ElementTree or cElementTree (Currently applies to the "etree"
        tree type only).

    :arg kwargs: keyword arguments passed to the etree walker--for other
        walkers, this has no effect

    :returns: a TreeWalker class, or ``None`` when ``treeType`` is not one of
        the supported names

    """
    kind = treeType.lower()

    # Anything already resolved is served straight from the module cache.
    if kind in treeWalkerCache:
        return treeWalkerCache.get(kind)

    if kind == "etree":
        from . import etree
        if implementation is None:
            implementation = default_etree
        # XXX: NEVER cache here, caching is done in the etree submodule
        return etree.getETreeModule(implementation, **kwargs).TreeWalker

    # The remaining walkers are imported lazily on first request and then
    # remembered in the cache.
    if kind == "dom":
        from . import dom
        treeWalkerCache[kind] = dom.TreeWalker
    elif kind == "genshi":
        from . import genshi
        treeWalkerCache[kind] = genshi.TreeWalker
    elif kind == "lxml":
        from . import etree_lxml
        treeWalkerCache[kind] = etree_lxml.TreeWalker

    return treeWalkerCache.get(kind)
63 | |||
64 | |||
def concatenateCharacterTokens(tokens):
    """Merge runs of adjacent character tokens into single tokens

    Consecutive ``Characters`` and ``SpaceCharacters`` tokens are collapsed
    into one ``Characters`` token whose data is their concatenation; every
    other token is passed through untouched.

    :arg tokens: an iterable of token dicts

    :returns: a generator of token dicts

    """
    buffered = []
    for token in tokens:
        if token["type"] in ("Characters", "SpaceCharacters"):
            buffered.append(token["data"])
            continue
        # A non-character token ends the current run: flush it first.
        if buffered:
            yield {"type": "Characters", "data": "".join(buffered)}
            buffered = []
        yield token
    # Flush a run that reaches the end of the stream.
    if buffered:
        yield {"type": "Characters", "data": "".join(buffered)}
78 | |||
79 | |||
def pprint(walker):
    """Pretty printer for tree walkers

    Walks the token stream and renders one line per tag, attribute, comment,
    doctype and text run, indented by two spaces per nesting level.

    :arg walker: a TreeWalker instance

    :returns: the rendered lines joined with newlines

    :raises ValueError: if a token has an unrecognised type

    """
    lines = []
    depth = 0
    for token in concatenateCharacterTokens(walker):
        kind = token["type"]

        if kind in ("StartTag", "EmptyTag"):
            # tag name, prefixed with its namespace unless that is HTML
            tagNamespace = token["namespace"]
            if tagNamespace and tagNamespace != constants.namespaces["html"]:
                prefix = (constants.prefixes[tagNamespace]
                          if tagNamespace in constants.prefixes
                          else tagNamespace)
                label = "%s %s" % (prefix, token["name"])
            else:
                label = token["name"]
            lines.append("%s<%s>" % (" " * depth, label))
            depth += 2

            # attributes (sorted for consistent ordering), printed one level
            # deeper than the tag itself
            for (namespace, localname), value in sorted(token["data"].items()):
                if namespace:
                    prefix = (constants.prefixes[namespace]
                              if namespace in constants.prefixes
                              else namespace)
                    label = "%s %s" % (prefix, localname)
                else:
                    label = localname
                lines.append("%s%s=\"%s\"" % (" " * depth, label, value))

            # self-closing: there will be no EndTag to undo the indent
            if kind == "EmptyTag":
                depth -= 2

        elif kind == "EndTag":
            depth -= 2

        elif kind == "Comment":
            lines.append("%s<!-- %s -->" % (" " * depth, token["data"]))

        elif kind == "Doctype":
            if not token["name"]:
                lines.append("%s<!DOCTYPE >" % (" " * depth,))
            elif token["publicId"]:
                lines.append("""%s<!DOCTYPE %s "%s" "%s">""" %
                             (" " * depth,
                              token["name"],
                              token["publicId"],
                              token["systemId"] or ""))
            elif token["systemId"]:
                lines.append("""%s<!DOCTYPE %s "" "%s">""" %
                             (" " * depth,
                              token["name"],
                              token["systemId"]))
            else:
                lines.append("%s<!DOCTYPE %s>" % (" " * depth,
                                                  token["name"]))

        elif kind == "Characters":
            lines.append("%s\"%s\"" % (" " * depth, token["data"]))

        elif kind == "SpaceCharacters":
            assert False, "concatenateCharacterTokens should have got rid of all Space tokens"

        else:
            raise ValueError("Unknown token type, %s" % kind)

    return "\n".join(lines)
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/base.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/base.py new file mode 100644 index 0000000..f82984b --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/base.py | |||
@@ -0,0 +1,252 @@ | |||
1 | from __future__ import absolute_import, division, unicode_literals | ||
2 | |||
3 | from xml.dom import Node | ||
4 | from ..constants import namespaces, voidElements, spaceCharacters | ||
5 | |||
6 | __all__ = ["DOCUMENT", "DOCTYPE", "TEXT", "ELEMENT", "COMMENT", "ENTITY", "UNKNOWN", | ||
7 | "TreeWalker", "NonRecursiveTreeWalker"] | ||
8 | |||
9 | DOCUMENT = Node.DOCUMENT_NODE | ||
10 | DOCTYPE = Node.DOCUMENT_TYPE_NODE | ||
11 | TEXT = Node.TEXT_NODE | ||
12 | ELEMENT = Node.ELEMENT_NODE | ||
13 | COMMENT = Node.COMMENT_NODE | ||
14 | ENTITY = Node.ENTITY_NODE | ||
15 | UNKNOWN = "<#UNKNOWN#>" | ||
16 | |||
17 | spaceCharacters = "".join(spaceCharacters) | ||
18 | |||
19 | |||
class TreeWalker(object):
    """Walks a tree yielding tokens

    Tokens are dicts that all have a ``type`` field specifying the type of the
    token.

    """
    def __init__(self, tree):
        """Creates a TreeWalker

        :arg tree: the tree to walk

        """
        self.tree = tree

    def __iter__(self):
        """Yields the tokens of the tree; must be provided by subclasses"""
        raise NotImplementedError

    def error(self, msg):
        """Generates an error token with the given message

        :arg msg: the error message

        :returns: SerializeError token

        """
        return {"type": "SerializeError", "data": msg}

    def emptyTag(self, namespace, name, attrs, hasChildren=False):
        """Generates an EmptyTag token

        This is a generator: it always yields the EmptyTag token and, when
        ``hasChildren`` is true, follows it with a SerializeError token.

        :arg namespace: the namespace of the token--can be ``None``

        :arg name: the name of the element

        :arg attrs: the attributes of the element as a dict

        :arg hasChildren: whether or not to yield a SerializeError because
            this tag shouldn't have children

        :returns: generator of an EmptyTag token, optionally followed by a
            SerializeError token

        """
        yield {"type": "EmptyTag", "name": name,
               "namespace": namespace,
               "data": attrs}
        if hasChildren:
            yield self.error("Void element has children")

    def startTag(self, namespace, name, attrs):
        """Generates a StartTag token

        :arg namespace: the namespace of the token--can be ``None``

        :arg name: the name of the element

        :arg attrs: the attributes of the element as a dict

        :returns: StartTag token

        """
        return {"type": "StartTag",
                "name": name,
                "namespace": namespace,
                "data": attrs}

    def endTag(self, namespace, name):
        """Generates an EndTag token

        :arg namespace: the namespace of the token--can be ``None``

        :arg name: the name of the element

        :returns: EndTag token

        """
        return {"type": "EndTag",
                "name": name,
                "namespace": namespace}

    def text(self, data):
        """Generates SpaceCharacters and Characters tokens

        Depending on what's in the data, this generates one or more
        ``SpaceCharacters`` and ``Characters`` tokens.

        For example:

            >>> from html5lib.treewalkers.base import TreeWalker
            >>> # Give it an empty tree just so it instantiates
            >>> walker = TreeWalker([])
            >>> list(walker.text(''))
            []
            >>> list(walker.text(' '))
            [{u'data': ' ', u'type': u'SpaceCharacters'}]
            >>> list(walker.text(' abc '))  # doctest: +NORMALIZE_WHITESPACE
            [{u'data': ' ', u'type': u'SpaceCharacters'},
            {u'data': u'abc', u'type': u'Characters'},
            {u'data': u' ', u'type': u'SpaceCharacters'}]

        :arg data: the text data

        :returns: one or more ``SpaceCharacters`` and ``Characters`` tokens

        """
        # Peel off leading whitespace; whatever was removed is the left run.
        middle = data.lstrip(spaceCharacters)
        left = data[:len(data) - len(middle)]
        if left:
            yield {"type": "SpaceCharacters", "data": left}
        # Then peel trailing whitespace off the remainder.
        data = middle
        middle = data.rstrip(spaceCharacters)
        right = data[len(middle):]
        if middle:
            yield {"type": "Characters", "data": middle}
        if right:
            yield {"type": "SpaceCharacters", "data": right}

    def comment(self, data):
        """Generates a Comment token

        :arg data: the comment

        :returns: Comment token

        """
        return {"type": "Comment", "data": data}

    def doctype(self, name, publicId=None, systemId=None):
        """Generates a Doctype token

        :arg name: the doctype name

        :arg publicId: the public identifier, or ``None``

        :arg systemId: the system identifier, or ``None``

        :returns: the Doctype token

        """
        return {"type": "Doctype",
                "name": name,
                "publicId": publicId,
                "systemId": systemId}

    def entity(self, name):
        """Generates an Entity token

        :arg name: the entity name

        :returns: an Entity token

        """
        return {"type": "Entity", "name": name}

    def unknown(self, nodeType):
        """Handles unknown node types

        :arg nodeType: identifier of the unrecognised node type; it may be a
            string or a non-string value such as an integer DOM node type

        :returns: SerializeError token naming the node type

        """
        # Use %-formatting rather than "+" so that non-string identifiers
        # (e.g. integer DOM nodeType values) produce a token instead of
        # raising TypeError.
        return self.error("Unknown node type: %s" % (nodeType,))
178 | |||
179 | |||
class NonRecursiveTreeWalker(TreeWalker):
    """Iterative depth-first walker driven by four navigation hooks

    Subclasses supply ``getNodeDetails``, ``getFirstChild``,
    ``getNextSibling`` and ``getParentNode``; ``__iter__`` uses them to walk
    ``self.tree`` without recursion.

    """
    def getNodeDetails(self, node):
        """Returns a tuple whose first item is the node type constant,
        followed by the arguments for the matching token method"""
        raise NotImplementedError

    def getFirstChild(self, node):
        """Returns the first child of ``node``, or ``None``"""
        raise NotImplementedError

    def getNextSibling(self, node):
        """Returns the node following ``node``, or ``None``"""
        raise NotImplementedError

    def getParentNode(self, node):
        """Returns the parent of ``node``, or ``None``"""
        raise NotImplementedError

    def __iter__(self):
        current = self.tree
        while current is not None:
            details = self.getNodeDetails(current)
            kind, payload = details[0], details[1:]
            descend = False

            # --- emit the opening token(s) for the current node ---
            if kind == DOCTYPE:
                yield self.doctype(*payload)

            elif kind == TEXT:
                for token in self.text(*payload):
                    yield token

            elif kind == ELEMENT:
                namespace, name, attributes, descend = payload
                if (not namespace or namespace == namespaces["html"]) and name in voidElements:
                    for token in self.emptyTag(namespace, name, attributes,
                                               descend):
                        yield token
                    # never walk into a void element, even if it has children
                    descend = False
                else:
                    yield self.startTag(namespace, name, attributes)

            elif kind == COMMENT:
                yield self.comment(payload[0])

            elif kind == ENTITY:
                yield self.entity(payload[0])

            elif kind == DOCUMENT:
                descend = True

            else:
                yield self.unknown(payload[0])

            # --- move down if possible ---
            child = self.getFirstChild(current) if descend else None
            if child is not None:
                current = child
                continue

            # --- otherwise climb, closing elements, until a sibling exists
            # or the root of the walk has been closed ---
            while current is not None:
                details = self.getNodeDetails(current)
                kind, payload = details[0], details[1:]
                if kind == ELEMENT:
                    namespace, name, attributes, descend = payload
                    # void HTML elements were emitted as EmptyTag: no EndTag
                    if (namespace and namespace != namespaces["html"]) or name not in voidElements:
                        yield self.endTag(namespace, name)
                if self.tree is current:
                    current = None
                    break
                sibling = self.getNextSibling(current)
                if sibling is not None:
                    current = sibling
                    break
                current = self.getParentNode(current)
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/dom.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/dom.py new file mode 100644 index 0000000..b3e2753 --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/dom.py | |||
@@ -0,0 +1,43 @@ | |||
1 | from __future__ import absolute_import, division, unicode_literals | ||
2 | |||
3 | from xml.dom import Node | ||
4 | |||
5 | from . import base | ||
6 | |||
7 | |||
class TreeWalker(base.NonRecursiveTreeWalker):
    """Tree walker over ``xml.dom`` style nodes"""

    def getNodeDetails(self, node):
        """Maps a DOM node onto the ``(type, ...)`` tuple used by the base
        walker"""
        kind = node.nodeType

        if kind == Node.DOCUMENT_TYPE_NODE:
            return base.DOCTYPE, node.name, node.publicId, node.systemId

        if kind in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
            return base.TEXT, node.nodeValue

        if kind == Node.ELEMENT_NODE:
            attrs = {}
            for attrName in list(node.attributes.keys()):
                attrNode = node.getAttributeNode(attrName)
                # attributes are keyed by (namespace, name); un-namespaced
                # attributes use None and their plain name
                if attrNode.namespaceURI:
                    attrKey = (attrNode.namespaceURI, attrNode.localName)
                else:
                    attrKey = (None, attrNode.name)
                attrs[attrKey] = attrNode.value
            return (base.ELEMENT, node.namespaceURI, node.nodeName,
                    attrs, node.hasChildNodes())

        if kind == Node.COMMENT_NODE:
            return base.COMMENT, node.nodeValue

        if kind in (Node.DOCUMENT_NODE, Node.DOCUMENT_FRAGMENT_NODE):
            return (base.DOCUMENT,)

        return base.UNKNOWN, kind

    def getFirstChild(self, node):
        """Returns ``node.firstChild``"""
        return node.firstChild

    def getNextSibling(self, node):
        """Returns ``node.nextSibling``"""
        return node.nextSibling

    def getParentNode(self, node):
        """Returns ``node.parentNode``"""
        return node.parentNode
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/etree.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/etree.py new file mode 100644 index 0000000..1a35add --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/etree.py | |||
@@ -0,0 +1,130 @@ | |||
1 | from __future__ import absolute_import, division, unicode_literals | ||
2 | |||
3 | from collections import OrderedDict | ||
4 | import re | ||
5 | |||
6 | from pip._vendor.six import string_types | ||
7 | |||
8 | from . import base | ||
9 | from .._utils import moduleFactoryFactory | ||
10 | |||
11 | tag_regexp = re.compile("{([^}]*)}(.*)") | ||
12 | |||
13 | |||
def getETreeBuilder(ElementTreeImplementation):
    # NOTE: the function-level names below are part of the result, because
    # the function returns ``locals()``; do not rename them or add new ones.
    ElementTree = ElementTreeImplementation
    ElementTreeCommentType = ElementTree.Comment("asd").tag

    class TreeWalker(base.NonRecursiveTreeWalker):  # pylint:disable=unused-variable
        """Given the particular ElementTree representation, this implementation,
        to avoid using recursion, returns "nodes" as tuples with the following
        content:

        1. The current element

        2. The index of the element relative to its parent

        3. A stack of ancestor elements

        4. A flag "text", "tail" or None to indicate if the current node is a
           text node; either the text or tail of the current element (1)
        """
        def getNodeDetails(self, node):
            if isinstance(node, tuple):  # It might be the root Element
                elt, _, _, flag = node
                if flag in ("text", "tail"):
                    return base.TEXT, getattr(elt, flag)
                node = elt

            # objects without a ``tag`` are unwrapped through getroot()
            if not hasattr(node, "tag"):
                node = node.getroot()

            tagName = node.tag

            if tagName in ("DOCUMENT_ROOT", "DOCUMENT_FRAGMENT"):
                return (base.DOCUMENT,)

            if tagName == "<!DOCTYPE>":
                return (base.DOCTYPE, node.text,
                        node.get("publicId"), node.get("systemId"))

            if tagName == ElementTreeCommentType:
                return base.COMMENT, node.text

            assert isinstance(tagName, string_types), type(tagName)
            # This is assumed to be an ordinary element; split "{ns}local"
            match = tag_regexp.match(tagName)
            if match:
                namespace, tag = match.groups()
            else:
                namespace, tag = None, tagName

            attrs = OrderedDict()
            for name, value in list(node.attrib.items()):
                match = tag_regexp.match(name)
                attrKey = (match.group(1), match.group(2)) if match else (None, name)
                attrs[attrKey] = value
            return (base.ELEMENT, namespace, tag,
                    attrs, len(node) or node.text)

        def getFirstChild(self, node):
            if isinstance(node, tuple):
                element, key, parents, flag = node
            else:
                element, key, parents, flag = node, None, [], None

            # text nodes have no children
            if flag in ("text", "tail"):
                return None
            # the element's own text comes before its child elements
            if element.text:
                return element, key, parents, "text"
            if len(element):
                parents.append(element)
                return element[0], 0, parents, None
            return None

        def getNextSibling(self, node):
            if not isinstance(node, tuple):
                return None
            element, key, parents, flag = node

            if flag == "text":
                # after an element's text comes its first child element
                if len(element):
                    parents.append(element)
                    return element[0], 0, parents, None
                return None

            # after an element comes its tail, then the next child of the
            # parent on top of the stack
            if element.tail and flag != "tail":
                return element, key, parents, "tail"
            if key < len(parents[-1]) - 1:
                return parents[-1][key + 1], key + 1, parents, None
            return None

        def getParentNode(self, node):
            if not isinstance(node, tuple):
                return None
            element, key, parents, flag = node

            if flag == "text":
                # the parent of a text node is the element that owns it
                if not parents:
                    return element
                return element, key, parents, None

            parent = parents.pop()
            if not parents:
                return parent
            siblings = list(parents[-1])
            assert siblings.count(parent) == 1
            return parent, siblings.index(parent), parents, None

    return locals()
129 | |||
130 | getETreeModule = moduleFactoryFactory(getETreeBuilder) | ||
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/etree_lxml.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/etree_lxml.py new file mode 100644 index 0000000..f6f395a --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/etree_lxml.py | |||
@@ -0,0 +1,213 @@ | |||
1 | from __future__ import absolute_import, division, unicode_literals | ||
2 | from pip._vendor.six import text_type | ||
3 | |||
4 | from lxml import etree | ||
5 | from ..treebuilders.etree import tag_regexp | ||
6 | |||
7 | from . import base | ||
8 | |||
9 | from .. import _ihatexml | ||
10 | |||
11 | |||
def ensure_str(s):
    """Returns ``s`` as text

    ``None`` and values that are already ``text_type`` are returned
    unchanged; anything else is decoded as strict ASCII.

    """
    if s is None or isinstance(s, text_type):
        return s
    return s.decode("ascii", "strict")
19 | |||
20 | |||
class Root(object):
    """Synthetic document node collecting the top-level nodes of a tree"""

    def __init__(self, et):
        self.text = None
        self.tail = None
        self.elementtree = et
        self.children = []

        # Prepend a Doctype child when the tree reports an internal DTD;
        # objects without ``docinfo`` are simply skipped.
        try:
            info = et.docinfo
            if info.internalDTD:
                self.children.append(Doctype(self,
                                             ensure_str(info.root_name),
                                             ensure_str(info.public_id),
                                             ensure_str(info.system_url)))
        except AttributeError:
            pass

        # Accept either something with getroot() or a bare element.
        try:
            node = et.getroot()
        except AttributeError:
            node = et

        # Rewind to the first top-level sibling ...
        previous = node.getprevious()
        while previous is not None:
            node = previous
            previous = node.getprevious()
        # ... then collect every top-level sibling in order.
        while node is not None:
            self.children.append(node)
            node = node.getnext()

    def __getitem__(self, key):
        return self.children[key]

    def getnext(self):
        return None

    def __len__(self):
        return 1
57 | |||
58 | |||
class Doctype(object):
    """Synthetic doctype node attached to a ``Root``"""

    def __init__(self, root_node, name, public_id, system_id):
        self.root_node = root_node
        self.name, self.public_id, self.system_id = name, public_id, system_id
        self.text = self.tail = None

    def getnext(self):
        # returns the root's second child (index 1)
        return self.root_node.children[1]
71 | |||
72 | |||
class FragmentRoot(Root):
    """Root node for a fragment given as a list of top-level items"""

    def __init__(self, children):
        # Each item is wrapped so it can find its siblings through this root.
        wrapped = []
        for child in children:
            wrapped.append(FragmentWrapper(self, child))
        self.children = wrapped
        self.text = None
        self.tail = None

    def getnext(self):
        return None
80 | |||
81 | |||
class FragmentWrapper(object):
    """Proxy around a top-level fragment item

    Unknown attributes are forwarded to the wrapped object; sibling and
    parent navigation is answered from the owning fragment root instead.

    """
    def __init__(self, fragment_root, obj):
        self.root_node = fragment_root
        self.obj = obj
        # Wrapped objects without text/tail attributes get None for both.
        self.text = ensure_str(obj.text) if hasattr(obj, 'text') else None
        self.tail = ensure_str(obj.tail) if hasattr(obj, 'tail') else None

    def __getattr__(self, name):
        return getattr(self.obj, name)

    def getnext(self):
        siblings = self.root_node.children
        following = siblings.index(self) + 1
        if following < len(siblings):
            return siblings[following]
        return None

    def __getitem__(self, key):
        return self.obj[key]

    def __bool__(self):
        return bool(self.obj)

    def getparent(self):
        return None

    def __str__(self):
        return str(self.obj)

    def __unicode__(self):
        return str(self.obj)

    def __len__(self):
        return len(self.obj)
123 | |||
124 | |||
class TreeWalker(base.NonRecursiveTreeWalker):
    """Tree walker for lxml trees and for fragments given as lists

    Text is represented as ``(element, "text")`` / ``(element, "tail")``
    tuples; everything else is walked as the node object itself.

    """
    def __init__(self, tree):
        # pylint:disable=redefined-variable-type
        isFragment = isinstance(tree, list)
        self.fragmentChildren = set(tree) if isFragment else set()
        wrapped = FragmentRoot(tree) if isFragment else Root(tree)
        base.NonRecursiveTreeWalker.__init__(self, wrapped)
        self.filter = _ihatexml.InfosetFilter()

    def getNodeDetails(self, node):
        if isinstance(node, tuple):  # Text node
            owner, key = node
            assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key
            return base.TEXT, ensure_str(getattr(owner, key))

        if isinstance(node, Root):
            return (base.DOCUMENT,)

        if isinstance(node, Doctype):
            return base.DOCTYPE, node.name, node.public_id, node.system_id

        # a wrapped fragment item without a tag is treated as plain text
        if isinstance(node, FragmentWrapper) and not hasattr(node, "tag"):
            return base.TEXT, ensure_str(node.obj)

        if node.tag == etree.Comment:
            return base.COMMENT, ensure_str(node.text)

        if node.tag == etree.Entity:
            return base.ENTITY, ensure_str(node.text)[1:-1]  # strip &;

        # This is assumed to be an ordinary element; split "{ns}local"
        tagName = ensure_str(node.tag)
        match = tag_regexp.match(tagName)
        if match:
            namespace, tag = match.groups()
        else:
            namespace, tag = None, tagName

        attrs = {}
        for name, value in list(node.attrib.items()):
            name = ensure_str(name)
            value = ensure_str(value)
            match = tag_regexp.match(name)
            attrKey = (match.group(1), match.group(2)) if match else (None, name)
            attrs[attrKey] = value
        return (base.ELEMENT, namespace, self.filter.fromXmlName(tag),
                attrs, len(node) > 0 or node.text)

    def getFirstChild(self, node):
        assert not isinstance(node, tuple), "Text nodes have no children"

        assert len(node) or node.text, "Node has no children"
        # the element's own text comes before its child elements
        return (node, "text") if node.text else node[0]

    def getNextSibling(self, node):
        if isinstance(node, tuple):  # Text node
            owner, key = node
            assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key
            if key == "text":
                # XXX: we cannot use a "bool(node) and node[0] or None" construct here
                # because node[0] might evaluate to False if it has no child element
                if len(owner):
                    return owner[0]
                return None
            # tail
            return owner.getnext()

        if node.tail:
            return (node, "tail")
        return node.getnext()

    def getParentNode(self, node):
        if isinstance(node, tuple):  # Text node
            owner, key = node
            assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key
            if key == "text":
                return owner
            # a tail's parent is the parent of the element it follows
            return owner.getparent()

        if node in self.fragmentChildren:
            return None
        return node.getparent()
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/genshi.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/genshi.py new file mode 100644 index 0000000..42cd559 --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/genshi.py | |||
@@ -0,0 +1,69 @@ | |||
1 | from __future__ import absolute_import, division, unicode_literals | ||
2 | |||
3 | from genshi.core import QName | ||
4 | from genshi.core import START, END, XML_NAMESPACE, DOCTYPE, TEXT | ||
5 | from genshi.core import START_NS, END_NS, START_CDATA, END_CDATA, PI, COMMENT | ||
6 | |||
7 | from . import base | ||
8 | |||
9 | from ..constants import voidElements, namespaces | ||
10 | |||
11 | |||
class TreeWalker(base.TreeWalker):
    """Tree walker that turns a stream of Genshi events into tokens"""

    def __iter__(self):
        # Stay one event behind the stream so that each event can be
        # translated together with the event that follows it.
        pending = None
        for event in self.tree:
            if pending is not None:
                for token in self.tokens(pending, event):
                    yield token
            pending = event

        # Don't forget the final event!
        if pending is not None:
            for token in self.tokens(pending, None):
                yield token

    def tokens(self, event, next):
        """Yields the tokens for one event

        :arg event: the ``(kind, data, pos)`` event to translate

        :arg next: the event that follows ``event``, or ``None`` at the end
            of the stream

        """
        kind, data, _ = event

        if kind == START:
            tag, attribs = data
            name = tag.localname
            namespace = tag.namespace
            converted_attribs = {}
            for k, v in attribs:
                if isinstance(k, QName):
                    attrKey = (k.namespace, k.localname)
                else:
                    attrKey = (None, k)
                converted_attribs[attrKey] = v

            if namespace == namespaces["html"] and name in voidElements:
                # a void element has children unless its own END event
                # follows immediately
                hasChildren = (not next or next[0] != END or
                               next[1] != tag)
                for token in self.emptyTag(namespace, name, converted_attribs,
                                           hasChildren):
                    yield token
            else:
                yield self.startTag(namespace, name, converted_attribs)

        elif kind == END:
            name = data.localname
            namespace = data.namespace
            isVoid = namespace == namespaces["html"] and name in voidElements
            # void HTML elements were emitted as EmptyTag: no EndTag
            if not isVoid:
                yield self.endTag(namespace, name)

        elif kind == COMMENT:
            yield self.comment(data)

        elif kind == TEXT:
            for token in self.text(data):
                yield token

        elif kind == DOCTYPE:
            yield self.doctype(*data)

        # These kinds produce no tokens. (DOCTYPE is listed here as well but
        # has already been handled by the branch above.)
        elif kind in (XML_NAMESPACE, DOCTYPE, START_NS, END_NS,
                      START_CDATA, END_CDATA, PI):
            pass

        else:
            yield self.unknown(kind)