diff options
author | Shubham Saini <shubham6405@gmail.com> | 2018-12-11 10:01:23 +0000 |
---|---|---|
committer | Shubham Saini <shubham6405@gmail.com> | 2018-12-11 10:01:23 +0000 |
commit | 68df54d6629ec019142eb149dd037774f2d11e7c (patch) | |
tree | 345bc22d46b4e01a4ba8303b94278952a4ed2b9e /venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/__init__.py |
First commit
Diffstat (limited to 'venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/__init__.py')
-rw-r--r-- | venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/__init__.py | 154 |
1 files changed, 154 insertions, 0 deletions
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/__init__.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/__init__.py new file mode 100644 index 0000000..31a173d --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/__init__.py | |||
@@ -0,0 +1,154 @@ | |||
1 | """A collection of modules for iterating through different kinds of | ||
2 | tree, generating tokens identical to those produced by the tokenizer | ||
3 | module. | ||
4 | |||
5 | To create a tree walker for a new type of tree, you need to | ||
6 | implement a tree walker object (called TreeWalker by convention) that | ||
7 | implements a 'serialize' method taking a tree as sole argument and | ||
8 | returning an iterator generating tokens. | ||
9 | """ | ||
10 | |||
11 | from __future__ import absolute_import, division, unicode_literals | ||
12 | |||
13 | from .. import constants | ||
14 | from .._utils import default_etree | ||
15 | |||
# Names exported by a star-import of this module.
__all__ = ["getTreeWalker", "pprint"]
17 | |||
# Maps a lower-cased tree type name to its TreeWalker class. "etree" walkers
# are never stored here (see getTreeWalker).
treeWalkerCache = {}


def getTreeWalker(treeType, implementation=None, **kwargs):
    """Look up the TreeWalker class for one of the built-in tree types.

    :arg str treeType: name of the tree type (matched case-insensitively).
        Recognised values are:

        * "dom": the xml.dom.minidom DOM implementation
        * "etree": a generic walker for trees exposing an elementtree-like
          interface (known to work with ElementTree, cElementTree and
          lxml.etree)
        * "lxml": optimized walker for lxml.etree
        * "genshi": a Genshi stream

    :arg implementation: module implementing the tree type, e.g.
        xml.etree.ElementTree or cElementTree. Only consulted for the
        "etree" tree type; when omitted, ``default_etree`` is used.

    :arg kwargs: extra keyword arguments forwarded to the etree walker
        factory; ignored for every other tree type

    :returns: a TreeWalker class, or ``None`` when ``treeType`` is not one
        of the recognised names

    """
    kind = treeType.lower()

    # Fast path: the walker class for this tree type was resolved earlier.
    if kind in treeWalkerCache:
        return treeWalkerCache[kind]

    if kind == "etree":
        from . import etree
        etreeModule = default_etree if implementation is None else implementation
        # XXX: NEVER cache here, caching is done in the etree submodule
        return etree.getETreeModule(etreeModule, **kwargs).TreeWalker

    # Walker modules are imported lazily so that optional dependencies are
    # only required when the matching tree type is actually requested.
    if kind == "dom":
        from . import dom as walkerModule
    elif kind == "genshi":
        from . import genshi as walkerModule
    elif kind == "lxml":
        from . import etree_lxml as walkerModule
    else:
        # Unknown tree type: nothing is cached and the caller gets None.
        return None

    treeWalkerCache[kind] = walkerModule.TreeWalker
    return treeWalkerCache[kind]
63 | |||
64 | |||
def concatenateCharacterTokens(tokens):
    """Yield ``tokens`` with each run of character tokens merged into one.

    Consecutive "Characters" and "SpaceCharacters" tokens are replaced by a
    single ``{"type": "Characters", "data": ...}`` token whose data is the
    concatenation of the run. All other tokens are yielded unchanged and in
    their original order.

    :arg tokens: an iterable of token dicts, each with a "type" key (and a
        "data" key for character tokens)

    :returns: a generator of token dicts

    """
    buffered = []
    for token in tokens:
        if token["type"] in ("Characters", "SpaceCharacters"):
            # Hold text back until the run ends so it is emitted only once.
            buffered.append(token["data"])
            continue
        if buffered:
            yield {"type": "Characters", "data": "".join(buffered)}
            buffered = []
        yield token
    # Flush text left over when the stream ends on a character token.
    if buffered:
        yield {"type": "Characters", "data": "".join(buffered)}
78 | |||
79 | |||
def _qualifiedName(namespace, localname):
    """Return "<prefix-or-namespace> <localname>" for a namespaced name."""
    if namespace in constants.prefixes:
        namespace = constants.prefixes[namespace]
    return "%s %s" % (namespace, localname)


def _attributeSortKey(item):
    """Return a sort key for one ``((namespace, localname), value)`` item.

    A missing namespace (``None``) is mapped to the empty string so that
    un-namespaced and namespaced attributes can be ordered together.
    """
    (namespace, localname), _value = item
    return (namespace or "", localname)


def pprint(walker):
    """Pretty printer for tree walkers

    Takes a TreeWalker instance and pretty prints the output of walking the
    tree: one line per tag, attribute, comment, doctype and text run, with
    two extra spaces of indentation for each level of open elements.

    :arg walker: a TreeWalker instance (any iterable of token dicts)

    :returns: the formatted lines joined with newlines, as a single string

    :raises ValueError: if a token has an unrecognised "type"

    """
    output = []
    indent = 0
    for token in concatenateCharacterTokens(walker):
        tokenType = token["type"]
        if tokenType in ("StartTag", "EmptyTag"):
            # tag name; the HTML namespace is implied and not printed
            namespace = token["namespace"]
            if namespace and namespace != constants.namespaces["html"]:
                name = _qualifiedName(namespace, token["name"])
            else:
                name = token["name"]
            output.append("%s<%s>" % (" " * indent, name))
            indent += 2
            # attributes (sorted for consistent ordering); the explicit key
            # avoids comparing None with str, which raises TypeError on
            # Python 3 when namespaced and plain attributes are mixed
            attrs = sorted(token["data"].items(), key=_attributeSortKey)
            for (namespace, localname), value in attrs:
                if namespace:
                    name = _qualifiedName(namespace, localname)
                else:
                    name = localname
                output.append("%s%s=\"%s\"" % (" " * indent, name, value))
            # self-closing: there is no matching EndTag to undo the indent
            if tokenType == "EmptyTag":
                indent -= 2

        elif tokenType == "EndTag":
            indent -= 2

        elif tokenType == "Comment":
            output.append("%s<!-- %s -->" % (" " * indent, token["data"]))

        elif tokenType == "Doctype":
            if token["name"]:
                if token["publicId"]:
                    output.append("""%s<!DOCTYPE %s "%s" "%s">""" %
                                  (" " * indent,
                                   token["name"],
                                   token["publicId"],
                                   token["systemId"] or ""))
                elif token["systemId"]:
                    output.append("""%s<!DOCTYPE %s "" "%s">""" %
                                  (" " * indent,
                                   token["name"],
                                   token["systemId"]))
                else:
                    output.append("%s<!DOCTYPE %s>" % (" " * indent,
                                                       token["name"]))
            else:
                output.append("%s<!DOCTYPE >" % (" " * indent,))

        elif tokenType == "Characters":
            output.append("%s\"%s\"" % (" " * indent, token["data"]))

        elif tokenType == "SpaceCharacters":
            # Raised explicitly (not via ``assert``) so the check survives -O.
            raise AssertionError("concatenateCharacterTokens should have got rid of all Space tokens")

        else:
            raise ValueError("Unknown token type, %s" % tokenType)

    return "\n".join(output)