summaryrefslogtreecommitdiff
path: root/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/__init__.py
diff options
context:
space:
mode:
author Shubham Saini <shubham6405@gmail.com> 2018-12-11 10:01:23 +0000
committer Shubham Saini <shubham6405@gmail.com> 2018-12-11 10:01:23 +0000
commit 68df54d6629ec019142eb149dd037774f2d11e7c (patch)
tree 345bc22d46b4e01a4ba8303b94278952a4ed2b9e /venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/__init__.py
First commit
Diffstat (limited to 'venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/__init__.py')
-rw-r--r-- venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/__init__.py 154
1 files changed, 154 insertions, 0 deletions
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/__init__.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/__init__.py
new file mode 100644
index 0000000..31a173d
--- /dev/null
+++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/__init__.py
@@ -0,0 +1,154 @@
1"""A collection of modules for iterating through different kinds of
2tree, generating tokens identical to those produced by the tokenizer
3module.
4
5To create a tree walker for a new type of tree, you need to
6implement a tree walker object (called TreeWalker by convention) that
7implements a 'serialize' method taking a tree as sole argument and
8returning an iterator generating tokens.
9"""
10
11from __future__ import absolute_import, division, unicode_literals
12
13from .. import constants
14from .._utils import default_etree
15
# Names exported by a star-import of this package.
__all__ = ["getTreeWalker", "pprint"]

# Cache of TreeWalker classes keyed by lower-cased tree type name. Entries are
# added on demand by getTreeWalker; the "etree" type is never stored here.
treeWalkerCache = {}
19
20
def getTreeWalker(treeType, implementation=None, **kwargs):
    """Look up the TreeWalker class for one of the built-in tree types

    :arg str treeType: name of the tree type (matched case-insensitively).
        Recognised values are:

        * "dom": the xml.dom.minidom DOM implementation
        * "etree": a generic walker for tree implementations exposing an
          elementtree-like interface (known to work with ElementTree,
          cElementTree and lxml.etree)
        * "lxml": optimized walker for lxml.etree
        * "genshi": a Genshi stream

    :arg implementation: a module implementing the tree type, e.g.
        xml.etree.ElementTree or cElementTree. Only consulted for the
        "etree" tree type; when omitted, ``default_etree`` is used.

    :arg kwargs: keyword arguments forwarded to the etree walker factory;
        ignored for every other tree type

    :returns: a TreeWalker class, or ``None`` when ``treeType`` is not one
        of the recognised names

    """
    treeType = treeType.lower()

    # Fast path: a walker class resolved by an earlier call.
    if treeType in treeWalkerCache:
        return treeWalkerCache[treeType]

    if treeType == "etree":
        from . import etree
        etreeImplementation = default_etree if implementation is None else implementation
        # XXX: NEVER cache here, caching is done in the etree submodule
        return etree.getETreeModule(etreeImplementation, **kwargs).TreeWalker

    # The backing modules are imported lazily so that only the requested
    # tree type's module is loaded.
    if treeType == "dom":
        from . import dom
        walkerClass = dom.TreeWalker
    elif treeType == "genshi":
        from . import genshi
        walkerClass = genshi.TreeWalker
    elif treeType == "lxml":
        from . import etree_lxml
        walkerClass = etree_lxml.TreeWalker
    else:
        # Unrecognised tree type: nothing is cached and None is returned.
        return None

    treeWalkerCache[treeType] = walkerClass
    return walkerClass
63
64
def concatenateCharacterTokens(tokens):
    """Collapse each run of adjacent text tokens into one "Characters" token

    Consecutive "Characters" and "SpaceCharacters" tokens have their "data"
    joined and are re-emitted as a single new "Characters" token; every other
    token is yielded unchanged, in its original position.

    :arg tokens: an iterable of token dicts with at least a "type" key

    :returns: a generator of token dicts

    """
    textTypes = ("Characters", "SpaceCharacters")
    buffered = []
    for tok in tokens:
        if tok["type"] in textTypes:
            buffered.append(tok["data"])
            continue
        # A non-text token ends the current run: flush it first.
        if buffered:
            yield {"type": "Characters", "data": "".join(buffered)}
            buffered = []
        yield tok
    # Flush a run that reaches the end of the stream.
    if buffered:
        yield {"type": "Characters", "data": "".join(buffered)}
78
79
def _prefixedName(namespace, localname):
    """Return "<prefix> <localname>" for a namespaced tag or attribute name.

    The namespace is replaced by its entry in ``constants.prefixes`` when one
    exists; otherwise the namespace value itself is used.
    """
    if namespace in constants.prefixes:
        namespace = constants.prefixes[namespace]
    return "%s %s" % (namespace, localname)


def _attributeSortKey(item):
    """Sort key for ``((namespace, localname), value)`` attribute items.

    Comparing the raw keys raises TypeError on Python 3 when one attribute
    has namespace ``None`` and another has a string namespace. This key sorts
    un-namespaced attributes first (the order Python 2 produced) and never
    compares ``None`` with a string.
    """
    (namespace, localname), _value = item
    return (namespace is not None, namespace or "", localname)


def pprint(walker):
    """Pretty printer for tree walkers

    Takes a TreeWalker instance and pretty prints the output of walking the tree.

    :arg walker: a TreeWalker instance

    :returns: the formatted tree as one string, one output line per tag,
        attribute, comment, doctype or text run, joined with newlines

    :raises ValueError: if the walker yields a token whose type is not
        recognised

    """
    output = []
    indent = 0
    for token in concatenateCharacterTokens(walker):
        tokenType = token["type"]
        if tokenType in ("StartTag", "EmptyTag"):
            # tag name (the HTML namespace is implicit and not printed)
            namespace = token["namespace"]
            if namespace and namespace != constants.namespaces["html"]:
                name = _prefixedName(namespace, token["name"])
            else:
                name = token["name"]
            output.append("%s<%s>" % (" " * indent, name))
            indent += 2
            # attributes (sorted for consistent ordering)
            attrs = token["data"]
            for (attrNamespace, localname), value in sorted(attrs.items(),
                                                            key=_attributeSortKey):
                if attrNamespace:
                    name = _prefixedName(attrNamespace, localname)
                else:
                    name = localname
                output.append("%s%s=\"%s\"" % (" " * indent, name, value))
            # self-closing: there is no matching EndTag to undo the indent
            if tokenType == "EmptyTag":
                indent -= 2

        elif tokenType == "EndTag":
            indent -= 2

        elif tokenType == "Comment":
            output.append("%s<!-- %s -->" % (" " * indent, token["data"]))

        elif tokenType == "Doctype":
            if token["name"]:
                if token["publicId"]:
                    output.append("""%s<!DOCTYPE %s "%s" "%s">""" %
                                  (" " * indent,
                                   token["name"],
                                   token["publicId"],
                                   token["systemId"] if token["systemId"] else ""))
                elif token["systemId"]:
                    output.append("""%s<!DOCTYPE %s "" "%s">""" %
                                  (" " * indent,
                                   token["name"],
                                   token["systemId"]))
                else:
                    output.append("%s<!DOCTYPE %s>" % (" " * indent,
                                                       token["name"]))
            else:
                output.append("%s<!DOCTYPE >" % (" " * indent,))

        elif tokenType == "Characters":
            output.append("%s\"%s\"" % (" " * indent, token["data"]))

        elif tokenType == "SpaceCharacters":
            # Raised explicitly (not via ``assert``) so the invariant still
            # holds when Python runs with -O.
            raise AssertionError("concatenateCharacterTokens should have got rid of all Space tokens")

        else:
            raise ValueError("Unknown token type, %s" % tokenType)

    return "\n".join(output)