From 68df54d6629ec019142eb149dd037774f2d11e7c Mon Sep 17 00:00:00 2001 From: Shubham Saini Date: Tue, 11 Dec 2018 15:31:23 +0530 Subject: First commit --- .../pip/_vendor/html5lib/treewalkers/__init__.py | 154 +++++++++++++++++++++ 1 file changed, 154 insertions(+) create mode 100644 venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/__init__.py (limited to 'venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/__init__.py') diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/__init__.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/__init__.py new file mode 100644 index 0000000..31a173d --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/__init__.py @@ -0,0 +1,154 @@ +"""A collection of modules for iterating through different kinds of +tree, generating tokens identical to those produced by the tokenizer +module. + +To create a tree walker for a new type of tree, you need to do +implement a tree walker object (called TreeWalker by convention) that +implements a 'serialize' method taking a tree as sole argument and +returning an iterator generating tokens. +""" + +from __future__ import absolute_import, division, unicode_literals + +from .. import constants +from .._utils import default_etree + +__all__ = ["getTreeWalker", "pprint"] + +treeWalkerCache = {} + + +def getTreeWalker(treeType, implementation=None, **kwargs): + """Get a TreeWalker class for various types of tree with built-in support + + :arg str treeType: the name of the tree type required (case-insensitive). + Supported values are: + + * "dom": The xml.dom.minidom DOM implementation + * "etree": A generic walker for tree implementations exposing an + elementtree-like interface (known to work with ElementTree, + cElementTree and lxml.etree). + * "lxml": Optimized walker for lxml.etree + * "genshi": a Genshi stream + + :arg implementation: A module implementing the tree type e.g. + xml.etree.ElementTree or cElementTree (Currently applies to the "etree" + tree type only). + + :arg kwargs: keyword arguments passed to the etree walker--for other + walkers, this has no effect + + :returns: a TreeWalker class + + """ + + treeType = treeType.lower() + if treeType not in treeWalkerCache: + if treeType == "dom": + from . import dom + treeWalkerCache[treeType] = dom.TreeWalker + elif treeType == "genshi": + from . import genshi + treeWalkerCache[treeType] = genshi.TreeWalker + elif treeType == "lxml": + from . import etree_lxml + treeWalkerCache[treeType] = etree_lxml.TreeWalker + elif treeType == "etree": + from . import etree + if implementation is None: + implementation = default_etree + # XXX: NEVER cache here, caching is done in the etree submodule + return etree.getETreeModule(implementation, **kwargs).TreeWalker + return treeWalkerCache.get(treeType) + + +def concatenateCharacterTokens(tokens): + pendingCharacters = [] + for token in tokens: + type = token["type"] + if type in ("Characters", "SpaceCharacters"): + pendingCharacters.append(token["data"]) + else: + if pendingCharacters: + yield {"type": "Characters", "data": "".join(pendingCharacters)} + pendingCharacters = [] + yield token + if pendingCharacters: + yield {"type": "Characters", "data": "".join(pendingCharacters)} + + +def pprint(walker): + """Pretty printer for tree walkers + + Takes a TreeWalker instance and pretty prints the output of walking the tree. + + :arg walker: a TreeWalker instance + + """ + output = [] + indent = 0 + for token in concatenateCharacterTokens(walker): + type = token["type"] + if type in ("StartTag", "EmptyTag"): + # tag name + if token["namespace"] and token["namespace"] != constants.namespaces["html"]: + if token["namespace"] in constants.prefixes: + ns = constants.prefixes[token["namespace"]] + else: + ns = token["namespace"] + name = "%s %s" % (ns, token["name"]) + else: + name = token["name"] + output.append("%s<%s>" % (" " * indent, name)) + indent += 2 + # attributes (sorted for consistent ordering) + attrs = token["data"] + for (namespace, localname), value in sorted(attrs.items()): + if namespace: + if namespace in constants.prefixes: + ns = constants.prefixes[namespace] + else: + ns = namespace + name = "%s %s" % (ns, localname) + else: + name = localname + output.append("%s%s=\"%s\"" % (" " * indent, name, value)) + # self-closing + if type == "EmptyTag": + indent -= 2 + + elif type == "EndTag": + indent -= 2 + + elif type == "Comment": + output.append("%s" % (" " * indent, token["data"])) + + elif type == "Doctype": + if token["name"]: + if token["publicId"]: + output.append("""%s""" % + (" " * indent, + token["name"], + token["publicId"], + token["systemId"] if token["systemId"] else "")) + elif token["systemId"]: + output.append("""%s""" % + (" " * indent, + token["name"], + token["systemId"])) + else: + output.append("%s" % (" " * indent, + token["name"])) + else: + output.append("%s" % (" " * indent,)) + + elif type == "Characters": + output.append("%s\"%s\"" % (" " * indent, token["data"])) + + elif type == "SpaceCharacters": + assert False, "concatenateCharacterTokens should have got rid of all Space tokens" + + else: + raise ValueError("Unknown token type, %s" % type) + + return "\n".join(output) -- cgit v1.2.3