From 227b2d30a8675b44918f9d9ca89b24144a938215 Mon Sep 17 00:00:00 2001 From: Shubham Saini Date: Mon, 5 Aug 2019 14:02:33 +0530 Subject: removing venv files --- .../pip/_vendor/html5lib/__init__.py | 35 - .../pip/_vendor/html5lib/_ihatexml.py | 288 -- .../pip/_vendor/html5lib/_inputstream.py | 923 ------ .../pip/_vendor/html5lib/_tokenizer.py | 1721 ------------ .../pip/_vendor/html5lib/_trie/__init__.py | 14 - .../pip/_vendor/html5lib/_trie/_base.py | 37 - .../pip/_vendor/html5lib/_trie/datrie.py | 44 - .../pip/_vendor/html5lib/_trie/py.py | 67 - .../pip/_vendor/html5lib/_utils.py | 124 - .../pip/_vendor/html5lib/constants.py | 2947 -------------------- .../pip/_vendor/html5lib/filters/__init__.py | 0 .../html5lib/filters/alphabeticalattributes.py | 29 - .../pip/_vendor/html5lib/filters/base.py | 12 - .../html5lib/filters/inject_meta_charset.py | 73 - .../pip/_vendor/html5lib/filters/lint.py | 93 - .../pip/_vendor/html5lib/filters/optionaltags.py | 207 -- .../pip/_vendor/html5lib/filters/sanitizer.py | 896 ------ .../pip/_vendor/html5lib/filters/whitespace.py | 38 - .../pip/_vendor/html5lib/html5parser.py | 2791 ------------------ .../pip/_vendor/html5lib/serializer.py | 409 --- .../pip/_vendor/html5lib/treeadapters/__init__.py | 30 - .../pip/_vendor/html5lib/treeadapters/genshi.py | 54 - .../pip/_vendor/html5lib/treeadapters/sax.py | 50 - .../pip/_vendor/html5lib/treebuilders/__init__.py | 88 - .../pip/_vendor/html5lib/treebuilders/base.py | 417 --- .../pip/_vendor/html5lib/treebuilders/dom.py | 236 -- .../pip/_vendor/html5lib/treebuilders/etree.py | 340 --- .../_vendor/html5lib/treebuilders/etree_lxml.py | 366 --- .../pip/_vendor/html5lib/treewalkers/__init__.py | 154 - .../pip/_vendor/html5lib/treewalkers/base.py | 252 -- .../pip/_vendor/html5lib/treewalkers/dom.py | 43 - .../pip/_vendor/html5lib/treewalkers/etree.py | 130 - .../pip/_vendor/html5lib/treewalkers/etree_lxml.py | 213 -- .../pip/_vendor/html5lib/treewalkers/genshi.py | 69 - 34 files changed, 13190 deletions(-) delete mode 100644 venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/__init__.py delete mode 100644 venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_ihatexml.py delete mode 100644 venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_inputstream.py delete mode 100644 venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_tokenizer.py delete mode 100644 venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_trie/__init__.py delete mode 100644 venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_trie/_base.py delete mode 100644 venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_trie/datrie.py delete mode 100644 venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_trie/py.py delete mode 100644 venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_utils.py delete mode 100644 venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/constants.py delete mode 100644 venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/__init__.py delete mode 100644 venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/alphabeticalattributes.py delete mode 100644 venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/base.py delete mode 100644 venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/inject_meta_charset.py delete mode 100644 venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/lint.py delete mode 100644 venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/optionaltags.py delete mode 100644 venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/sanitizer.py delete mode 100644 venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/whitespace.py delete mode 100644 venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/html5parser.py delete mode 100644 venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/serializer.py delete mode 100644 venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treeadapters/__init__.py delete mode 100644 venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treeadapters/genshi.py delete mode 100644 venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treeadapters/sax.py delete mode 100644 venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treebuilders/__init__.py delete mode 100644 venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treebuilders/base.py delete mode 100644 venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treebuilders/dom.py delete mode 100644 venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treebuilders/etree.py delete mode 100644 venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treebuilders/etree_lxml.py delete mode 100644 venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/__init__.py delete mode 100644 venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/base.py delete mode 100644 venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/dom.py delete mode 100644 venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/etree.py delete mode 100644 venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/etree_lxml.py delete mode 100644 venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/genshi.py (limited to 'venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib') diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/__init__.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/__init__.py deleted file mode 100644 index 0b54002..0000000 --- a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/__init__.py +++ /dev/null @@ -1,35 +0,0 @@ -""" -HTML parsing library based on the `WHATWG HTML specification -`_. The parser is designed to be compatible with -existing HTML found in the wild and implements well-defined error recovery that -is largely compatible with modern desktop web browsers. - -Example usage:: - - from pip._vendor import html5lib - with open("my_document.html", "rb") as f: - tree = html5lib.parse(f) - -For convenience, this module re-exports the following names: - -* :func:`~.html5parser.parse` -* :func:`~.html5parser.parseFragment` -* :class:`~.html5parser.HTMLParser` -* :func:`~.treebuilders.getTreeBuilder` -* :func:`~.treewalkers.getTreeWalker` -* :func:`~.serializer.serialize` -""" - -from __future__ import absolute_import, division, unicode_literals - -from .html5parser import HTMLParser, parse, parseFragment -from .treebuilders import getTreeBuilder -from .treewalkers import getTreeWalker -from .serializer import serialize - -__all__ = ["HTMLParser", "parse", "parseFragment", "getTreeBuilder", - "getTreeWalker", "serialize"] - -# this has to be at the top level, see how setup.py parses this -#: Distribution version number. -__version__ = "1.0.1" diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_ihatexml.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_ihatexml.py deleted file mode 100644 index 68f9b1e..0000000 --- a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_ihatexml.py +++ /dev/null @@ -1,288 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -import re -import warnings - -from .constants import DataLossWarning - -baseChar = """ -[#x0041-#x005A] | [#x0061-#x007A] | [#x00C0-#x00D6] | [#x00D8-#x00F6] | -[#x00F8-#x00FF] | [#x0100-#x0131] | [#x0134-#x013E] | [#x0141-#x0148] | -[#x014A-#x017E] | [#x0180-#x01C3] | [#x01CD-#x01F0] | [#x01F4-#x01F5] | -[#x01FA-#x0217] | [#x0250-#x02A8] | [#x02BB-#x02C1] | #x0386 | -[#x0388-#x038A] | #x038C | [#x038E-#x03A1] | [#x03A3-#x03CE] | -[#x03D0-#x03D6] | #x03DA | #x03DC | #x03DE | #x03E0 | [#x03E2-#x03F3] | -[#x0401-#x040C] | [#x040E-#x044F] | [#x0451-#x045C] | [#x045E-#x0481] | -[#x0490-#x04C4] | [#x04C7-#x04C8] | [#x04CB-#x04CC] | [#x04D0-#x04EB] | -[#x04EE-#x04F5] | [#x04F8-#x04F9] | [#x0531-#x0556] | #x0559 | -[#x0561-#x0586] | [#x05D0-#x05EA] | [#x05F0-#x05F2] | [#x0621-#x063A] | -[#x0641-#x064A] | [#x0671-#x06B7] | [#x06BA-#x06BE] | [#x06C0-#x06CE] | -[#x06D0-#x06D3] | #x06D5 | [#x06E5-#x06E6] | [#x0905-#x0939] | #x093D | -[#x0958-#x0961] | [#x0985-#x098C] | [#x098F-#x0990] | [#x0993-#x09A8] | -[#x09AA-#x09B0] | #x09B2 | [#x09B6-#x09B9] | [#x09DC-#x09DD] | -[#x09DF-#x09E1] | [#x09F0-#x09F1] | [#x0A05-#x0A0A] | [#x0A0F-#x0A10] | -[#x0A13-#x0A28] | [#x0A2A-#x0A30] | [#x0A32-#x0A33] | [#x0A35-#x0A36] | -[#x0A38-#x0A39] | [#x0A59-#x0A5C] | #x0A5E | [#x0A72-#x0A74] | -[#x0A85-#x0A8B] | #x0A8D | [#x0A8F-#x0A91] | [#x0A93-#x0AA8] | -[#x0AAA-#x0AB0] | [#x0AB2-#x0AB3] | [#x0AB5-#x0AB9] | #x0ABD | #x0AE0 | -[#x0B05-#x0B0C] | [#x0B0F-#x0B10] | [#x0B13-#x0B28] | [#x0B2A-#x0B30] | -[#x0B32-#x0B33] | [#x0B36-#x0B39] | #x0B3D | [#x0B5C-#x0B5D] | -[#x0B5F-#x0B61] | [#x0B85-#x0B8A] | [#x0B8E-#x0B90] | [#x0B92-#x0B95] | -[#x0B99-#x0B9A] | #x0B9C | [#x0B9E-#x0B9F] | [#x0BA3-#x0BA4] | -[#x0BA8-#x0BAA] | [#x0BAE-#x0BB5] | [#x0BB7-#x0BB9] | [#x0C05-#x0C0C] | -[#x0C0E-#x0C10] | [#x0C12-#x0C28] | [#x0C2A-#x0C33] | [#x0C35-#x0C39] | -[#x0C60-#x0C61] | [#x0C85-#x0C8C] | [#x0C8E-#x0C90] | [#x0C92-#x0CA8] | -[#x0CAA-#x0CB3] | [#x0CB5-#x0CB9] | #x0CDE | [#x0CE0-#x0CE1] | -[#x0D05-#x0D0C] | [#x0D0E-#x0D10] | [#x0D12-#x0D28] | [#x0D2A-#x0D39] | -[#x0D60-#x0D61] | [#x0E01-#x0E2E] | #x0E30 | [#x0E32-#x0E33] | -[#x0E40-#x0E45] | [#x0E81-#x0E82] | #x0E84 | [#x0E87-#x0E88] | #x0E8A | -#x0E8D | [#x0E94-#x0E97] | [#x0E99-#x0E9F] | [#x0EA1-#x0EA3] | #x0EA5 | -#x0EA7 | [#x0EAA-#x0EAB] | [#x0EAD-#x0EAE] | #x0EB0 | [#x0EB2-#x0EB3] | -#x0EBD | [#x0EC0-#x0EC4] | [#x0F40-#x0F47] | [#x0F49-#x0F69] | -[#x10A0-#x10C5] | [#x10D0-#x10F6] | #x1100 | [#x1102-#x1103] | -[#x1105-#x1107] | #x1109 | [#x110B-#x110C] | [#x110E-#x1112] | #x113C | -#x113E | #x1140 | #x114C | #x114E | #x1150 | [#x1154-#x1155] | #x1159 | -[#x115F-#x1161] | #x1163 | #x1165 | #x1167 | #x1169 | [#x116D-#x116E] | -[#x1172-#x1173] | #x1175 | #x119E | #x11A8 | #x11AB | [#x11AE-#x11AF] | -[#x11B7-#x11B8] | #x11BA | [#x11BC-#x11C2] | #x11EB | #x11F0 | #x11F9 | -[#x1E00-#x1E9B] | [#x1EA0-#x1EF9] | [#x1F00-#x1F15] | [#x1F18-#x1F1D] | -[#x1F20-#x1F45] | [#x1F48-#x1F4D] | [#x1F50-#x1F57] | #x1F59 | #x1F5B | -#x1F5D | [#x1F5F-#x1F7D] | [#x1F80-#x1FB4] | [#x1FB6-#x1FBC] | #x1FBE | -[#x1FC2-#x1FC4] | [#x1FC6-#x1FCC] | [#x1FD0-#x1FD3] | [#x1FD6-#x1FDB] | -[#x1FE0-#x1FEC] | [#x1FF2-#x1FF4] | [#x1FF6-#x1FFC] | #x2126 | -[#x212A-#x212B] | #x212E | [#x2180-#x2182] | [#x3041-#x3094] | -[#x30A1-#x30FA] | [#x3105-#x312C] | [#xAC00-#xD7A3]""" - -ideographic = """[#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]""" - -combiningCharacter = """ -[#x0300-#x0345] | [#x0360-#x0361] | [#x0483-#x0486] | [#x0591-#x05A1] | -[#x05A3-#x05B9] | [#x05BB-#x05BD] | #x05BF | [#x05C1-#x05C2] | #x05C4 | -[#x064B-#x0652] | #x0670 | [#x06D6-#x06DC] | [#x06DD-#x06DF] | -[#x06E0-#x06E4] | [#x06E7-#x06E8] | [#x06EA-#x06ED] | [#x0901-#x0903] | -#x093C | [#x093E-#x094C] | #x094D | [#x0951-#x0954] | [#x0962-#x0963] | -[#x0981-#x0983] | #x09BC | #x09BE | #x09BF | [#x09C0-#x09C4] | -[#x09C7-#x09C8] | [#x09CB-#x09CD] | #x09D7 | [#x09E2-#x09E3] | #x0A02 | -#x0A3C | #x0A3E | #x0A3F | [#x0A40-#x0A42] | [#x0A47-#x0A48] | -[#x0A4B-#x0A4D] | [#x0A70-#x0A71] | [#x0A81-#x0A83] | #x0ABC | -[#x0ABE-#x0AC5] | [#x0AC7-#x0AC9] | [#x0ACB-#x0ACD] | [#x0B01-#x0B03] | -#x0B3C | [#x0B3E-#x0B43] | [#x0B47-#x0B48] | [#x0B4B-#x0B4D] | -[#x0B56-#x0B57] | [#x0B82-#x0B83] | [#x0BBE-#x0BC2] | [#x0BC6-#x0BC8] | -[#x0BCA-#x0BCD] | #x0BD7 | [#x0C01-#x0C03] | [#x0C3E-#x0C44] | -[#x0C46-#x0C48] | [#x0C4A-#x0C4D] | [#x0C55-#x0C56] | [#x0C82-#x0C83] | -[#x0CBE-#x0CC4] | [#x0CC6-#x0CC8] | [#x0CCA-#x0CCD] | [#x0CD5-#x0CD6] | -[#x0D02-#x0D03] | [#x0D3E-#x0D43] | [#x0D46-#x0D48] | [#x0D4A-#x0D4D] | -#x0D57 | #x0E31 | [#x0E34-#x0E3A] | [#x0E47-#x0E4E] | #x0EB1 | -[#x0EB4-#x0EB9] | [#x0EBB-#x0EBC] | [#x0EC8-#x0ECD] | [#x0F18-#x0F19] | -#x0F35 | #x0F37 | #x0F39 | #x0F3E | #x0F3F | [#x0F71-#x0F84] | -[#x0F86-#x0F8B] | [#x0F90-#x0F95] | #x0F97 | [#x0F99-#x0FAD] | -[#x0FB1-#x0FB7] | #x0FB9 | [#x20D0-#x20DC] | #x20E1 | [#x302A-#x302F] | -#x3099 | #x309A""" - -digit = """ -[#x0030-#x0039] | [#x0660-#x0669] | [#x06F0-#x06F9] | [#x0966-#x096F] | -[#x09E6-#x09EF] | [#x0A66-#x0A6F] | [#x0AE6-#x0AEF] | [#x0B66-#x0B6F] | -[#x0BE7-#x0BEF] | [#x0C66-#x0C6F] | [#x0CE6-#x0CEF] | [#x0D66-#x0D6F] | -[#x0E50-#x0E59] | [#x0ED0-#x0ED9] | [#x0F20-#x0F29]""" - -extender = """ -#x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 | #x0E46 | #x0EC6 | #x3005 | -#[#x3031-#x3035] | [#x309D-#x309E] | [#x30FC-#x30FE]""" - -letter = " | ".join([baseChar, ideographic]) - -# Without the -name = " | ".join([letter, digit, ".", "-", "_", combiningCharacter, - extender]) -nameFirst = " | ".join([letter, "_"]) - -reChar = re.compile(r"#x([\d|A-F]{4,4})") -reCharRange = re.compile(r"\[#x([\d|A-F]{4,4})-#x([\d|A-F]{4,4})\]") - - -def charStringToList(chars): - charRanges = [item.strip() for item in chars.split(" | ")] - rv = [] - for item in charRanges: - foundMatch = False - for regexp in (reChar, reCharRange): - match = regexp.match(item) - if match is not None: - rv.append([hexToInt(item) for item in match.groups()]) - if len(rv[-1]) == 1: - rv[-1] = rv[-1] * 2 - foundMatch = True - break - if not foundMatch: - assert len(item) == 1 - - rv.append([ord(item)] * 2) - rv = normaliseCharList(rv) - return rv - - -def normaliseCharList(charList): - charList = sorted(charList) - for item in charList: - assert item[1] >= item[0] - rv = [] - i = 0 - while i < len(charList): - j = 1 - rv.append(charList[i]) - while i + j < len(charList) and charList[i + j][0] <= rv[-1][1] + 1: - rv[-1][1] = charList[i + j][1] - j += 1 - i += j - return rv - -# We don't really support characters above the BMP :( -max_unicode = int("FFFF", 16) - - -def missingRanges(charList): - rv = [] - if charList[0] != 0: - rv.append([0, charList[0][0] - 1]) - for i, item in enumerate(charList[:-1]): - rv.append([item[1] + 1, charList[i + 1][0] - 1]) - if charList[-1][1] != max_unicode: - rv.append([charList[-1][1] + 1, max_unicode]) - return rv - - -def listToRegexpStr(charList): - rv = [] - for item in charList: - if item[0] == item[1]: - rv.append(escapeRegexp(chr(item[0]))) - else: - rv.append(escapeRegexp(chr(item[0])) + "-" + - escapeRegexp(chr(item[1]))) - return "[%s]" % "".join(rv) - - -def hexToInt(hex_str): - return int(hex_str, 16) - - -def escapeRegexp(string): - specialCharacters = (".", "^", "$", "*", "+", "?", "{", "}", - "[", "]", "|", "(", ")", "-") - for char in specialCharacters: - string = string.replace(char, "\\" + char) - - return string - -# output from the above -nonXmlNameBMPRegexp = re.compile('[\x00-,/:-@\\[-\\^`\\{-\xb6\xb8-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u02cf\u02d2-\u02ff\u0346-\u035f\u0362-\u0385\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482\u0487-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u0590\u05a2\u05ba\u05be\u05c0\u05c3\u05c5-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u063f\u0653-\u065f\u066a-\u066f\u06b8-\u06b9\u06bf\u06cf\u06d4\u06e9\u06ee-\u06ef\u06fa-\u0900\u0904\u093a-\u093b\u094e-\u0950\u0955-\u0957\u0964-\u0965\u0970-\u0980\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09bb\u09bd\u09c5-\u09c6\u09c9-\u09ca\u09ce-\u09d6\u09d8-\u09db\u09de\u09e4-\u09e5\u09f2-\u0a01\u0a03-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a3b\u0a3d\u0a43-\u0a46\u0a49-\u0a4a\u0a4e-\u0a58\u0a5d\u0a5f-\u0a65\u0a75-\u0a80\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abb\u0ac6\u0aca\u0ace-\u0adf\u0ae1-\u0ae5\u0af0-\u0b00\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3b\u0b44-\u0b46\u0b49-\u0b4a\u0b4e-\u0b55\u0b58-\u0b5b\u0b5e\u0b62-\u0b65\u0b70-\u0b81\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0bbd\u0bc3-\u0bc5\u0bc9\u0bce-\u0bd6\u0bd8-\u0be6\u0bf0-\u0c00\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c3d\u0c45\u0c49\u0c4e-\u0c54\u0c57-\u0c5f\u0c62-\u0c65\u0c70-\u0c81\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cbd\u0cc5\u0cc9\u0cce-\u0cd4\u0cd7-\u0cdd\u0cdf\u0ce2-\u0ce5\u0cf0-\u0d01\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d3d\u0d44-\u0d45\u0d49\u0d4e-\u0d56\u0d58-\u0d5f\u0d62-\u0d65\u0d70-\u0e00\u0e2f\u0e3b-\u0e3f\u0e4f\u0e5a-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eba\u0ebe-\u0ebf\u0ec5\u0ec7\u0ece-\u0ecf\u0eda-\u0f17\u0f1a-\u0f1f\u0f2a-\u0f34\u0f36\u0f38\u0f3a-\u0f3d\u0f48\u0f6a-\u0f70\u0f85\u0f8c-\u0f8f\u0f96\u0f98\u0fae-\u0fb0\u0fb8\u0fba-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u20cf\u20dd-\u20e0\u20e2-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3004\u3006\u3008-\u3020\u3030\u3036-\u3040\u3095-\u3098\u309b-\u309c\u309f-\u30a0\u30fb\u30ff-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]') # noqa - -nonXmlNameFirstBMPRegexp = re.compile('[\x00-@\\[-\\^`\\{-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u0385\u0387\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u0640\u064b-\u0670\u06b8-\u06b9\u06bf\u06cf\u06d4\u06d6-\u06e4\u06e7-\u0904\u093a-\u093c\u093e-\u0957\u0962-\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09db\u09de\u09e2-\u09ef\u09f2-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a58\u0a5d\u0a5f-\u0a71\u0a75-\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abc\u0abe-\u0adf\u0ae1-\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3c\u0b3e-\u0b5b\u0b5e\u0b62-\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c5f\u0c62-\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cdd\u0cdf\u0ce2-\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d5f\u0d62-\u0e00\u0e2f\u0e31\u0e34-\u0e3f\u0e46-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eb1\u0eb4-\u0ebc\u0ebe-\u0ebf\u0ec5-\u0f3f\u0f48\u0f6a-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3006\u3008-\u3020\u302a-\u3040\u3095-\u30a0\u30fb-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]') # noqa - -# Simpler things -nonPubidCharRegexp = re.compile("[^\x20\x0D\x0Aa-zA-Z0-9\\-'()+,./:=?;!*#@$_%]") - - -class InfosetFilter(object): - replacementRegexp = re.compile(r"U[\dA-F]{5,5}") - - def __init__(self, - dropXmlnsLocalName=False, - dropXmlnsAttrNs=False, - preventDoubleDashComments=False, - preventDashAtCommentEnd=False, - replaceFormFeedCharacters=True, - preventSingleQuotePubid=False): - - self.dropXmlnsLocalName = dropXmlnsLocalName - self.dropXmlnsAttrNs = dropXmlnsAttrNs - - self.preventDoubleDashComments = preventDoubleDashComments - self.preventDashAtCommentEnd = preventDashAtCommentEnd - - self.replaceFormFeedCharacters = replaceFormFeedCharacters - - self.preventSingleQuotePubid = preventSingleQuotePubid - - self.replaceCache = {} - - def coerceAttribute(self, name, namespace=None): - if self.dropXmlnsLocalName and name.startswith("xmlns:"): - warnings.warn("Attributes cannot begin with xmlns", DataLossWarning) - return None - elif (self.dropXmlnsAttrNs and - namespace == "http://www.w3.org/2000/xmlns/"): - warnings.warn("Attributes cannot be in the xml namespace", DataLossWarning) - return None - else: - return self.toXmlName(name) - - def coerceElement(self, name): - return self.toXmlName(name) - - def coerceComment(self, data): - if self.preventDoubleDashComments: - while "--" in data: - warnings.warn("Comments cannot contain adjacent dashes", DataLossWarning) - data = data.replace("--", "- -") - if data.endswith("-"): - warnings.warn("Comments cannot end in a dash", DataLossWarning) - data += " " - return data - - def coerceCharacters(self, data): - if self.replaceFormFeedCharacters: - for _ in range(data.count("\x0C")): - warnings.warn("Text cannot contain U+000C", DataLossWarning) - data = data.replace("\x0C", " ") - # Other non-xml characters - return data - - def coercePubid(self, data): - dataOutput = data - for char in nonPubidCharRegexp.findall(data): - warnings.warn("Coercing non-XML pubid", DataLossWarning) - replacement = self.getReplacementCharacter(char) - dataOutput = dataOutput.replace(char, replacement) - if self.preventSingleQuotePubid and dataOutput.find("'") >= 0: - warnings.warn("Pubid cannot contain single quote", DataLossWarning) - dataOutput = dataOutput.replace("'", self.getReplacementCharacter("'")) - return dataOutput - - def toXmlName(self, name): - nameFirst = name[0] - nameRest = name[1:] - m = nonXmlNameFirstBMPRegexp.match(nameFirst) - if m: - warnings.warn("Coercing non-XML name", DataLossWarning) - nameFirstOutput = self.getReplacementCharacter(nameFirst) - else: - nameFirstOutput = nameFirst - - nameRestOutput = nameRest - replaceChars = set(nonXmlNameBMPRegexp.findall(nameRest)) - for char in replaceChars: - warnings.warn("Coercing non-XML name", DataLossWarning) - replacement = self.getReplacementCharacter(char) - nameRestOutput = nameRestOutput.replace(char, replacement) - return nameFirstOutput + nameRestOutput - - def getReplacementCharacter(self, char): - if char in self.replaceCache: - replacement = self.replaceCache[char] - else: - replacement = self.escapeChar(char) - return replacement - - def fromXmlName(self, name): - for item in set(self.replacementRegexp.findall(name)): - name = name.replace(item, self.unescapeChar(item)) - return name - - def escapeChar(self, char): - replacement = "U%05X" % ord(char) - self.replaceCache[char] = replacement - return replacement - - def unescapeChar(self, charcode): - return chr(int(charcode[1:], 16)) diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_inputstream.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_inputstream.py deleted file mode 100644 index 21c6bbc..0000000 --- a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_inputstream.py +++ /dev/null @@ -1,923 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -from pip._vendor.six import text_type, binary_type -from pip._vendor.six.moves import http_client, urllib - -import codecs -import re - -from pip._vendor import webencodings - -from .constants import EOF, spaceCharacters, asciiLetters, asciiUppercase -from .constants import _ReparseException -from . import _utils - -from io import StringIO - -try: - from io import BytesIO -except ImportError: - BytesIO = StringIO - -# Non-unicode versions of constants for use in the pre-parser -spaceCharactersBytes = frozenset([item.encode("ascii") for item in spaceCharacters]) -asciiLettersBytes = frozenset([item.encode("ascii") for item in asciiLetters]) -asciiUppercaseBytes = frozenset([item.encode("ascii") for item in asciiUppercase]) -spacesAngleBrackets = spaceCharactersBytes | frozenset([b">", b"<"]) - - -invalid_unicode_no_surrogate = "[\u0001-\u0008\u000B\u000E-\u001F\u007F-\u009F\uFDD0-\uFDEF\uFFFE\uFFFF\U0001FFFE\U0001FFFF\U0002FFFE\U0002FFFF\U0003FFFE\U0003FFFF\U0004FFFE\U0004FFFF\U0005FFFE\U0005FFFF\U0006FFFE\U0006FFFF\U0007FFFE\U0007FFFF\U0008FFFE\U0008FFFF\U0009FFFE\U0009FFFF\U000AFFFE\U000AFFFF\U000BFFFE\U000BFFFF\U000CFFFE\U000CFFFF\U000DFFFE\U000DFFFF\U000EFFFE\U000EFFFF\U000FFFFE\U000FFFFF\U0010FFFE\U0010FFFF]" # noqa - -if _utils.supports_lone_surrogates: - # Use one extra step of indirection and create surrogates with - # eval. Not using this indirection would introduce an illegal - # unicode literal on platforms not supporting such lone - # surrogates. - assert invalid_unicode_no_surrogate[-1] == "]" and invalid_unicode_no_surrogate.count("]") == 1 - invalid_unicode_re = re.compile(invalid_unicode_no_surrogate[:-1] + - eval('"\\uD800-\\uDFFF"') + # pylint:disable=eval-used - "]") -else: - invalid_unicode_re = re.compile(invalid_unicode_no_surrogate) - -non_bmp_invalid_codepoints = set([0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE, - 0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF, - 0x6FFFE, 0x6FFFF, 0x7FFFE, 0x7FFFF, 0x8FFFE, - 0x8FFFF, 0x9FFFE, 0x9FFFF, 0xAFFFE, 0xAFFFF, - 0xBFFFE, 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE, - 0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF, - 0x10FFFE, 0x10FFFF]) - -ascii_punctuation_re = re.compile("[\u0009-\u000D\u0020-\u002F\u003A-\u0040\u005C\u005B-\u0060\u007B-\u007E]") - -# Cache for charsUntil() -charsUntilRegEx = {} - - -class BufferedStream(object): - """Buffering for streams that do not have buffering of their own - - The buffer is implemented as a list of chunks on the assumption that - joining many strings will be slow since it is O(n**2) - """ - - def __init__(self, stream): - self.stream = stream - self.buffer = [] - self.position = [-1, 0] # chunk number, offset - - def tell(self): - pos = 0 - for chunk in self.buffer[:self.position[0]]: - pos += len(chunk) - pos += self.position[1] - return pos - - def seek(self, pos): - assert pos <= self._bufferedBytes() - offset = pos - i = 0 - while len(self.buffer[i]) < offset: - offset -= len(self.buffer[i]) - i += 1 - self.position = [i, offset] - - def read(self, bytes): - if not self.buffer: - return self._readStream(bytes) - elif (self.position[0] == len(self.buffer) and - self.position[1] == len(self.buffer[-1])): - return self._readStream(bytes) - else: - return self._readFromBuffer(bytes) - - def _bufferedBytes(self): - return sum([len(item) for item in self.buffer]) - - def _readStream(self, bytes): - data = self.stream.read(bytes) - self.buffer.append(data) - self.position[0] += 1 - self.position[1] = len(data) - return data - - def _readFromBuffer(self, bytes): - remainingBytes = bytes - rv = [] - bufferIndex = self.position[0] - bufferOffset = self.position[1] - while bufferIndex < len(self.buffer) and remainingBytes != 0: - assert remainingBytes > 0 - bufferedData = self.buffer[bufferIndex] - - if remainingBytes <= len(bufferedData) - bufferOffset: - bytesToRead = remainingBytes - self.position = [bufferIndex, bufferOffset + bytesToRead] - else: - bytesToRead = len(bufferedData) - bufferOffset - self.position = [bufferIndex, len(bufferedData)] - bufferIndex += 1 - rv.append(bufferedData[bufferOffset:bufferOffset + bytesToRead]) - remainingBytes -= bytesToRead - - bufferOffset = 0 - - if remainingBytes: - rv.append(self._readStream(remainingBytes)) - - return b"".join(rv) - - -def HTMLInputStream(source, **kwargs): - # Work around Python bug #20007: read(0) closes the connection. - # http://bugs.python.org/issue20007 - if (isinstance(source, http_client.HTTPResponse) or - # Also check for addinfourl wrapping HTTPResponse - (isinstance(source, urllib.response.addbase) and - isinstance(source.fp, http_client.HTTPResponse))): - isUnicode = False - elif hasattr(source, "read"): - isUnicode = isinstance(source.read(0), text_type) - else: - isUnicode = isinstance(source, text_type) - - if isUnicode: - encodings = [x for x in kwargs if x.endswith("_encoding")] - if encodings: - raise TypeError("Cannot set an encoding with a unicode input, set %r" % encodings) - - return HTMLUnicodeInputStream(source, **kwargs) - else: - return HTMLBinaryInputStream(source, **kwargs) - - -class HTMLUnicodeInputStream(object): - """Provides a unicode stream of characters to the HTMLTokenizer. - - This class takes care of character encoding and removing or replacing - incorrect byte-sequences and also provides column and line tracking. - - """ - - _defaultChunkSize = 10240 - - def __init__(self, source): - """Initialises the HTMLInputStream. - - HTMLInputStream(source, [encoding]) -> Normalized stream from source - for use by html5lib. - - source can be either a file-object, local filename or a string. - - The optional encoding parameter must be a string that indicates - the encoding. If specified, that encoding will be used, - regardless of any BOM or later declaration (such as in a meta - element) - - """ - - if not _utils.supports_lone_surrogates: - # Such platforms will have already checked for such - # surrogate errors, so no need to do this checking. - self.reportCharacterErrors = None - elif len("\U0010FFFF") == 1: - self.reportCharacterErrors = self.characterErrorsUCS4 - else: - self.reportCharacterErrors = self.characterErrorsUCS2 - - # List of where new lines occur - self.newLines = [0] - - self.charEncoding = (lookupEncoding("utf-8"), "certain") - self.dataStream = self.openStream(source) - - self.reset() - - def reset(self): - self.chunk = "" - self.chunkSize = 0 - self.chunkOffset = 0 - self.errors = [] - - # number of (complete) lines in previous chunks - self.prevNumLines = 0 - # number of columns in the last line of the previous chunk - self.prevNumCols = 0 - - # Deal with CR LF and surrogates split over chunk boundaries - self._bufferedCharacter = None - - def openStream(self, source): - """Produces a file object from source. - - source can be either a file object, local filename or a string. - - """ - # Already a file object - if hasattr(source, 'read'): - stream = source - else: - stream = StringIO(source) - - return stream - - def _position(self, offset): - chunk = self.chunk - nLines = chunk.count('\n', 0, offset) - positionLine = self.prevNumLines + nLines - lastLinePos = chunk.rfind('\n', 0, offset) - if lastLinePos == -1: - positionColumn = self.prevNumCols + offset - else: - positionColumn = offset - (lastLinePos + 1) - return (positionLine, positionColumn) - - def position(self): - """Returns (line, col) of the current position in the stream.""" - line, col = self._position(self.chunkOffset) - return (line + 1, col) - - def char(self): - """ Read one character from the stream or queue if available. Return - EOF when EOF is reached. - """ - # Read a new chunk from the input stream if necessary - if self.chunkOffset >= self.chunkSize: - if not self.readChunk(): - return EOF - - chunkOffset = self.chunkOffset - char = self.chunk[chunkOffset] - self.chunkOffset = chunkOffset + 1 - - return char - - def readChunk(self, chunkSize=None): - if chunkSize is None: - chunkSize = self._defaultChunkSize - - self.prevNumLines, self.prevNumCols = self._position(self.chunkSize) - - self.chunk = "" - self.chunkSize = 0 - self.chunkOffset = 0 - - data = self.dataStream.read(chunkSize) - - # Deal with CR LF and surrogates broken across chunks - if self._bufferedCharacter: - data = self._bufferedCharacter + data - self._bufferedCharacter = None - elif not data: - # We have no more data, bye-bye stream - return False - - if len(data) > 1: - lastv = ord(data[-1]) - if lastv == 0x0D or 0xD800 <= lastv <= 0xDBFF: - self._bufferedCharacter = data[-1] - data = data[:-1] - - if self.reportCharacterErrors: - self.reportCharacterErrors(data) - - # Replace invalid characters - data = data.replace("\r\n", "\n") - data = data.replace("\r", "\n") - - self.chunk = data - self.chunkSize = len(data) - - return True - - def characterErrorsUCS4(self, data): - for _ in range(len(invalid_unicode_re.findall(data))): - self.errors.append("invalid-codepoint") - - def characterErrorsUCS2(self, data): - # Someone picked the wrong compile option - # You lose - skip = False - for match in invalid_unicode_re.finditer(data): - if skip: - continue - codepoint = ord(match.group()) - pos = match.start() - # Pretty sure there should be endianness issues here - if _utils.isSurrogatePair(data[pos:pos + 2]): - # We have a surrogate pair! - char_val = _utils.surrogatePairToCodepoint(data[pos:pos + 2]) - if char_val in non_bmp_invalid_codepoints: - self.errors.append("invalid-codepoint") - skip = True - elif (codepoint >= 0xD800 and codepoint <= 0xDFFF and - pos == len(data) - 1): - self.errors.append("invalid-codepoint") - else: - skip = False - self.errors.append("invalid-codepoint") - - def charsUntil(self, characters, opposite=False): - """ Returns a string of characters from the stream up to but not - including any character in 'characters' or EOF. 'characters' must be - a container that supports the 'in' method and iteration over its - characters. - """ - - # Use a cache of regexps to find the required characters - try: - chars = charsUntilRegEx[(characters, opposite)] - except KeyError: - if __debug__: - for c in characters: - assert(ord(c) < 128) - regex = "".join(["\\x%02x" % ord(c) for c in characters]) - if not opposite: - regex = "^%s" % regex - chars = charsUntilRegEx[(characters, opposite)] = re.compile("[%s]+" % regex) - - rv = [] - - while True: - # Find the longest matching prefix - m = chars.match(self.chunk, self.chunkOffset) - if m is None: - # If nothing matched, and it wasn't because we ran out of chunk, - # then stop - if self.chunkOffset != self.chunkSize: - break - else: - end = m.end() - # If not the whole chunk matched, return everything - # up to the part that didn't match - if end != self.chunkSize: - rv.append(self.chunk[self.chunkOffset:end]) - self.chunkOffset = end - break - # If the whole remainder of the chunk matched, - # use it all and read the next chunk - rv.append(self.chunk[self.chunkOffset:]) - if not self.readChunk(): - # Reached EOF - break - - r = "".join(rv) - return r - - def unget(self, char): - # Only one character is allowed to be ungotten at once - it must - # be consumed again before any further call to unget - if char is not None: - if self.chunkOffset == 0: - # unget is called quite rarely, so it's a good idea to do - # more work here if it saves a bit of work in the frequently - # called char and charsUntil. - # So, just prepend the ungotten character onto the current - # chunk: - self.chunk = char + self.chunk - self.chunkSize += 1 - else: - self.chunkOffset -= 1 - assert self.chunk[self.chunkOffset] == char - - -class HTMLBinaryInputStream(HTMLUnicodeInputStream): - """Provides a unicode stream of characters to the HTMLTokenizer. - - This class takes care of character encoding and removing or replacing - incorrect byte-sequences and also provides column and line tracking. - - """ - - def __init__(self, source, override_encoding=None, transport_encoding=None, - same_origin_parent_encoding=None, likely_encoding=None, - default_encoding="windows-1252", useChardet=True): - """Initialises the HTMLInputStream. - - HTMLInputStream(source, [encoding]) -> Normalized stream from source - for use by html5lib. - - source can be either a file-object, local filename or a string. - - The optional encoding parameter must be a string that indicates - the encoding. If specified, that encoding will be used, - regardless of any BOM or later declaration (such as in a meta - element) - - """ - # Raw Stream - for unicode objects this will encode to utf-8 and set - # self.charEncoding as appropriate - self.rawStream = self.openStream(source) - - HTMLUnicodeInputStream.__init__(self, self.rawStream) - - # Encoding Information - # Number of bytes to use when looking for a meta element with - # encoding information - self.numBytesMeta = 1024 - # Number of bytes to use when using detecting encoding using chardet - self.numBytesChardet = 100 - # Things from args - self.override_encoding = override_encoding - self.transport_encoding = transport_encoding - self.same_origin_parent_encoding = same_origin_parent_encoding - self.likely_encoding = likely_encoding - self.default_encoding = default_encoding - - # Determine encoding - self.charEncoding = self.determineEncoding(useChardet) - assert self.charEncoding[0] is not None - - # Call superclass - self.reset() - - def reset(self): - self.dataStream = self.charEncoding[0].codec_info.streamreader(self.rawStream, 'replace') - HTMLUnicodeInputStream.reset(self) - - def openStream(self, source): - """Produces a file object from source. - - source can be either a file object, local filename or a string. - - """ - # Already a file object - if hasattr(source, 'read'): - stream = source - else: - stream = BytesIO(source) - - try: - stream.seek(stream.tell()) - except: # pylint:disable=bare-except - stream = BufferedStream(stream) - - return stream - - def determineEncoding(self, chardet=True): - # BOMs take precedence over everything - # This will also read past the BOM if present - charEncoding = self.detectBOM(), "certain" - if charEncoding[0] is not None: - return charEncoding - - # If we've been overriden, we've been overriden - charEncoding = lookupEncoding(self.override_encoding), "certain" - if charEncoding[0] is not None: - return charEncoding - - # Now check the transport layer - charEncoding = lookupEncoding(self.transport_encoding), "certain" - if charEncoding[0] is not None: - return charEncoding - - # Look for meta elements with encoding information - charEncoding = self.detectEncodingMeta(), "tentative" - if charEncoding[0] is not None: - return charEncoding - - # Parent document encoding - charEncoding = lookupEncoding(self.same_origin_parent_encoding), "tentative" - if charEncoding[0] is not None and not charEncoding[0].name.startswith("utf-16"): - return charEncoding - - # "likely" encoding - charEncoding = lookupEncoding(self.likely_encoding), "tentative" - if charEncoding[0] is not None: - return charEncoding - - # Guess with chardet, if available - if chardet: - try: - from pip._vendor.chardet.universaldetector import UniversalDetector - except ImportError: - pass - else: - buffers = [] - detector = UniversalDetector() - while not detector.done: - buffer = self.rawStream.read(self.numBytesChardet) - assert isinstance(buffer, bytes) - if not buffer: - break - buffers.append(buffer) - detector.feed(buffer) - detector.close() - encoding = lookupEncoding(detector.result['encoding']) - self.rawStream.seek(0) - if encoding is not None: - return encoding, "tentative" - - # Try the default encoding - charEncoding = lookupEncoding(self.default_encoding), "tentative" - if charEncoding[0] is not None: - return charEncoding - - # Fallback to html5lib's default if even that hasn't worked - return lookupEncoding("windows-1252"), "tentative" - - def changeEncoding(self, newEncoding): - assert self.charEncoding[1] != "certain" - newEncoding = lookupEncoding(newEncoding) - if newEncoding is None: - return - if newEncoding.name in ("utf-16be", "utf-16le"): - newEncoding = lookupEncoding("utf-8") - assert newEncoding is not None - elif newEncoding == self.charEncoding[0]: - self.charEncoding = (self.charEncoding[0], "certain") - else: - self.rawStream.seek(0) - self.charEncoding = (newEncoding, "certain") - self.reset() - raise _ReparseException("Encoding changed from %s to %s" % (self.charEncoding[0], newEncoding)) - - def detectBOM(self): - """Attempts to detect at BOM at the start of the stream. If - an encoding can be determined from the BOM return the name of the - encoding otherwise return None""" - bomDict = { - codecs.BOM_UTF8: 'utf-8', - codecs.BOM_UTF16_LE: 'utf-16le', codecs.BOM_UTF16_BE: 'utf-16be', - codecs.BOM_UTF32_LE: 'utf-32le', codecs.BOM_UTF32_BE: 'utf-32be' - } - - # Go to beginning of file and read in 4 bytes - string = self.rawStream.read(4) - assert isinstance(string, bytes) - - # Try detecting the BOM using bytes from the string - encoding = bomDict.get(string[:3]) # UTF-8 - seek = 3 - if not encoding: - # Need to detect UTF-32 before UTF-16 - encoding = bomDict.get(string) # UTF-32 - seek = 4 - if not encoding: - encoding = bomDict.get(string[:2]) # UTF-16 - seek = 2 - - # Set the read position past the BOM if one was found, otherwise - # set it to the start of the stream - if encoding: - self.rawStream.seek(seek) - return lookupEncoding(encoding) - else: - self.rawStream.seek(0) - return None - - def detectEncodingMeta(self): - """Report the encoding declared by the meta element - """ - buffer = self.rawStream.read(self.numBytesMeta) - assert isinstance(buffer, bytes) - parser = EncodingParser(buffer) - self.rawStream.seek(0) - encoding = parser.getEncoding() - - if encoding is not None and encoding.name in ("utf-16be", "utf-16le"): - encoding = lookupEncoding("utf-8") - - return encoding - - -class EncodingBytes(bytes): - """String-like object with an associated position and various extra methods - If the position is ever greater than the string length then an exception is - raised""" - def __new__(self, value): - assert isinstance(value, bytes) - return bytes.__new__(self, value.lower()) - - def __init__(self, value): - # pylint:disable=unused-argument - self._position = -1 - - def __iter__(self): - return self - - def __next__(self): - p = self._position = self._position + 1 - if p >= len(self): - raise StopIteration - elif p < 0: - raise TypeError - return self[p:p + 1] - - def next(self): - # Py2 compat - return self.__next__() - - def previous(self): - p = self._position - if p >= len(self): - raise StopIteration - elif p < 0: - raise TypeError - self._position = p = p - 1 - return self[p:p + 1] - - def setPosition(self, position): - if self._position >= len(self): - raise StopIteration - self._position = position - - def getPosition(self): - if self._position >= len(self): - raise StopIteration - if self._position >= 0: - return self._position - else: - return None - - position = property(getPosition, setPosition) - - def getCurrentByte(self): - return self[self.position:self.position + 1] - - currentByte = property(getCurrentByte) - - def skip(self, chars=spaceCharactersBytes): - """Skip past a list of characters""" - p = self.position # use property for the error-checking - while p < len(self): - c = self[p:p + 1] - if c not in chars: - self._position = p - return c - p += 1 - self._position = p - return None - - def skipUntil(self, chars): - p = self.position - while p < len(self): - c = self[p:p + 1] - if c in chars: - self._position = p - return c - p += 1 - self._position = p - return None - - def matchBytes(self, bytes): - """Look for a sequence of bytes at the start of a string. If the bytes - are found return True and advance the position to the byte after the - match. Otherwise return False and leave the position alone""" - p = self.position - data = self[p:p + len(bytes)] - rv = data.startswith(bytes) - if rv: - self.position += len(bytes) - return rv - - def jumpTo(self, bytes): - """Look for the next sequence of bytes matching a given sequence. If - a match is found advance the position to the last byte of the match""" - newPosition = self[self.position:].find(bytes) - if newPosition > -1: - # XXX: This is ugly, but I can't see a nicer way to fix this. - if self._position == -1: - self._position = 0 - self._position += (newPosition + len(bytes) - 1) - return True - else: - raise StopIteration - - -class EncodingParser(object): - """Mini parser for detecting character encoding from meta elements""" - - def __init__(self, data): - """string - the data to work on for encoding detection""" - self.data = EncodingBytes(data) - self.encoding = None - - def getEncoding(self): - methodDispatch = ( - (b"") - - def handleMeta(self): - if self.data.currentByte not in spaceCharactersBytes: - # if we have ") - - def getAttribute(self): - """Return a name,value pair for the next attribute in the stream, - if one is found, or None""" - data = self.data - # Step 1 (skip chars) - c = data.skip(spaceCharactersBytes | frozenset([b"/"])) - assert c is None or len(c) == 1 - # Step 2 - if c in (b">", None): - return None - # Step 3 - attrName = [] - attrValue = [] - # Step 4 attribute name - while True: - if c == b"=" and attrName: - break - elif c in spaceCharactersBytes: - # Step 6! - c = data.skip() - break - elif c in (b"/", b">"): - return b"".join(attrName), b"" - elif c in asciiUppercaseBytes: - attrName.append(c.lower()) - elif c is None: - return None - else: - attrName.append(c) - # Step 5 - c = next(data) - # Step 7 - if c != b"=": - data.previous() - return b"".join(attrName), b"" - # Step 8 - next(data) - # Step 9 - c = data.skip() - # Step 10 - if c in (b"'", b'"'): - # 10.1 - quoteChar = c - while True: - # 10.2 - c = next(data) - # 10.3 - if c == quoteChar: - next(data) - return b"".join(attrName), b"".join(attrValue) - # 10.4 - elif c in asciiUppercaseBytes: - attrValue.append(c.lower()) - # 10.5 - else: - attrValue.append(c) - elif c == b">": - return b"".join(attrName), b"" - elif c in asciiUppercaseBytes: - attrValue.append(c.lower()) - elif c is None: - return None - else: - attrValue.append(c) - # Step 11 - while True: - c = next(data) - if c in spacesAngleBrackets: - return b"".join(attrName), b"".join(attrValue) - elif c in asciiUppercaseBytes: - attrValue.append(c.lower()) - elif c is None: - return None - else: - attrValue.append(c) - - -class ContentAttrParser(object): - def __init__(self, data): - assert isinstance(data, bytes) - self.data = data - - def parse(self): - try: - # Check if the attr name is charset - # otherwise return - self.data.jumpTo(b"charset") - self.data.position += 1 - self.data.skip() - if not self.data.currentByte == b"=": - # If there is no = sign keep looking for attrs - return None - self.data.position += 1 - self.data.skip() - # Look for an encoding between matching quote marks - if self.data.currentByte in (b'"', b"'"): - quoteMark = self.data.currentByte - self.data.position += 1 - oldPosition = self.data.position - if self.data.jumpTo(quoteMark): - return self.data[oldPosition:self.data.position] - else: - return None - else: - # Unquoted value - oldPosition = self.data.position - try: - self.data.skipUntil(spaceCharactersBytes) - return self.data[oldPosition:self.data.position] - except StopIteration: - # Return the whole remaining value - return self.data[oldPosition:] - except StopIteration: - return None - - -def lookupEncoding(encoding): - """Return the python codec name corresponding to an encoding or None if the - string doesn't correspond to a valid encoding.""" - if isinstance(encoding, binary_type): - try: - encoding = encoding.decode("ascii") - except UnicodeDecodeError: - return None - - if encoding is not None: - try: - return webencodings.lookup(encoding) - except AttributeError: - return None - else: - return None diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_tokenizer.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_tokenizer.py deleted file mode 100644 index ef1ccf8..0000000 --- a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_tokenizer.py +++ /dev/null @@ -1,1721 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -from pip._vendor.six import unichr as chr - -from collections import deque - -from .constants import spaceCharacters -from .constants import entities -from .constants import asciiLetters, asciiUpper2Lower -from .constants import digits, hexDigits, EOF -from .constants import tokenTypes, tagTokenTypes -from .constants import replacementCharacters - -from ._inputstream import HTMLInputStream - -from ._trie import Trie - -entitiesTrie = Trie(entities) - - -class HTMLTokenizer(object): - """ This class takes care of tokenizing HTML. - - * self.currentToken - Holds the token that is currently being processed. - - * self.state - Holds a reference to the method to be invoked... XXX - - * self.stream - Points to HTMLInputStream object. - """ - - def __init__(self, stream, parser=None, **kwargs): - - self.stream = HTMLInputStream(stream, **kwargs) - self.parser = parser - - # Setup the initial tokenizer state - self.escapeFlag = False - self.lastFourChars = [] - self.state = self.dataState - self.escape = False - - # The current token being created - self.currentToken = None - super(HTMLTokenizer, self).__init__() - - def __iter__(self): - """ This is where the magic happens. - - We do our usually processing through the states and when we have a token - to return we yield the token which pauses processing until the next token - is requested. - """ - self.tokenQueue = deque([]) - # Start processing. When EOF is reached self.state will return False - # instead of True and the loop will terminate. - while self.state(): - while self.stream.errors: - yield {"type": tokenTypes["ParseError"], "data": self.stream.errors.pop(0)} - while self.tokenQueue: - yield self.tokenQueue.popleft() - - def consumeNumberEntity(self, isHex): - """This function returns either U+FFFD or the character based on the - decimal or hexadecimal representation. It also discards ";" if present. - If not present self.tokenQueue.append({"type": tokenTypes["ParseError"]}) is invoked. - """ - - allowed = digits - radix = 10 - if isHex: - allowed = hexDigits - radix = 16 - - charStack = [] - - # Consume all the characters that are in range while making sure we - # don't hit an EOF. - c = self.stream.char() - while c in allowed and c is not EOF: - charStack.append(c) - c = self.stream.char() - - # Convert the set of characters consumed to an int. - charAsInt = int("".join(charStack), radix) - - # Certain characters get replaced with others - if charAsInt in replacementCharacters: - char = replacementCharacters[charAsInt] - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "illegal-codepoint-for-numeric-entity", - "datavars": {"charAsInt": charAsInt}}) - elif ((0xD800 <= charAsInt <= 0xDFFF) or - (charAsInt > 0x10FFFF)): - char = "\uFFFD" - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "illegal-codepoint-for-numeric-entity", - "datavars": {"charAsInt": charAsInt}}) - else: - # Should speed up this check somehow (e.g. move the set to a constant) - if ((0x0001 <= charAsInt <= 0x0008) or - (0x000E <= charAsInt <= 0x001F) or - (0x007F <= charAsInt <= 0x009F) or - (0xFDD0 <= charAsInt <= 0xFDEF) or - charAsInt in frozenset([0x000B, 0xFFFE, 0xFFFF, 0x1FFFE, - 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE, - 0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, - 0x5FFFF, 0x6FFFE, 0x6FFFF, 0x7FFFE, - 0x7FFFF, 0x8FFFE, 0x8FFFF, 0x9FFFE, - 0x9FFFF, 0xAFFFE, 0xAFFFF, 0xBFFFE, - 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE, - 0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, - 0xFFFFF, 0x10FFFE, 0x10FFFF])): - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": - "illegal-codepoint-for-numeric-entity", - "datavars": {"charAsInt": charAsInt}}) - try: - # Try/except needed as UCS-2 Python builds' unichar only works - # within the BMP. - char = chr(charAsInt) - except ValueError: - v = charAsInt - 0x10000 - char = chr(0xD800 | (v >> 10)) + chr(0xDC00 | (v & 0x3FF)) - - # Discard the ; if present. Otherwise, put it back on the queue and - # invoke parseError on parser. - if c != ";": - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "numeric-entity-without-semicolon"}) - self.stream.unget(c) - - return char - - def consumeEntity(self, allowedChar=None, fromAttribute=False): - # Initialise to the default output for when no entity is matched - output = "&" - - charStack = [self.stream.char()] - if (charStack[0] in spaceCharacters or charStack[0] in (EOF, "<", "&") or - (allowedChar is not None and allowedChar == charStack[0])): - self.stream.unget(charStack[0]) - - elif charStack[0] == "#": - # Read the next character to see if it's hex or decimal - hex = False - charStack.append(self.stream.char()) - if charStack[-1] in ("x", "X"): - hex = True - charStack.append(self.stream.char()) - - # charStack[-1] should be the first digit - if (hex and charStack[-1] in hexDigits) \ - or (not hex and charStack[-1] in digits): - # At least one digit found, so consume the whole number - self.stream.unget(charStack[-1]) - output = self.consumeNumberEntity(hex) - else: - # No digits found - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "expected-numeric-entity"}) - self.stream.unget(charStack.pop()) - output = "&" + "".join(charStack) - - else: - # At this point in the process might have named entity. Entities - # are stored in the global variable "entities". - # - # Consume characters and compare to these to a substring of the - # entity names in the list until the substring no longer matches. - while (charStack[-1] is not EOF): - if not entitiesTrie.has_keys_with_prefix("".join(charStack)): - break - charStack.append(self.stream.char()) - - # At this point we have a string that starts with some characters - # that may match an entity - # Try to find the longest entity the string will match to take care - # of ¬i for instance. - try: - entityName = entitiesTrie.longest_prefix("".join(charStack[:-1])) - entityLength = len(entityName) - except KeyError: - entityName = None - - if entityName is not None: - if entityName[-1] != ";": - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "named-entity-without-semicolon"}) - if (entityName[-1] != ";" and fromAttribute and - (charStack[entityLength] in asciiLetters or - charStack[entityLength] in digits or - charStack[entityLength] == "=")): - self.stream.unget(charStack.pop()) - output = "&" + "".join(charStack) - else: - output = entities[entityName] - self.stream.unget(charStack.pop()) - output += "".join(charStack[entityLength:]) - else: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-named-entity"}) - self.stream.unget(charStack.pop()) - output = "&" + "".join(charStack) - - if fromAttribute: - self.currentToken["data"][-1][1] += output - else: - if output in spaceCharacters: - tokenType = "SpaceCharacters" - else: - tokenType = "Characters" - self.tokenQueue.append({"type": tokenTypes[tokenType], "data": output}) - - def processEntityInAttribute(self, allowedChar): - """This method replaces the need for "entityInAttributeValueState". - """ - self.consumeEntity(allowedChar=allowedChar, fromAttribute=True) - - def emitCurrentToken(self): - """This method is a generic handler for emitting the tags. It also sets - the state to "data" because that's what's needed after a token has been - emitted. - """ - token = self.currentToken - # Add token to the queue to be yielded - if (token["type"] in tagTokenTypes): - token["name"] = token["name"].translate(asciiUpper2Lower) - if token["type"] == tokenTypes["EndTag"]: - if token["data"]: - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "attributes-in-end-tag"}) - if token["selfClosing"]: - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "self-closing-flag-on-end-tag"}) - self.tokenQueue.append(token) - self.state = self.dataState - - # Below are the various tokenizer states worked out. - def dataState(self): - data = self.stream.char() - if data == "&": - self.state = self.entityDataState - elif data == "<": - self.state = self.tagOpenState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": "\u0000"}) - elif data is EOF: - # Tokenization ends. - return False - elif data in spaceCharacters: - # Directly after emitting a token you switch back to the "data - # state". At that point spaceCharacters are important so they are - # emitted separately. - self.tokenQueue.append({"type": tokenTypes["SpaceCharacters"], "data": - data + self.stream.charsUntil(spaceCharacters, True)}) - # No need to update lastFourChars here, since the first space will - # have already been appended to lastFourChars and will have broken - # any sequences - else: - chars = self.stream.charsUntil(("&", "<", "\u0000")) - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": - data + chars}) - return True - - def entityDataState(self): - self.consumeEntity() - self.state = self.dataState - return True - - def rcdataState(self): - data = self.stream.char() - if data == "&": - self.state = self.characterReferenceInRcdata - elif data == "<": - self.state = self.rcdataLessThanSignState - elif data == EOF: - # Tokenization ends. - return False - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": "\uFFFD"}) - elif data in spaceCharacters: - # Directly after emitting a token you switch back to the "data - # state". At that point spaceCharacters are important so they are - # emitted separately. - self.tokenQueue.append({"type": tokenTypes["SpaceCharacters"], "data": - data + self.stream.charsUntil(spaceCharacters, True)}) - # No need to update lastFourChars here, since the first space will - # have already been appended to lastFourChars and will have broken - # any sequences - else: - chars = self.stream.charsUntil(("&", "<", "\u0000")) - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": - data + chars}) - return True - - def characterReferenceInRcdata(self): - self.consumeEntity() - self.state = self.rcdataState - return True - - def rawtextState(self): - data = self.stream.char() - if data == "<": - self.state = self.rawtextLessThanSignState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": "\uFFFD"}) - elif data == EOF: - # Tokenization ends. - return False - else: - chars = self.stream.charsUntil(("<", "\u0000")) - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": - data + chars}) - return True - - def scriptDataState(self): - data = self.stream.char() - if data == "<": - self.state = self.scriptDataLessThanSignState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": "\uFFFD"}) - elif data == EOF: - # Tokenization ends. - return False - else: - chars = self.stream.charsUntil(("<", "\u0000")) - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": - data + chars}) - return True - - def plaintextState(self): - data = self.stream.char() - if data == EOF: - # Tokenization ends. - return False - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": "\uFFFD"}) - else: - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": - data + self.stream.charsUntil("\u0000")}) - return True - - def tagOpenState(self): - data = self.stream.char() - if data == "!": - self.state = self.markupDeclarationOpenState - elif data == "/": - self.state = self.closeTagOpenState - elif data in asciiLetters: - self.currentToken = {"type": tokenTypes["StartTag"], - "name": data, "data": [], - "selfClosing": False, - "selfClosingAcknowledged": False} - self.state = self.tagNameState - elif data == ">": - # XXX In theory it could be something besides a tag name. But - # do we really care? - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-tag-name-but-got-right-bracket"}) - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<>"}) - self.state = self.dataState - elif data == "?": - # XXX In theory it could be something besides a tag name. But - # do we really care? - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-tag-name-but-got-question-mark"}) - self.stream.unget(data) - self.state = self.bogusCommentState - else: - # XXX - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-tag-name"}) - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) - self.stream.unget(data) - self.state = self.dataState - return True - - def closeTagOpenState(self): - data = self.stream.char() - if data in asciiLetters: - self.currentToken = {"type": tokenTypes["EndTag"], "name": data, - "data": [], "selfClosing": False} - self.state = self.tagNameState - elif data == ">": - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-closing-tag-but-got-right-bracket"}) - self.state = self.dataState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-closing-tag-but-got-eof"}) - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "": - self.emitCurrentToken() - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-tag-name"}) - self.state = self.dataState - elif data == "/": - self.state = self.selfClosingStartTagState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["name"] += "\uFFFD" - else: - self.currentToken["name"] += data - # (Don't use charsUntil here, because tag names are - # very short and it's faster to not do anything fancy) - return True - - def rcdataLessThanSignState(self): - data = self.stream.char() - if data == "/": - self.temporaryBuffer = "" - self.state = self.rcdataEndTagOpenState - else: - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) - self.stream.unget(data) - self.state = self.rcdataState - return True - - def rcdataEndTagOpenState(self): - data = self.stream.char() - if data in asciiLetters: - self.temporaryBuffer += data - self.state = self.rcdataEndTagNameState - else: - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "" and appropriate: - self.currentToken = {"type": tokenTypes["EndTag"], - "name": self.temporaryBuffer, - "data": [], "selfClosing": False} - self.emitCurrentToken() - self.state = self.dataState - elif data in asciiLetters: - self.temporaryBuffer += data - else: - self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": "" and appropriate: - self.currentToken = {"type": tokenTypes["EndTag"], - "name": self.temporaryBuffer, - "data": [], "selfClosing": False} - self.emitCurrentToken() - self.state = self.dataState - elif data in asciiLetters: - self.temporaryBuffer += data - else: - self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": "" and appropriate: - self.currentToken = {"type": tokenTypes["EndTag"], - "name": self.temporaryBuffer, - "data": [], "selfClosing": False} - self.emitCurrentToken() - self.state = self.dataState - elif data in asciiLetters: - self.temporaryBuffer += data - else: - self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": "": - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": ">"}) - self.state = self.scriptDataState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": "\uFFFD"}) - self.state = self.scriptDataEscapedState - elif data == EOF: - self.state = self.dataState - else: - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) - self.state = self.scriptDataEscapedState - return True - - def scriptDataEscapedLessThanSignState(self): - data = self.stream.char() - if data == "/": - self.temporaryBuffer = "" - self.state = self.scriptDataEscapedEndTagOpenState - elif data in asciiLetters: - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<" + data}) - self.temporaryBuffer = data - self.state = self.scriptDataDoubleEscapeStartState - else: - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) - self.stream.unget(data) - self.state = self.scriptDataEscapedState - return True - - def scriptDataEscapedEndTagOpenState(self): - data = self.stream.char() - if data in asciiLetters: - self.temporaryBuffer = data - self.state = self.scriptDataEscapedEndTagNameState - else: - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "" and appropriate: - self.currentToken = {"type": tokenTypes["EndTag"], - "name": self.temporaryBuffer, - "data": [], "selfClosing": False} - self.emitCurrentToken() - self.state = self.dataState - elif data in asciiLetters: - self.temporaryBuffer += data - else: - self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": ""))): - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) - if self.temporaryBuffer.lower() == "script": - self.state = self.scriptDataDoubleEscapedState - else: - self.state = self.scriptDataEscapedState - elif data in asciiLetters: - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) - self.temporaryBuffer += data - else: - self.stream.unget(data) - self.state = self.scriptDataEscapedState - return True - - def scriptDataDoubleEscapedState(self): - data = self.stream.char() - if data == "-": - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"}) - self.state = self.scriptDataDoubleEscapedDashState - elif data == "<": - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) - self.state = self.scriptDataDoubleEscapedLessThanSignState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": "\uFFFD"}) - elif data == EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-script-in-script"}) - self.state = self.dataState - else: - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) - return True - - def scriptDataDoubleEscapedDashState(self): - data = self.stream.char() - if data == "-": - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"}) - self.state = self.scriptDataDoubleEscapedDashDashState - elif data == "<": - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) - self.state = self.scriptDataDoubleEscapedLessThanSignState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": "\uFFFD"}) - self.state = self.scriptDataDoubleEscapedState - elif data == EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-script-in-script"}) - self.state = self.dataState - else: - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) - self.state = self.scriptDataDoubleEscapedState - return True - - def scriptDataDoubleEscapedDashDashState(self): - data = self.stream.char() - if data == "-": - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"}) - elif data == "<": - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) - self.state = self.scriptDataDoubleEscapedLessThanSignState - elif data == ">": - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": ">"}) - self.state = self.scriptDataState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": "\uFFFD"}) - self.state = self.scriptDataDoubleEscapedState - elif data == EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-script-in-script"}) - self.state = self.dataState - else: - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) - self.state = self.scriptDataDoubleEscapedState - return True - - def scriptDataDoubleEscapedLessThanSignState(self): - data = self.stream.char() - if data == "/": - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "/"}) - self.temporaryBuffer = "" - self.state = self.scriptDataDoubleEscapeEndState - else: - self.stream.unget(data) - self.state = self.scriptDataDoubleEscapedState - return True - - def scriptDataDoubleEscapeEndState(self): - data = self.stream.char() - if data in (spaceCharacters | frozenset(("/", ">"))): - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) - if self.temporaryBuffer.lower() == "script": - self.state = self.scriptDataEscapedState - else: - self.state = self.scriptDataDoubleEscapedState - elif data in asciiLetters: - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) - self.temporaryBuffer += data - else: - self.stream.unget(data) - self.state = self.scriptDataDoubleEscapedState - return True - - def beforeAttributeNameState(self): - data = self.stream.char() - if data in spaceCharacters: - self.stream.charsUntil(spaceCharacters, True) - elif data in asciiLetters: - self.currentToken["data"].append([data, ""]) - self.state = self.attributeNameState - elif data == ">": - self.emitCurrentToken() - elif data == "/": - self.state = self.selfClosingStartTagState - elif data in ("'", '"', "=", "<"): - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "invalid-character-in-attribute-name"}) - self.currentToken["data"].append([data, ""]) - self.state = self.attributeNameState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["data"].append(["\uFFFD", ""]) - self.state = self.attributeNameState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-attribute-name-but-got-eof"}) - self.state = self.dataState - else: - self.currentToken["data"].append([data, ""]) - self.state = self.attributeNameState - return True - - def attributeNameState(self): - data = self.stream.char() - leavingThisState = True - emitToken = False - if data == "=": - self.state = self.beforeAttributeValueState - elif data in asciiLetters: - self.currentToken["data"][-1][0] += data +\ - self.stream.charsUntil(asciiLetters, True) - leavingThisState = False - elif data == ">": - # XXX If we emit here the attributes are converted to a dict - # without being checked and when the code below runs we error - # because data is a dict not a list - emitToken = True - elif data in spaceCharacters: - self.state = self.afterAttributeNameState - elif data == "/": - self.state = self.selfClosingStartTagState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["data"][-1][0] += "\uFFFD" - leavingThisState = False - elif data in ("'", '"', "<"): - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": - "invalid-character-in-attribute-name"}) - self.currentToken["data"][-1][0] += data - leavingThisState = False - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "eof-in-attribute-name"}) - self.state = self.dataState - else: - self.currentToken["data"][-1][0] += data - leavingThisState = False - - if leavingThisState: - # Attributes are not dropped at this stage. That happens when the - # start tag token is emitted so values can still be safely appended - # to attributes, but we do want to report the parse error in time. - self.currentToken["data"][-1][0] = ( - self.currentToken["data"][-1][0].translate(asciiUpper2Lower)) - for name, _ in self.currentToken["data"][:-1]: - if self.currentToken["data"][-1][0] == name: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "duplicate-attribute"}) - break - # XXX Fix for above XXX - if emitToken: - self.emitCurrentToken() - return True - - def afterAttributeNameState(self): - data = self.stream.char() - if data in spaceCharacters: - self.stream.charsUntil(spaceCharacters, True) - elif data == "=": - self.state = self.beforeAttributeValueState - elif data == ">": - self.emitCurrentToken() - elif data in asciiLetters: - self.currentToken["data"].append([data, ""]) - self.state = self.attributeNameState - elif data == "/": - self.state = self.selfClosingStartTagState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["data"].append(["\uFFFD", ""]) - self.state = self.attributeNameState - elif data in ("'", '"', "<"): - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "invalid-character-after-attribute-name"}) - self.currentToken["data"].append([data, ""]) - self.state = self.attributeNameState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-end-of-tag-but-got-eof"}) - self.state = self.dataState - else: - self.currentToken["data"].append([data, ""]) - self.state = self.attributeNameState - return True - - def beforeAttributeValueState(self): - data = self.stream.char() - if data in spaceCharacters: - self.stream.charsUntil(spaceCharacters, True) - elif data == "\"": - self.state = self.attributeValueDoubleQuotedState - elif data == "&": - self.state = self.attributeValueUnQuotedState - self.stream.unget(data) - elif data == "'": - self.state = self.attributeValueSingleQuotedState - elif data == ">": - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-attribute-value-but-got-right-bracket"}) - self.emitCurrentToken() - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["data"][-1][1] += "\uFFFD" - self.state = self.attributeValueUnQuotedState - elif data in ("=", "<", "`"): - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "equals-in-unquoted-attribute-value"}) - self.currentToken["data"][-1][1] += data - self.state = self.attributeValueUnQuotedState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-attribute-value-but-got-eof"}) - self.state = self.dataState - else: - self.currentToken["data"][-1][1] += data - self.state = self.attributeValueUnQuotedState - return True - - def attributeValueDoubleQuotedState(self): - data = self.stream.char() - if data == "\"": - self.state = self.afterAttributeValueState - elif data == "&": - self.processEntityInAttribute('"') - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["data"][-1][1] += "\uFFFD" - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-attribute-value-double-quote"}) - self.state = self.dataState - else: - self.currentToken["data"][-1][1] += data +\ - self.stream.charsUntil(("\"", "&", "\u0000")) - return True - - def attributeValueSingleQuotedState(self): - data = self.stream.char() - if data == "'": - self.state = self.afterAttributeValueState - elif data == "&": - self.processEntityInAttribute("'") - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["data"][-1][1] += "\uFFFD" - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-attribute-value-single-quote"}) - self.state = self.dataState - else: - self.currentToken["data"][-1][1] += data +\ - self.stream.charsUntil(("'", "&", "\u0000")) - return True - - def attributeValueUnQuotedState(self): - data = self.stream.char() - if data in spaceCharacters: - self.state = self.beforeAttributeNameState - elif data == "&": - self.processEntityInAttribute(">") - elif data == ">": - self.emitCurrentToken() - elif data in ('"', "'", "=", "<", "`"): - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-character-in-unquoted-attribute-value"}) - self.currentToken["data"][-1][1] += data - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["data"][-1][1] += "\uFFFD" - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-attribute-value-no-quotes"}) - self.state = self.dataState - else: - self.currentToken["data"][-1][1] += data + self.stream.charsUntil( - frozenset(("&", ">", '"', "'", "=", "<", "`", "\u0000")) | spaceCharacters) - return True - - def afterAttributeValueState(self): - data = self.stream.char() - if data in spaceCharacters: - self.state = self.beforeAttributeNameState - elif data == ">": - self.emitCurrentToken() - elif data == "/": - self.state = self.selfClosingStartTagState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-EOF-after-attribute-value"}) - self.stream.unget(data) - self.state = self.dataState - else: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-character-after-attribute-value"}) - self.stream.unget(data) - self.state = self.beforeAttributeNameState - return True - - def selfClosingStartTagState(self): - data = self.stream.char() - if data == ">": - self.currentToken["selfClosing"] = True - self.emitCurrentToken() - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": - "unexpected-EOF-after-solidus-in-tag"}) - self.stream.unget(data) - self.state = self.dataState - else: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-character-after-solidus-in-tag"}) - self.stream.unget(data) - self.state = self.beforeAttributeNameState - return True - - def bogusCommentState(self): - # Make a new comment token and give it as value all the characters - # until the first > or EOF (charsUntil checks for EOF automatically) - # and emit it. - data = self.stream.charsUntil(">") - data = data.replace("\u0000", "\uFFFD") - self.tokenQueue.append( - {"type": tokenTypes["Comment"], "data": data}) - - # Eat the character directly after the bogus comment which is either a - # ">" or an EOF. - self.stream.char() - self.state = self.dataState - return True - - def markupDeclarationOpenState(self): - charStack = [self.stream.char()] - if charStack[-1] == "-": - charStack.append(self.stream.char()) - if charStack[-1] == "-": - self.currentToken = {"type": tokenTypes["Comment"], "data": ""} - self.state = self.commentStartState - return True - elif charStack[-1] in ('d', 'D'): - matched = True - for expected in (('o', 'O'), ('c', 'C'), ('t', 'T'), - ('y', 'Y'), ('p', 'P'), ('e', 'E')): - charStack.append(self.stream.char()) - if charStack[-1] not in expected: - matched = False - break - if matched: - self.currentToken = {"type": tokenTypes["Doctype"], - "name": "", - "publicId": None, "systemId": None, - "correct": True} - self.state = self.doctypeState - return True - elif (charStack[-1] == "[" and - self.parser is not None and - self.parser.tree.openElements and - self.parser.tree.openElements[-1].namespace != self.parser.tree.defaultNamespace): - matched = True - for expected in ["C", "D", "A", "T", "A", "["]: - charStack.append(self.stream.char()) - if charStack[-1] != expected: - matched = False - break - if matched: - self.state = self.cdataSectionState - return True - - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-dashes-or-doctype"}) - - while charStack: - self.stream.unget(charStack.pop()) - self.state = self.bogusCommentState - return True - - def commentStartState(self): - data = self.stream.char() - if data == "-": - self.state = self.commentStartDashState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["data"] += "\uFFFD" - elif data == ">": - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "incorrect-comment"}) - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-comment"}) - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.currentToken["data"] += data - self.state = self.commentState - return True - - def commentStartDashState(self): - data = self.stream.char() - if data == "-": - self.state = self.commentEndState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["data"] += "-\uFFFD" - elif data == ">": - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "incorrect-comment"}) - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-comment"}) - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.currentToken["data"] += "-" + data - self.state = self.commentState - return True - - def commentState(self): - data = self.stream.char() - if data == "-": - self.state = self.commentEndDashState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["data"] += "\uFFFD" - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "eof-in-comment"}) - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.currentToken["data"] += data + \ - self.stream.charsUntil(("-", "\u0000")) - return True - - def commentEndDashState(self): - data = self.stream.char() - if data == "-": - self.state = self.commentEndState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["data"] += "-\uFFFD" - self.state = self.commentState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-comment-end-dash"}) - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.currentToken["data"] += "-" + data - self.state = self.commentState - return True - - def commentEndState(self): - data = self.stream.char() - if data == ">": - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["data"] += "--\uFFFD" - self.state = self.commentState - elif data == "!": - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-bang-after-double-dash-in-comment"}) - self.state = self.commentEndBangState - elif data == "-": - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-dash-after-double-dash-in-comment"}) - self.currentToken["data"] += data - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-comment-double-dash"}) - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - # XXX - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-char-in-comment"}) - self.currentToken["data"] += "--" + data - self.state = self.commentState - return True - - def commentEndBangState(self): - data = self.stream.char() - if data == ">": - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - elif data == "-": - self.currentToken["data"] += "--!" - self.state = self.commentEndDashState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["data"] += "--!\uFFFD" - self.state = self.commentState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-comment-end-bang-state"}) - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.currentToken["data"] += "--!" + data - self.state = self.commentState - return True - - def doctypeState(self): - data = self.stream.char() - if data in spaceCharacters: - self.state = self.beforeDoctypeNameState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-doctype-name-but-got-eof"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "need-space-after-doctype"}) - self.stream.unget(data) - self.state = self.beforeDoctypeNameState - return True - - def beforeDoctypeNameState(self): - data = self.stream.char() - if data in spaceCharacters: - pass - elif data == ">": - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-doctype-name-but-got-right-bracket"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["name"] = "\uFFFD" - self.state = self.doctypeNameState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-doctype-name-but-got-eof"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.currentToken["name"] = data - self.state = self.doctypeNameState - return True - - def doctypeNameState(self): - data = self.stream.char() - if data in spaceCharacters: - self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower) - self.state = self.afterDoctypeNameState - elif data == ">": - self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower) - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["name"] += "\uFFFD" - self.state = self.doctypeNameState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-doctype-name"}) - self.currentToken["correct"] = False - self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower) - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.currentToken["name"] += data - return True - - def afterDoctypeNameState(self): - data = self.stream.char() - if data in spaceCharacters: - pass - elif data == ">": - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - elif data is EOF: - self.currentToken["correct"] = False - self.stream.unget(data) - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-doctype"}) - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - if data in ("p", "P"): - matched = True - for expected in (("u", "U"), ("b", "B"), ("l", "L"), - ("i", "I"), ("c", "C")): - data = self.stream.char() - if data not in expected: - matched = False - break - if matched: - self.state = self.afterDoctypePublicKeywordState - return True - elif data in ("s", "S"): - matched = True - for expected in (("y", "Y"), ("s", "S"), ("t", "T"), - ("e", "E"), ("m", "M")): - data = self.stream.char() - if data not in expected: - matched = False - break - if matched: - self.state = self.afterDoctypeSystemKeywordState - return True - - # All the characters read before the current 'data' will be - # [a-zA-Z], so they're garbage in the bogus doctype and can be - # discarded; only the latest character might be '>' or EOF - # and needs to be ungetted - self.stream.unget(data) - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-space-or-right-bracket-in-doctype", "datavars": - {"data": data}}) - self.currentToken["correct"] = False - self.state = self.bogusDoctypeState - - return True - - def afterDoctypePublicKeywordState(self): - data = self.stream.char() - if data in spaceCharacters: - self.state = self.beforeDoctypePublicIdentifierState - elif data in ("'", '"'): - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-char-in-doctype"}) - self.stream.unget(data) - self.state = self.beforeDoctypePublicIdentifierState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-doctype"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.stream.unget(data) - self.state = self.beforeDoctypePublicIdentifierState - return True - - def beforeDoctypePublicIdentifierState(self): - data = self.stream.char() - if data in spaceCharacters: - pass - elif data == "\"": - self.currentToken["publicId"] = "" - self.state = self.doctypePublicIdentifierDoubleQuotedState - elif data == "'": - self.currentToken["publicId"] = "" - self.state = self.doctypePublicIdentifierSingleQuotedState - elif data == ">": - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-end-of-doctype"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-doctype"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-char-in-doctype"}) - self.currentToken["correct"] = False - self.state = self.bogusDoctypeState - return True - - def doctypePublicIdentifierDoubleQuotedState(self): - data = self.stream.char() - if data == "\"": - self.state = self.afterDoctypePublicIdentifierState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["publicId"] += "\uFFFD" - elif data == ">": - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-end-of-doctype"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-doctype"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.currentToken["publicId"] += data - return True - - def doctypePublicIdentifierSingleQuotedState(self): - data = self.stream.char() - if data == "'": - self.state = self.afterDoctypePublicIdentifierState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["publicId"] += "\uFFFD" - elif data == ">": - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-end-of-doctype"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-doctype"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.currentToken["publicId"] += data - return True - - def afterDoctypePublicIdentifierState(self): - data = self.stream.char() - if data in spaceCharacters: - self.state = self.betweenDoctypePublicAndSystemIdentifiersState - elif data == ">": - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - elif data == '"': - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-char-in-doctype"}) - self.currentToken["systemId"] = "" - self.state = self.doctypeSystemIdentifierDoubleQuotedState - elif data == "'": - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-char-in-doctype"}) - self.currentToken["systemId"] = "" - self.state = self.doctypeSystemIdentifierSingleQuotedState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-doctype"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-char-in-doctype"}) - self.currentToken["correct"] = False - self.state = self.bogusDoctypeState - return True - - def betweenDoctypePublicAndSystemIdentifiersState(self): - data = self.stream.char() - if data in spaceCharacters: - pass - elif data == ">": - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - elif data == '"': - self.currentToken["systemId"] = "" - self.state = self.doctypeSystemIdentifierDoubleQuotedState - elif data == "'": - self.currentToken["systemId"] = "" - self.state = self.doctypeSystemIdentifierSingleQuotedState - elif data == EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-doctype"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-char-in-doctype"}) - self.currentToken["correct"] = False - self.state = self.bogusDoctypeState - return True - - def afterDoctypeSystemKeywordState(self): - data = self.stream.char() - if data in spaceCharacters: - self.state = self.beforeDoctypeSystemIdentifierState - elif data in ("'", '"'): - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-char-in-doctype"}) - self.stream.unget(data) - self.state = self.beforeDoctypeSystemIdentifierState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-doctype"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.stream.unget(data) - self.state = self.beforeDoctypeSystemIdentifierState - return True - - def beforeDoctypeSystemIdentifierState(self): - data = self.stream.char() - if data in spaceCharacters: - pass - elif data == "\"": - self.currentToken["systemId"] = "" - self.state = self.doctypeSystemIdentifierDoubleQuotedState - elif data == "'": - self.currentToken["systemId"] = "" - self.state = self.doctypeSystemIdentifierSingleQuotedState - elif data == ">": - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-char-in-doctype"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-doctype"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-char-in-doctype"}) - self.currentToken["correct"] = False - self.state = self.bogusDoctypeState - return True - - def doctypeSystemIdentifierDoubleQuotedState(self): - data = self.stream.char() - if data == "\"": - self.state = self.afterDoctypeSystemIdentifierState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["systemId"] += "\uFFFD" - elif data == ">": - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-end-of-doctype"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-doctype"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.currentToken["systemId"] += data - return True - - def doctypeSystemIdentifierSingleQuotedState(self): - data = self.stream.char() - if data == "'": - self.state = self.afterDoctypeSystemIdentifierState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["systemId"] += "\uFFFD" - elif data == ">": - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-end-of-doctype"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-doctype"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.currentToken["systemId"] += data - return True - - def afterDoctypeSystemIdentifierState(self): - data = self.stream.char() - if data in spaceCharacters: - pass - elif data == ">": - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-doctype"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-char-in-doctype"}) - self.state = self.bogusDoctypeState - return True - - def bogusDoctypeState(self): - data = self.stream.char() - if data == ">": - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - elif data is EOF: - # XXX EMIT - self.stream.unget(data) - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - pass - return True - - def cdataSectionState(self): - data = [] - while True: - data.append(self.stream.charsUntil("]")) - data.append(self.stream.charsUntil(">")) - char = self.stream.char() - if char == EOF: - break - else: - assert char == ">" - if data[-1][-2:] == "]]": - data[-1] = data[-1][:-2] - break - else: - data.append(char) - - data = "".join(data) # pylint:disable=redefined-variable-type - # Deal with null here rather than in the parser - nullCount = data.count("\u0000") - if nullCount > 0: - for _ in range(nullCount): - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - data = data.replace("\u0000", "\uFFFD") - if data: - self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": data}) - self.state = self.dataState - return True diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_trie/__init__.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_trie/__init__.py deleted file mode 100644 index ccc70bd..0000000 --- a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_trie/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -from .py import Trie as PyTrie - -Trie = PyTrie - -# pylint:disable=wrong-import-position -try: - from .datrie import Trie as DATrie -except ImportError: - pass -else: - Trie = DATrie -# pylint:enable=wrong-import-position diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_trie/_base.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_trie/_base.py deleted file mode 100644 index ecfff32..0000000 --- a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_trie/_base.py +++ /dev/null @@ -1,37 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -from collections import Mapping - - -class Trie(Mapping): - """Abstract base class for tries""" - - def keys(self, prefix=None): - # pylint:disable=arguments-differ - keys = super(Trie, self).keys() - - if prefix is None: - return set(keys) - - return {x for x in keys if x.startswith(prefix)} - - def has_keys_with_prefix(self, prefix): - for key in self.keys(): - if key.startswith(prefix): - return True - - return False - - def longest_prefix(self, prefix): - if prefix in self: - return prefix - - for i in range(1, len(prefix) + 1): - if prefix[:-i] in self: - return prefix[:-i] - - raise KeyError(prefix) - - def longest_prefix_item(self, prefix): - lprefix = self.longest_prefix(prefix) - return (lprefix, self[lprefix]) diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_trie/datrie.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_trie/datrie.py deleted file mode 100644 index cb1af60..0000000 --- a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_trie/datrie.py +++ /dev/null @@ -1,44 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -from datrie import Trie as DATrie -from pip._vendor.six import text_type - -from ._base import Trie as ABCTrie - - -class Trie(ABCTrie): - def __init__(self, data): - chars = set() - for key in data.keys(): - if not isinstance(key, text_type): - raise TypeError("All keys must be strings") - for char in key: - chars.add(char) - - self._data = DATrie("".join(chars)) - for key, value in data.items(): - self._data[key] = value - - def __contains__(self, key): - return key in self._data - - def __len__(self): - return len(self._data) - - def __iter__(self): - raise NotImplementedError() - - def __getitem__(self, key): - return self._data[key] - - def keys(self, prefix=None): - return self._data.keys(prefix) - - def has_keys_with_prefix(self, prefix): - return self._data.has_keys_with_prefix(prefix) - - def longest_prefix(self, prefix): - return self._data.longest_prefix(prefix) - - def longest_prefix_item(self, prefix): - return self._data.longest_prefix_item(prefix) diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_trie/py.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_trie/py.py deleted file mode 100644 index 5531263..0000000 --- a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_trie/py.py +++ /dev/null @@ -1,67 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals -from pip._vendor.six import text_type - -from bisect import bisect_left - -from ._base import Trie as ABCTrie - - -class Trie(ABCTrie): - def __init__(self, data): - if not all(isinstance(x, text_type) for x in data.keys()): - raise TypeError("All keys must be strings") - - self._data = data - self._keys = sorted(data.keys()) - self._cachestr = "" - self._cachepoints = (0, len(data)) - - def __contains__(self, key): - return key in self._data - - def __len__(self): - return len(self._data) - - def __iter__(self): - return iter(self._data) - - def __getitem__(self, key): - return self._data[key] - - def keys(self, prefix=None): - if prefix is None or prefix == "" or not self._keys: - return set(self._keys) - - if prefix.startswith(self._cachestr): - lo, hi = self._cachepoints - start = i = bisect_left(self._keys, prefix, lo, hi) - else: - start = i = bisect_left(self._keys, prefix) - - keys = set() - if start == len(self._keys): - return keys - - while self._keys[i].startswith(prefix): - keys.add(self._keys[i]) - i += 1 - - self._cachestr = prefix - self._cachepoints = (start, i) - - return keys - - def has_keys_with_prefix(self, prefix): - if prefix in self._data: - return True - - if prefix.startswith(self._cachestr): - lo, hi = self._cachepoints - i = bisect_left(self._keys, prefix, lo, hi) - else: - i = bisect_left(self._keys, prefix) - - if i == len(self._keys): - return False - - return self._keys[i].startswith(prefix) diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_utils.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_utils.py deleted file mode 100644 index a559fa0..0000000 --- a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_utils.py +++ /dev/null @@ -1,124 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -from types import ModuleType - -from pip._vendor.six import text_type - -try: - import xml.etree.cElementTree as default_etree -except ImportError: - import xml.etree.ElementTree as default_etree - - -__all__ = ["default_etree", "MethodDispatcher", "isSurrogatePair", - "surrogatePairToCodepoint", "moduleFactoryFactory", - "supports_lone_surrogates"] - - -# Platforms not supporting lone surrogates (\uD800-\uDFFF) should be -# caught by the below test. In general this would be any platform -# using UTF-16 as its encoding of unicode strings, such as -# Jython. This is because UTF-16 itself is based on the use of such -# surrogates, and there is no mechanism to further escape such -# escapes. -try: - _x = eval('"\\uD800"') # pylint:disable=eval-used - if not isinstance(_x, text_type): - # We need this with u"" because of http://bugs.jython.org/issue2039 - _x = eval('u"\\uD800"') # pylint:disable=eval-used - assert isinstance(_x, text_type) -except: # pylint:disable=bare-except - supports_lone_surrogates = False -else: - supports_lone_surrogates = True - - -class MethodDispatcher(dict): - """Dict with 2 special properties: - - On initiation, keys that are lists, sets or tuples are converted to - multiple keys so accessing any one of the items in the original - list-like object returns the matching value - - md = MethodDispatcher({("foo", "bar"):"baz"}) - md["foo"] == "baz" - - A default value which can be set through the default attribute. - """ - - def __init__(self, items=()): - # Using _dictEntries instead of directly assigning to self is about - # twice as fast. Please do careful performance testing before changing - # anything here. - _dictEntries = [] - for name, value in items: - if isinstance(name, (list, tuple, frozenset, set)): - for item in name: - _dictEntries.append((item, value)) - else: - _dictEntries.append((name, value)) - dict.__init__(self, _dictEntries) - assert len(self) == len(_dictEntries) - self.default = None - - def __getitem__(self, key): - return dict.get(self, key, self.default) - - -# Some utility functions to deal with weirdness around UCS2 vs UCS4 -# python builds - -def isSurrogatePair(data): - return (len(data) == 2 and - ord(data[0]) >= 0xD800 and ord(data[0]) <= 0xDBFF and - ord(data[1]) >= 0xDC00 and ord(data[1]) <= 0xDFFF) - - -def surrogatePairToCodepoint(data): - char_val = (0x10000 + (ord(data[0]) - 0xD800) * 0x400 + - (ord(data[1]) - 0xDC00)) - return char_val - -# Module Factory Factory (no, this isn't Java, I know) -# Here to stop this being duplicated all over the place. - - -def moduleFactoryFactory(factory): - moduleCache = {} - - def moduleFactory(baseModule, *args, **kwargs): - if isinstance(ModuleType.__name__, type("")): - name = "_%s_factory" % baseModule.__name__ - else: - name = b"_%s_factory" % baseModule.__name__ - - kwargs_tuple = tuple(kwargs.items()) - - try: - return moduleCache[name][args][kwargs_tuple] - except KeyError: - mod = ModuleType(name) - objs = factory(baseModule, *args, **kwargs) - mod.__dict__.update(objs) - if "name" not in moduleCache: - moduleCache[name] = {} - if "args" not in moduleCache[name]: - moduleCache[name][args] = {} - if "kwargs" not in moduleCache[name][args]: - moduleCache[name][args][kwargs_tuple] = {} - moduleCache[name][args][kwargs_tuple] = mod - return mod - - return moduleFactory - - -def memoize(func): - cache = {} - - def wrapped(*args, **kwargs): - key = (tuple(args), tuple(kwargs.items())) - if key not in cache: - cache[key] = func(*args, **kwargs) - return cache[key] - - return wrapped diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/constants.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/constants.py deleted file mode 100644 index bca155e..0000000 --- a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/constants.py +++ /dev/null @@ -1,2947 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -import string - -EOF = None - -E = { - "null-character": - "Null character in input stream, replaced with U+FFFD.", - "invalid-codepoint": - "Invalid codepoint in stream.", - "incorrectly-placed-solidus": - "Solidus (/) incorrectly placed in tag.", - "incorrect-cr-newline-entity": - "Incorrect CR newline entity, replaced with LF.", - "illegal-windows-1252-entity": - "Entity used with illegal number (windows-1252 reference).", - "cant-convert-numeric-entity": - "Numeric entity couldn't be converted to character " - "(codepoint U+%(charAsInt)08x).", - "illegal-codepoint-for-numeric-entity": - "Numeric entity represents an illegal codepoint: " - "U+%(charAsInt)08x.", - "numeric-entity-without-semicolon": - "Numeric entity didn't end with ';'.", - "expected-numeric-entity-but-got-eof": - "Numeric entity expected. Got end of file instead.", - "expected-numeric-entity": - "Numeric entity expected but none found.", - "named-entity-without-semicolon": - "Named entity didn't end with ';'.", - "expected-named-entity": - "Named entity expected. Got none.", - "attributes-in-end-tag": - "End tag contains unexpected attributes.", - 'self-closing-flag-on-end-tag': - "End tag contains unexpected self-closing flag.", - "expected-tag-name-but-got-right-bracket": - "Expected tag name. Got '>' instead.", - "expected-tag-name-but-got-question-mark": - "Expected tag name. Got '?' instead. (HTML doesn't " - "support processing instructions.)", - "expected-tag-name": - "Expected tag name. Got something else instead", - "expected-closing-tag-but-got-right-bracket": - "Expected closing tag. Got '>' instead. Ignoring ''.", - "expected-closing-tag-but-got-eof": - "Expected closing tag. Unexpected end of file.", - "expected-closing-tag-but-got-char": - "Expected closing tag. Unexpected character '%(data)s' found.", - "eof-in-tag-name": - "Unexpected end of file in the tag name.", - "expected-attribute-name-but-got-eof": - "Unexpected end of file. Expected attribute name instead.", - "eof-in-attribute-name": - "Unexpected end of file in attribute name.", - "invalid-character-in-attribute-name": - "Invalid character in attribute name", - "duplicate-attribute": - "Dropped duplicate attribute on tag.", - "expected-end-of-tag-name-but-got-eof": - "Unexpected end of file. Expected = or end of tag.", - "expected-attribute-value-but-got-eof": - "Unexpected end of file. Expected attribute value.", - "expected-attribute-value-but-got-right-bracket": - "Expected attribute value. Got '>' instead.", - 'equals-in-unquoted-attribute-value': - "Unexpected = in unquoted attribute", - 'unexpected-character-in-unquoted-attribute-value': - "Unexpected character in unquoted attribute", - "invalid-character-after-attribute-name": - "Unexpected character after attribute name.", - "unexpected-character-after-attribute-value": - "Unexpected character after attribute value.", - "eof-in-attribute-value-double-quote": - "Unexpected end of file in attribute value (\").", - "eof-in-attribute-value-single-quote": - "Unexpected end of file in attribute value (').", - "eof-in-attribute-value-no-quotes": - "Unexpected end of file in attribute value.", - "unexpected-EOF-after-solidus-in-tag": - "Unexpected end of file in tag. Expected >", - "unexpected-character-after-solidus-in-tag": - "Unexpected character after / in tag. Expected >", - "expected-dashes-or-doctype": - "Expected '--' or 'DOCTYPE'. Not found.", - "unexpected-bang-after-double-dash-in-comment": - "Unexpected ! after -- in comment", - "unexpected-space-after-double-dash-in-comment": - "Unexpected space after -- in comment", - "incorrect-comment": - "Incorrect comment.", - "eof-in-comment": - "Unexpected end of file in comment.", - "eof-in-comment-end-dash": - "Unexpected end of file in comment (-)", - "unexpected-dash-after-double-dash-in-comment": - "Unexpected '-' after '--' found in comment.", - "eof-in-comment-double-dash": - "Unexpected end of file in comment (--).", - "eof-in-comment-end-space-state": - "Unexpected end of file in comment.", - "eof-in-comment-end-bang-state": - "Unexpected end of file in comment.", - "unexpected-char-in-comment": - "Unexpected character in comment found.", - "need-space-after-doctype": - "No space after literal string 'DOCTYPE'.", - "expected-doctype-name-but-got-right-bracket": - "Unexpected > character. Expected DOCTYPE name.", - "expected-doctype-name-but-got-eof": - "Unexpected end of file. Expected DOCTYPE name.", - "eof-in-doctype-name": - "Unexpected end of file in DOCTYPE name.", - "eof-in-doctype": - "Unexpected end of file in DOCTYPE.", - "expected-space-or-right-bracket-in-doctype": - "Expected space or '>'. Got '%(data)s'", - "unexpected-end-of-doctype": - "Unexpected end of DOCTYPE.", - "unexpected-char-in-doctype": - "Unexpected character in DOCTYPE.", - "eof-in-innerhtml": - "XXX innerHTML EOF", - "unexpected-doctype": - "Unexpected DOCTYPE. Ignored.", - "non-html-root": - "html needs to be the first start tag.", - "expected-doctype-but-got-eof": - "Unexpected End of file. Expected DOCTYPE.", - "unknown-doctype": - "Erroneous DOCTYPE.", - "expected-doctype-but-got-chars": - "Unexpected non-space characters. Expected DOCTYPE.", - "expected-doctype-but-got-start-tag": - "Unexpected start tag (%(name)s). Expected DOCTYPE.", - "expected-doctype-but-got-end-tag": - "Unexpected end tag (%(name)s). Expected DOCTYPE.", - "end-tag-after-implied-root": - "Unexpected end tag (%(name)s) after the (implied) root element.", - "expected-named-closing-tag-but-got-eof": - "Unexpected end of file. Expected end tag (%(name)s).", - "two-heads-are-not-better-than-one": - "Unexpected start tag head in existing head. Ignored.", - "unexpected-end-tag": - "Unexpected end tag (%(name)s). Ignored.", - "unexpected-start-tag-out-of-my-head": - "Unexpected start tag (%(name)s) that can be in head. Moved.", - "unexpected-start-tag": - "Unexpected start tag (%(name)s).", - "missing-end-tag": - "Missing end tag (%(name)s).", - "missing-end-tags": - "Missing end tags (%(name)s).", - "unexpected-start-tag-implies-end-tag": - "Unexpected start tag (%(startName)s) " - "implies end tag (%(endName)s).", - "unexpected-start-tag-treated-as": - "Unexpected start tag (%(originalName)s). Treated as %(newName)s.", - "deprecated-tag": - "Unexpected start tag %(name)s. Don't use it!", - "unexpected-start-tag-ignored": - "Unexpected start tag %(name)s. Ignored.", - "expected-one-end-tag-but-got-another": - "Unexpected end tag (%(gotName)s). " - "Missing end tag (%(expectedName)s).", - "end-tag-too-early": - "End tag (%(name)s) seen too early. Expected other end tag.", - "end-tag-too-early-named": - "Unexpected end tag (%(gotName)s). Expected end tag (%(expectedName)s).", - "end-tag-too-early-ignored": - "End tag (%(name)s) seen too early. Ignored.", - "adoption-agency-1.1": - "End tag (%(name)s) violates step 1, " - "paragraph 1 of the adoption agency algorithm.", - "adoption-agency-1.2": - "End tag (%(name)s) violates step 1, " - "paragraph 2 of the adoption agency algorithm.", - "adoption-agency-1.3": - "End tag (%(name)s) violates step 1, " - "paragraph 3 of the adoption agency algorithm.", - "adoption-agency-4.4": - "End tag (%(name)s) violates step 4, " - "paragraph 4 of the adoption agency algorithm.", - "unexpected-end-tag-treated-as": - "Unexpected end tag (%(originalName)s). Treated as %(newName)s.", - "no-end-tag": - "This element (%(name)s) has no end tag.", - "unexpected-implied-end-tag-in-table": - "Unexpected implied end tag (%(name)s) in the table phase.", - "unexpected-implied-end-tag-in-table-body": - "Unexpected implied end tag (%(name)s) in the table body phase.", - "unexpected-char-implies-table-voodoo": - "Unexpected non-space characters in " - "table context caused voodoo mode.", - "unexpected-hidden-input-in-table": - "Unexpected input with type hidden in table context.", - "unexpected-form-in-table": - "Unexpected form in table context.", - "unexpected-start-tag-implies-table-voodoo": - "Unexpected start tag (%(name)s) in " - "table context caused voodoo mode.", - "unexpected-end-tag-implies-table-voodoo": - "Unexpected end tag (%(name)s) in " - "table context caused voodoo mode.", - "unexpected-cell-in-table-body": - "Unexpected table cell start tag (%(name)s) " - "in the table body phase.", - "unexpected-cell-end-tag": - "Got table cell end tag (%(name)s) " - "while required end tags are missing.", - "unexpected-end-tag-in-table-body": - "Unexpected end tag (%(name)s) in the table body phase. Ignored.", - "unexpected-implied-end-tag-in-table-row": - "Unexpected implied end tag (%(name)s) in the table row phase.", - "unexpected-end-tag-in-table-row": - "Unexpected end tag (%(name)s) in the table row phase. Ignored.", - "unexpected-select-in-select": - "Unexpected select start tag in the select phase " - "treated as select end tag.", - "unexpected-input-in-select": - "Unexpected input start tag in the select phase.", - "unexpected-start-tag-in-select": - "Unexpected start tag token (%(name)s in the select phase. " - "Ignored.", - "unexpected-end-tag-in-select": - "Unexpected end tag (%(name)s) in the select phase. Ignored.", - "unexpected-table-element-start-tag-in-select-in-table": - "Unexpected table element start tag (%(name)s) in the select in table phase.", - "unexpected-table-element-end-tag-in-select-in-table": - "Unexpected table element end tag (%(name)s) in the select in table phase.", - "unexpected-char-after-body": - "Unexpected non-space characters in the after body phase.", - "unexpected-start-tag-after-body": - "Unexpected start tag token (%(name)s)" - " in the after body phase.", - "unexpected-end-tag-after-body": - "Unexpected end tag token (%(name)s)" - " in the after body phase.", - "unexpected-char-in-frameset": - "Unexpected characters in the frameset phase. Characters ignored.", - "unexpected-start-tag-in-frameset": - "Unexpected start tag token (%(name)s)" - " in the frameset phase. Ignored.", - "unexpected-frameset-in-frameset-innerhtml": - "Unexpected end tag token (frameset) " - "in the frameset phase (innerHTML).", - "unexpected-end-tag-in-frameset": - "Unexpected end tag token (%(name)s)" - " in the frameset phase. Ignored.", - "unexpected-char-after-frameset": - "Unexpected non-space characters in the " - "after frameset phase. Ignored.", - "unexpected-start-tag-after-frameset": - "Unexpected start tag (%(name)s)" - " in the after frameset phase. Ignored.", - "unexpected-end-tag-after-frameset": - "Unexpected end tag (%(name)s)" - " in the after frameset phase. Ignored.", - "unexpected-end-tag-after-body-innerhtml": - "Unexpected end tag after body(innerHtml)", - "expected-eof-but-got-char": - "Unexpected non-space characters. Expected end of file.", - "expected-eof-but-got-start-tag": - "Unexpected start tag (%(name)s)" - ". Expected end of file.", - "expected-eof-but-got-end-tag": - "Unexpected end tag (%(name)s)" - ". Expected end of file.", - "eof-in-table": - "Unexpected end of file. Expected table content.", - "eof-in-select": - "Unexpected end of file. Expected select content.", - "eof-in-frameset": - "Unexpected end of file. Expected frameset content.", - "eof-in-script-in-script": - "Unexpected end of file. Expected script content.", - "eof-in-foreign-lands": - "Unexpected end of file. Expected foreign content", - "non-void-element-with-trailing-solidus": - "Trailing solidus not allowed on element %(name)s", - "unexpected-html-element-in-foreign-content": - "Element %(name)s not allowed in a non-html context", - "unexpected-end-tag-before-html": - "Unexpected end tag (%(name)s) before html.", - "unexpected-inhead-noscript-tag": - "Element %(name)s not allowed in a inhead-noscript context", - "eof-in-head-noscript": - "Unexpected end of file. Expected inhead-noscript content", - "char-in-head-noscript": - "Unexpected non-space character. Expected inhead-noscript content", - "XXX-undefined-error": - "Undefined error (this sucks and should be fixed)", -} - -namespaces = { - "html": "http://www.w3.org/1999/xhtml", - "mathml": "http://www.w3.org/1998/Math/MathML", - "svg": "http://www.w3.org/2000/svg", - "xlink": "http://www.w3.org/1999/xlink", - "xml": "http://www.w3.org/XML/1998/namespace", - "xmlns": "http://www.w3.org/2000/xmlns/" -} - -scopingElements = frozenset([ - (namespaces["html"], "applet"), - (namespaces["html"], "caption"), - (namespaces["html"], "html"), - (namespaces["html"], "marquee"), - (namespaces["html"], "object"), - (namespaces["html"], "table"), - (namespaces["html"], "td"), - (namespaces["html"], "th"), - (namespaces["mathml"], "mi"), - (namespaces["mathml"], "mo"), - (namespaces["mathml"], "mn"), - (namespaces["mathml"], "ms"), - (namespaces["mathml"], "mtext"), - (namespaces["mathml"], "annotation-xml"), - (namespaces["svg"], "foreignObject"), - (namespaces["svg"], "desc"), - (namespaces["svg"], "title"), -]) - -formattingElements = frozenset([ - (namespaces["html"], "a"), - (namespaces["html"], "b"), - (namespaces["html"], "big"), - (namespaces["html"], "code"), - (namespaces["html"], "em"), - (namespaces["html"], "font"), - (namespaces["html"], "i"), - (namespaces["html"], "nobr"), - (namespaces["html"], "s"), - (namespaces["html"], "small"), - (namespaces["html"], "strike"), - (namespaces["html"], "strong"), - (namespaces["html"], "tt"), - (namespaces["html"], "u") -]) - -specialElements = frozenset([ - (namespaces["html"], "address"), - (namespaces["html"], "applet"), - (namespaces["html"], "area"), - (namespaces["html"], "article"), - (namespaces["html"], "aside"), - (namespaces["html"], "base"), - (namespaces["html"], "basefont"), - (namespaces["html"], "bgsound"), - (namespaces["html"], "blockquote"), - (namespaces["html"], "body"), - (namespaces["html"], "br"), - (namespaces["html"], "button"), - (namespaces["html"], "caption"), - (namespaces["html"], "center"), - (namespaces["html"], "col"), - (namespaces["html"], "colgroup"), - (namespaces["html"], "command"), - (namespaces["html"], "dd"), - (namespaces["html"], "details"), - (namespaces["html"], "dir"), - (namespaces["html"], "div"), - (namespaces["html"], "dl"), - (namespaces["html"], "dt"), - (namespaces["html"], "embed"), - (namespaces["html"], "fieldset"), - (namespaces["html"], "figure"), - (namespaces["html"], "footer"), - (namespaces["html"], "form"), - (namespaces["html"], "frame"), - (namespaces["html"], "frameset"), - (namespaces["html"], "h1"), - (namespaces["html"], "h2"), - (namespaces["html"], "h3"), - (namespaces["html"], "h4"), - (namespaces["html"], "h5"), - (namespaces["html"], "h6"), - (namespaces["html"], "head"), - (namespaces["html"], "header"), - (namespaces["html"], "hr"), - (namespaces["html"], "html"), - (namespaces["html"], "iframe"), - # Note that image is commented out in the spec as "this isn't an - # element that can end up on the stack, so it doesn't matter," - (namespaces["html"], "image"), - (namespaces["html"], "img"), - (namespaces["html"], "input"), - (namespaces["html"], "isindex"), - (namespaces["html"], "li"), - (namespaces["html"], "link"), - (namespaces["html"], "listing"), - (namespaces["html"], "marquee"), - (namespaces["html"], "menu"), - (namespaces["html"], "meta"), - (namespaces["html"], "nav"), - (namespaces["html"], "noembed"), - (namespaces["html"], "noframes"), - (namespaces["html"], "noscript"), - (namespaces["html"], "object"), - (namespaces["html"], "ol"), - (namespaces["html"], "p"), - (namespaces["html"], "param"), - (namespaces["html"], "plaintext"), - (namespaces["html"], "pre"), - (namespaces["html"], "script"), - (namespaces["html"], "section"), - (namespaces["html"], "select"), - (namespaces["html"], "style"), - (namespaces["html"], "table"), - (namespaces["html"], "tbody"), - (namespaces["html"], "td"), - (namespaces["html"], "textarea"), - (namespaces["html"], "tfoot"), - (namespaces["html"], "th"), - (namespaces["html"], "thead"), - (namespaces["html"], "title"), - (namespaces["html"], "tr"), - (namespaces["html"], "ul"), - (namespaces["html"], "wbr"), - (namespaces["html"], "xmp"), - (namespaces["svg"], "foreignObject") -]) - -htmlIntegrationPointElements = frozenset([ - (namespaces["mathml"], "annotation-xml"), - (namespaces["svg"], "foreignObject"), - (namespaces["svg"], "desc"), - (namespaces["svg"], "title") -]) - -mathmlTextIntegrationPointElements = frozenset([ - (namespaces["mathml"], "mi"), - (namespaces["mathml"], "mo"), - (namespaces["mathml"], "mn"), - (namespaces["mathml"], "ms"), - (namespaces["mathml"], "mtext") -]) - -adjustSVGAttributes = { - "attributename": "attributeName", - "attributetype": "attributeType", - "basefrequency": "baseFrequency", - "baseprofile": "baseProfile", - "calcmode": "calcMode", - "clippathunits": "clipPathUnits", - "contentscripttype": "contentScriptType", - "contentstyletype": "contentStyleType", - "diffuseconstant": "diffuseConstant", - "edgemode": "edgeMode", - "externalresourcesrequired": "externalResourcesRequired", - "filterres": "filterRes", - "filterunits": "filterUnits", - "glyphref": "glyphRef", - "gradienttransform": "gradientTransform", - "gradientunits": "gradientUnits", - "kernelmatrix": "kernelMatrix", - "kernelunitlength": "kernelUnitLength", - "keypoints": "keyPoints", - "keysplines": "keySplines", - "keytimes": "keyTimes", - "lengthadjust": "lengthAdjust", - "limitingconeangle": "limitingConeAngle", - "markerheight": "markerHeight", - "markerunits": "markerUnits", - "markerwidth": "markerWidth", - "maskcontentunits": "maskContentUnits", - "maskunits": "maskUnits", - "numoctaves": "numOctaves", - "pathlength": "pathLength", - "patterncontentunits": "patternContentUnits", - "patterntransform": "patternTransform", - "patternunits": "patternUnits", - "pointsatx": "pointsAtX", - "pointsaty": "pointsAtY", - "pointsatz": "pointsAtZ", - "preservealpha": "preserveAlpha", - "preserveaspectratio": "preserveAspectRatio", - "primitiveunits": "primitiveUnits", - "refx": "refX", - "refy": "refY", - "repeatcount": "repeatCount", - "repeatdur": "repeatDur", - "requiredextensions": "requiredExtensions", - "requiredfeatures": "requiredFeatures", - "specularconstant": "specularConstant", - "specularexponent": "specularExponent", - "spreadmethod": "spreadMethod", - "startoffset": "startOffset", - "stddeviation": "stdDeviation", - "stitchtiles": "stitchTiles", - "surfacescale": "surfaceScale", - "systemlanguage": "systemLanguage", - "tablevalues": "tableValues", - "targetx": "targetX", - "targety": "targetY", - "textlength": "textLength", - "viewbox": "viewBox", - "viewtarget": "viewTarget", - "xchannelselector": "xChannelSelector", - "ychannelselector": "yChannelSelector", - "zoomandpan": "zoomAndPan" -} - -adjustMathMLAttributes = {"definitionurl": "definitionURL"} - -adjustForeignAttributes = { - "xlink:actuate": ("xlink", "actuate", namespaces["xlink"]), - "xlink:arcrole": ("xlink", "arcrole", namespaces["xlink"]), - "xlink:href": ("xlink", "href", namespaces["xlink"]), - "xlink:role": ("xlink", "role", namespaces["xlink"]), - "xlink:show": ("xlink", "show", namespaces["xlink"]), - "xlink:title": ("xlink", "title", namespaces["xlink"]), - "xlink:type": ("xlink", "type", namespaces["xlink"]), - "xml:base": ("xml", "base", namespaces["xml"]), - "xml:lang": ("xml", "lang", namespaces["xml"]), - "xml:space": ("xml", "space", namespaces["xml"]), - "xmlns": (None, "xmlns", namespaces["xmlns"]), - "xmlns:xlink": ("xmlns", "xlink", namespaces["xmlns"]) -} - -unadjustForeignAttributes = dict([((ns, local), qname) for qname, (prefix, local, ns) in - adjustForeignAttributes.items()]) - -spaceCharacters = frozenset([ - "\t", - "\n", - "\u000C", - " ", - "\r" -]) - -tableInsertModeElements = frozenset([ - "table", - "tbody", - "tfoot", - "thead", - "tr" -]) - -asciiLowercase = frozenset(string.ascii_lowercase) -asciiUppercase = frozenset(string.ascii_uppercase) -asciiLetters = frozenset(string.ascii_letters) -digits = frozenset(string.digits) -hexDigits = frozenset(string.hexdigits) - -asciiUpper2Lower = dict([(ord(c), ord(c.lower())) - for c in string.ascii_uppercase]) - -# Heading elements need to be ordered -headingElements = ( - "h1", - "h2", - "h3", - "h4", - "h5", - "h6" -) - -voidElements = frozenset([ - "base", - "command", - "event-source", - "link", - "meta", - "hr", - "br", - "img", - "embed", - "param", - "area", - "col", - "input", - "source", - "track" -]) - -cdataElements = frozenset(['title', 'textarea']) - -rcdataElements = frozenset([ - 'style', - 'script', - 'xmp', - 'iframe', - 'noembed', - 'noframes', - 'noscript' -]) - -booleanAttributes = { - "": frozenset(["irrelevant", "itemscope"]), - "style": frozenset(["scoped"]), - "img": frozenset(["ismap"]), - "audio": frozenset(["autoplay", "controls"]), - "video": frozenset(["autoplay", "controls"]), - "script": frozenset(["defer", "async"]), - "details": frozenset(["open"]), - "datagrid": frozenset(["multiple", "disabled"]), - "command": frozenset(["hidden", "disabled", "checked", "default"]), - "hr": frozenset(["noshade"]), - "menu": frozenset(["autosubmit"]), - "fieldset": frozenset(["disabled", "readonly"]), - "option": frozenset(["disabled", "readonly", "selected"]), - "optgroup": frozenset(["disabled", "readonly"]), - "button": frozenset(["disabled", "autofocus"]), - "input": frozenset(["disabled", "readonly", "required", "autofocus", "checked", "ismap"]), - "select": frozenset(["disabled", "readonly", "autofocus", "multiple"]), - "output": frozenset(["disabled", "readonly"]), - "iframe": frozenset(["seamless"]), -} - -# entitiesWindows1252 has to be _ordered_ and needs to have an index. It -# therefore can't be a frozenset. -entitiesWindows1252 = ( - 8364, # 0x80 0x20AC EURO SIGN - 65533, # 0x81 UNDEFINED - 8218, # 0x82 0x201A SINGLE LOW-9 QUOTATION MARK - 402, # 0x83 0x0192 LATIN SMALL LETTER F WITH HOOK - 8222, # 0x84 0x201E DOUBLE LOW-9 QUOTATION MARK - 8230, # 0x85 0x2026 HORIZONTAL ELLIPSIS - 8224, # 0x86 0x2020 DAGGER - 8225, # 0x87 0x2021 DOUBLE DAGGER - 710, # 0x88 0x02C6 MODIFIER LETTER CIRCUMFLEX ACCENT - 8240, # 0x89 0x2030 PER MILLE SIGN - 352, # 0x8A 0x0160 LATIN CAPITAL LETTER S WITH CARON - 8249, # 0x8B 0x2039 SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 338, # 0x8C 0x0152 LATIN CAPITAL LIGATURE OE - 65533, # 0x8D UNDEFINED - 381, # 0x8E 0x017D LATIN CAPITAL LETTER Z WITH CARON - 65533, # 0x8F UNDEFINED - 65533, # 0x90 UNDEFINED - 8216, # 0x91 0x2018 LEFT SINGLE QUOTATION MARK - 8217, # 0x92 0x2019 RIGHT SINGLE QUOTATION MARK - 8220, # 0x93 0x201C LEFT DOUBLE QUOTATION MARK - 8221, # 0x94 0x201D RIGHT DOUBLE QUOTATION MARK - 8226, # 0x95 0x2022 BULLET - 8211, # 0x96 0x2013 EN DASH - 8212, # 0x97 0x2014 EM DASH - 732, # 0x98 0x02DC SMALL TILDE - 8482, # 0x99 0x2122 TRADE MARK SIGN - 353, # 0x9A 0x0161 LATIN SMALL LETTER S WITH CARON - 8250, # 0x9B 0x203A SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 339, # 0x9C 0x0153 LATIN SMALL LIGATURE OE - 65533, # 0x9D UNDEFINED - 382, # 0x9E 0x017E LATIN SMALL LETTER Z WITH CARON - 376 # 0x9F 0x0178 LATIN CAPITAL LETTER Y WITH DIAERESIS -) - -xmlEntities = frozenset(['lt;', 'gt;', 'amp;', 'apos;', 'quot;']) - -entities = { - "AElig": "\xc6", - "AElig;": "\xc6", - "AMP": "&", - "AMP;": "&", - "Aacute": "\xc1", - "Aacute;": "\xc1", - "Abreve;": "\u0102", - "Acirc": "\xc2", - "Acirc;": "\xc2", - "Acy;": "\u0410", - "Afr;": "\U0001d504", - "Agrave": "\xc0", - "Agrave;": "\xc0", - "Alpha;": "\u0391", - "Amacr;": "\u0100", - "And;": "\u2a53", - "Aogon;": "\u0104", - "Aopf;": "\U0001d538", - "ApplyFunction;": "\u2061", - "Aring": "\xc5", - "Aring;": "\xc5", - "Ascr;": "\U0001d49c", - "Assign;": "\u2254", - "Atilde": "\xc3", - "Atilde;": "\xc3", - "Auml": "\xc4", - "Auml;": "\xc4", - "Backslash;": "\u2216", - "Barv;": "\u2ae7", - "Barwed;": "\u2306", - "Bcy;": "\u0411", - "Because;": "\u2235", - "Bernoullis;": "\u212c", - "Beta;": "\u0392", - "Bfr;": "\U0001d505", - "Bopf;": "\U0001d539", - "Breve;": "\u02d8", - "Bscr;": "\u212c", - "Bumpeq;": "\u224e", - "CHcy;": "\u0427", - "COPY": "\xa9", - "COPY;": "\xa9", - "Cacute;": "\u0106", - "Cap;": "\u22d2", - "CapitalDifferentialD;": "\u2145", - "Cayleys;": "\u212d", - "Ccaron;": "\u010c", - "Ccedil": "\xc7", - "Ccedil;": "\xc7", - "Ccirc;": "\u0108", - "Cconint;": "\u2230", - "Cdot;": "\u010a", - "Cedilla;": "\xb8", - "CenterDot;": "\xb7", - "Cfr;": "\u212d", - "Chi;": "\u03a7", - "CircleDot;": "\u2299", - "CircleMinus;": "\u2296", - "CirclePlus;": "\u2295", - "CircleTimes;": "\u2297", - "ClockwiseContourIntegral;": "\u2232", - "CloseCurlyDoubleQuote;": "\u201d", - "CloseCurlyQuote;": "\u2019", - "Colon;": "\u2237", - "Colone;": "\u2a74", - "Congruent;": "\u2261", - "Conint;": "\u222f", - "ContourIntegral;": "\u222e", - "Copf;": "\u2102", - "Coproduct;": "\u2210", - "CounterClockwiseContourIntegral;": "\u2233", - "Cross;": "\u2a2f", - "Cscr;": "\U0001d49e", - "Cup;": "\u22d3", - "CupCap;": "\u224d", - "DD;": "\u2145", - "DDotrahd;": "\u2911", - "DJcy;": "\u0402", - "DScy;": "\u0405", - "DZcy;": "\u040f", - "Dagger;": "\u2021", - "Darr;": "\u21a1", - "Dashv;": "\u2ae4", - "Dcaron;": "\u010e", - "Dcy;": "\u0414", - "Del;": "\u2207", - "Delta;": "\u0394", - "Dfr;": "\U0001d507", - "DiacriticalAcute;": "\xb4", - "DiacriticalDot;": "\u02d9", - "DiacriticalDoubleAcute;": "\u02dd", - "DiacriticalGrave;": "`", - "DiacriticalTilde;": "\u02dc", - "Diamond;": "\u22c4", - "DifferentialD;": "\u2146", - "Dopf;": "\U0001d53b", - "Dot;": "\xa8", - "DotDot;": "\u20dc", - "DotEqual;": "\u2250", - "DoubleContourIntegral;": "\u222f", - "DoubleDot;": "\xa8", - "DoubleDownArrow;": "\u21d3", - "DoubleLeftArrow;": "\u21d0", - "DoubleLeftRightArrow;": "\u21d4", - "DoubleLeftTee;": "\u2ae4", - "DoubleLongLeftArrow;": "\u27f8", - "DoubleLongLeftRightArrow;": "\u27fa", - "DoubleLongRightArrow;": "\u27f9", - "DoubleRightArrow;": "\u21d2", - "DoubleRightTee;": "\u22a8", - "DoubleUpArrow;": "\u21d1", - "DoubleUpDownArrow;": "\u21d5", - "DoubleVerticalBar;": "\u2225", - "DownArrow;": "\u2193", - "DownArrowBar;": "\u2913", - "DownArrowUpArrow;": "\u21f5", - "DownBreve;": "\u0311", - "DownLeftRightVector;": "\u2950", - "DownLeftTeeVector;": "\u295e", - "DownLeftVector;": "\u21bd", - "DownLeftVectorBar;": "\u2956", - "DownRightTeeVector;": "\u295f", - "DownRightVector;": "\u21c1", - "DownRightVectorBar;": "\u2957", - "DownTee;": "\u22a4", - "DownTeeArrow;": "\u21a7", - "Downarrow;": "\u21d3", - "Dscr;": "\U0001d49f", - "Dstrok;": "\u0110", - "ENG;": "\u014a", - "ETH": "\xd0", - "ETH;": "\xd0", - "Eacute": "\xc9", - "Eacute;": "\xc9", - "Ecaron;": "\u011a", - "Ecirc": "\xca", - "Ecirc;": "\xca", - "Ecy;": "\u042d", - "Edot;": "\u0116", - "Efr;": "\U0001d508", - "Egrave": "\xc8", - "Egrave;": "\xc8", - "Element;": "\u2208", - "Emacr;": "\u0112", - "EmptySmallSquare;": "\u25fb", - "EmptyVerySmallSquare;": "\u25ab", - "Eogon;": "\u0118", - "Eopf;": "\U0001d53c", - "Epsilon;": "\u0395", - "Equal;": "\u2a75", - "EqualTilde;": "\u2242", - "Equilibrium;": "\u21cc", - "Escr;": "\u2130", - "Esim;": "\u2a73", - "Eta;": "\u0397", - "Euml": "\xcb", - "Euml;": "\xcb", - "Exists;": "\u2203", - "ExponentialE;": "\u2147", - "Fcy;": "\u0424", - "Ffr;": "\U0001d509", - "FilledSmallSquare;": "\u25fc", - "FilledVerySmallSquare;": "\u25aa", - "Fopf;": "\U0001d53d", - "ForAll;": "\u2200", - "Fouriertrf;": "\u2131", - "Fscr;": "\u2131", - "GJcy;": "\u0403", - "GT": ">", - "GT;": ">", - "Gamma;": "\u0393", - "Gammad;": "\u03dc", - "Gbreve;": "\u011e", - "Gcedil;": "\u0122", - "Gcirc;": "\u011c", - "Gcy;": "\u0413", - "Gdot;": "\u0120", - "Gfr;": "\U0001d50a", - "Gg;": "\u22d9", - "Gopf;": "\U0001d53e", - "GreaterEqual;": "\u2265", - "GreaterEqualLess;": "\u22db", - "GreaterFullEqual;": "\u2267", - "GreaterGreater;": "\u2aa2", - "GreaterLess;": "\u2277", - "GreaterSlantEqual;": "\u2a7e", - "GreaterTilde;": "\u2273", - "Gscr;": "\U0001d4a2", - "Gt;": "\u226b", - "HARDcy;": "\u042a", - "Hacek;": "\u02c7", - "Hat;": "^", - "Hcirc;": "\u0124", - "Hfr;": "\u210c", - "HilbertSpace;": "\u210b", - "Hopf;": "\u210d", - "HorizontalLine;": "\u2500", - "Hscr;": "\u210b", - "Hstrok;": "\u0126", - "HumpDownHump;": "\u224e", - "HumpEqual;": "\u224f", - "IEcy;": "\u0415", - "IJlig;": "\u0132", - "IOcy;": "\u0401", - "Iacute": "\xcd", - "Iacute;": "\xcd", - "Icirc": "\xce", - "Icirc;": "\xce", - "Icy;": "\u0418", - "Idot;": "\u0130", - "Ifr;": "\u2111", - "Igrave": "\xcc", - "Igrave;": "\xcc", - "Im;": "\u2111", - "Imacr;": "\u012a", - "ImaginaryI;": "\u2148", - "Implies;": "\u21d2", - "Int;": "\u222c", - "Integral;": "\u222b", - "Intersection;": "\u22c2", - "InvisibleComma;": "\u2063", - "InvisibleTimes;": "\u2062", - "Iogon;": "\u012e", - "Iopf;": "\U0001d540", - "Iota;": "\u0399", - "Iscr;": "\u2110", - "Itilde;": "\u0128", - "Iukcy;": "\u0406", - "Iuml": "\xcf", - "Iuml;": "\xcf", - "Jcirc;": "\u0134", - "Jcy;": "\u0419", - "Jfr;": "\U0001d50d", - "Jopf;": "\U0001d541", - "Jscr;": "\U0001d4a5", - "Jsercy;": "\u0408", - "Jukcy;": "\u0404", - "KHcy;": "\u0425", - "KJcy;": "\u040c", - "Kappa;": "\u039a", - "Kcedil;": "\u0136", - "Kcy;": "\u041a", - "Kfr;": "\U0001d50e", - "Kopf;": "\U0001d542", - "Kscr;": "\U0001d4a6", - "LJcy;": "\u0409", - "LT": "<", - "LT;": "<", - "Lacute;": "\u0139", - "Lambda;": "\u039b", - "Lang;": "\u27ea", - "Laplacetrf;": "\u2112", - "Larr;": "\u219e", - "Lcaron;": "\u013d", - "Lcedil;": "\u013b", - "Lcy;": "\u041b", - "LeftAngleBracket;": "\u27e8", - "LeftArrow;": "\u2190", - "LeftArrowBar;": "\u21e4", - "LeftArrowRightArrow;": "\u21c6", - "LeftCeiling;": "\u2308", - "LeftDoubleBracket;": "\u27e6", - "LeftDownTeeVector;": "\u2961", - "LeftDownVector;": "\u21c3", - "LeftDownVectorBar;": "\u2959", - "LeftFloor;": "\u230a", - "LeftRightArrow;": "\u2194", - "LeftRightVector;": "\u294e", - "LeftTee;": "\u22a3", - "LeftTeeArrow;": "\u21a4", - "LeftTeeVector;": "\u295a", - "LeftTriangle;": "\u22b2", - "LeftTriangleBar;": "\u29cf", - "LeftTriangleEqual;": "\u22b4", - "LeftUpDownVector;": "\u2951", - "LeftUpTeeVector;": "\u2960", - "LeftUpVector;": "\u21bf", - "LeftUpVectorBar;": "\u2958", - "LeftVector;": "\u21bc", - "LeftVectorBar;": "\u2952", - "Leftarrow;": "\u21d0", - "Leftrightarrow;": "\u21d4", - "LessEqualGreater;": "\u22da", - "LessFullEqual;": "\u2266", - "LessGreater;": "\u2276", - "LessLess;": "\u2aa1", - "LessSlantEqual;": "\u2a7d", - "LessTilde;": "\u2272", - "Lfr;": "\U0001d50f", - "Ll;": "\u22d8", - "Lleftarrow;": "\u21da", - "Lmidot;": "\u013f", - "LongLeftArrow;": "\u27f5", - "LongLeftRightArrow;": "\u27f7", - "LongRightArrow;": "\u27f6", - "Longleftarrow;": "\u27f8", - "Longleftrightarrow;": "\u27fa", - "Longrightarrow;": "\u27f9", - "Lopf;": "\U0001d543", - "LowerLeftArrow;": "\u2199", - "LowerRightArrow;": "\u2198", - "Lscr;": "\u2112", - "Lsh;": "\u21b0", - "Lstrok;": "\u0141", - "Lt;": "\u226a", - "Map;": "\u2905", - "Mcy;": "\u041c", - "MediumSpace;": "\u205f", - "Mellintrf;": "\u2133", - "Mfr;": "\U0001d510", - "MinusPlus;": "\u2213", - "Mopf;": "\U0001d544", - "Mscr;": "\u2133", - "Mu;": "\u039c", - "NJcy;": "\u040a", - "Nacute;": "\u0143", - "Ncaron;": "\u0147", - "Ncedil;": "\u0145", - "Ncy;": "\u041d", - "NegativeMediumSpace;": "\u200b", - "NegativeThickSpace;": "\u200b", - "NegativeThinSpace;": "\u200b", - "NegativeVeryThinSpace;": "\u200b", - "NestedGreaterGreater;": "\u226b", - "NestedLessLess;": "\u226a", - "NewLine;": "\n", - "Nfr;": "\U0001d511", - "NoBreak;": "\u2060", - "NonBreakingSpace;": "\xa0", - "Nopf;": "\u2115", - "Not;": "\u2aec", - "NotCongruent;": "\u2262", - "NotCupCap;": "\u226d", - "NotDoubleVerticalBar;": "\u2226", - "NotElement;": "\u2209", - "NotEqual;": "\u2260", - "NotEqualTilde;": "\u2242\u0338", - "NotExists;": "\u2204", - "NotGreater;": "\u226f", - "NotGreaterEqual;": "\u2271", - "NotGreaterFullEqual;": "\u2267\u0338", - "NotGreaterGreater;": "\u226b\u0338", - "NotGreaterLess;": "\u2279", - "NotGreaterSlantEqual;": "\u2a7e\u0338", - "NotGreaterTilde;": "\u2275", - "NotHumpDownHump;": "\u224e\u0338", - "NotHumpEqual;": "\u224f\u0338", - "NotLeftTriangle;": "\u22ea", - "NotLeftTriangleBar;": "\u29cf\u0338", - "NotLeftTriangleEqual;": "\u22ec", - "NotLess;": "\u226e", - "NotLessEqual;": "\u2270", - "NotLessGreater;": "\u2278", - "NotLessLess;": "\u226a\u0338", - "NotLessSlantEqual;": "\u2a7d\u0338", - "NotLessTilde;": "\u2274", - "NotNestedGreaterGreater;": "\u2aa2\u0338", - "NotNestedLessLess;": "\u2aa1\u0338", - "NotPrecedes;": "\u2280", - "NotPrecedesEqual;": "\u2aaf\u0338", - "NotPrecedesSlantEqual;": "\u22e0", - "NotReverseElement;": "\u220c", - "NotRightTriangle;": "\u22eb", - "NotRightTriangleBar;": "\u29d0\u0338", - "NotRightTriangleEqual;": "\u22ed", - "NotSquareSubset;": "\u228f\u0338", - "NotSquareSubsetEqual;": "\u22e2", - "NotSquareSuperset;": "\u2290\u0338", - "NotSquareSupersetEqual;": "\u22e3", - "NotSubset;": "\u2282\u20d2", - "NotSubsetEqual;": "\u2288", - "NotSucceeds;": "\u2281", - "NotSucceedsEqual;": "\u2ab0\u0338", - "NotSucceedsSlantEqual;": "\u22e1", - "NotSucceedsTilde;": "\u227f\u0338", - "NotSuperset;": "\u2283\u20d2", - "NotSupersetEqual;": "\u2289", - "NotTilde;": "\u2241", - "NotTildeEqual;": "\u2244", - "NotTildeFullEqual;": "\u2247", - "NotTildeTilde;": "\u2249", - "NotVerticalBar;": "\u2224", - "Nscr;": "\U0001d4a9", - "Ntilde": "\xd1", - "Ntilde;": "\xd1", - "Nu;": "\u039d", - "OElig;": "\u0152", - "Oacute": "\xd3", - "Oacute;": "\xd3", - "Ocirc": "\xd4", - "Ocirc;": "\xd4", - "Ocy;": "\u041e", - "Odblac;": "\u0150", - "Ofr;": "\U0001d512", - "Ograve": "\xd2", - "Ograve;": "\xd2", - "Omacr;": "\u014c", - "Omega;": "\u03a9", - "Omicron;": "\u039f", - "Oopf;": "\U0001d546", - "OpenCurlyDoubleQuote;": "\u201c", - "OpenCurlyQuote;": "\u2018", - "Or;": "\u2a54", - "Oscr;": "\U0001d4aa", - "Oslash": "\xd8", - "Oslash;": "\xd8", - "Otilde": "\xd5", - "Otilde;": "\xd5", - "Otimes;": "\u2a37", - "Ouml": "\xd6", - "Ouml;": "\xd6", - "OverBar;": "\u203e", - "OverBrace;": "\u23de", - "OverBracket;": "\u23b4", - "OverParenthesis;": "\u23dc", - "PartialD;": "\u2202", - "Pcy;": "\u041f", - "Pfr;": "\U0001d513", - "Phi;": "\u03a6", - "Pi;": "\u03a0", - "PlusMinus;": "\xb1", - "Poincareplane;": "\u210c", - "Popf;": "\u2119", - "Pr;": "\u2abb", - "Precedes;": "\u227a", - "PrecedesEqual;": "\u2aaf", - "PrecedesSlantEqual;": "\u227c", - "PrecedesTilde;": "\u227e", - "Prime;": "\u2033", - "Product;": "\u220f", - "Proportion;": "\u2237", - "Proportional;": "\u221d", - "Pscr;": "\U0001d4ab", - "Psi;": "\u03a8", - "QUOT": "\"", - "QUOT;": "\"", - "Qfr;": "\U0001d514", - "Qopf;": "\u211a", - "Qscr;": "\U0001d4ac", - "RBarr;": "\u2910", - "REG": "\xae", - "REG;": "\xae", - "Racute;": "\u0154", - "Rang;": "\u27eb", - "Rarr;": "\u21a0", - "Rarrtl;": "\u2916", - "Rcaron;": "\u0158", - "Rcedil;": "\u0156", - "Rcy;": "\u0420", - "Re;": "\u211c", - "ReverseElement;": "\u220b", - "ReverseEquilibrium;": "\u21cb", - "ReverseUpEquilibrium;": "\u296f", - "Rfr;": "\u211c", - "Rho;": "\u03a1", - "RightAngleBracket;": "\u27e9", - "RightArrow;": "\u2192", - "RightArrowBar;": "\u21e5", - "RightArrowLeftArrow;": "\u21c4", - "RightCeiling;": "\u2309", - "RightDoubleBracket;": "\u27e7", - "RightDownTeeVector;": "\u295d", - "RightDownVector;": "\u21c2", - "RightDownVectorBar;": "\u2955", - "RightFloor;": "\u230b", - "RightTee;": "\u22a2", - "RightTeeArrow;": "\u21a6", - "RightTeeVector;": "\u295b", - "RightTriangle;": "\u22b3", - "RightTriangleBar;": "\u29d0", - "RightTriangleEqual;": "\u22b5", - "RightUpDownVector;": "\u294f", - "RightUpTeeVector;": "\u295c", - "RightUpVector;": "\u21be", - "RightUpVectorBar;": "\u2954", - "RightVector;": "\u21c0", - "RightVectorBar;": "\u2953", - "Rightarrow;": "\u21d2", - "Ropf;": "\u211d", - "RoundImplies;": "\u2970", - "Rrightarrow;": "\u21db", - "Rscr;": "\u211b", - "Rsh;": "\u21b1", - "RuleDelayed;": "\u29f4", - "SHCHcy;": "\u0429", - "SHcy;": "\u0428", - "SOFTcy;": "\u042c", - "Sacute;": "\u015a", - "Sc;": "\u2abc", - "Scaron;": "\u0160", - "Scedil;": "\u015e", - "Scirc;": "\u015c", - "Scy;": "\u0421", - "Sfr;": "\U0001d516", - "ShortDownArrow;": "\u2193", - "ShortLeftArrow;": "\u2190", - "ShortRightArrow;": "\u2192", - "ShortUpArrow;": "\u2191", - "Sigma;": "\u03a3", - "SmallCircle;": "\u2218", - "Sopf;": "\U0001d54a", - "Sqrt;": "\u221a", - "Square;": "\u25a1", - "SquareIntersection;": "\u2293", - "SquareSubset;": "\u228f", - "SquareSubsetEqual;": "\u2291", - "SquareSuperset;": "\u2290", - "SquareSupersetEqual;": "\u2292", - "SquareUnion;": "\u2294", - "Sscr;": "\U0001d4ae", - "Star;": "\u22c6", - "Sub;": "\u22d0", - "Subset;": "\u22d0", - "SubsetEqual;": "\u2286", - "Succeeds;": "\u227b", - "SucceedsEqual;": "\u2ab0", - "SucceedsSlantEqual;": "\u227d", - "SucceedsTilde;": "\u227f", - "SuchThat;": "\u220b", - "Sum;": "\u2211", - "Sup;": "\u22d1", - "Superset;": "\u2283", - "SupersetEqual;": "\u2287", - "Supset;": "\u22d1", - "THORN": "\xde", - "THORN;": "\xde", - "TRADE;": "\u2122", - "TSHcy;": "\u040b", - "TScy;": "\u0426", - "Tab;": "\t", - "Tau;": "\u03a4", - "Tcaron;": "\u0164", - "Tcedil;": "\u0162", - "Tcy;": "\u0422", - "Tfr;": "\U0001d517", - "Therefore;": "\u2234", - "Theta;": "\u0398", - "ThickSpace;": "\u205f\u200a", - "ThinSpace;": "\u2009", - "Tilde;": "\u223c", - "TildeEqual;": "\u2243", - "TildeFullEqual;": "\u2245", - "TildeTilde;": "\u2248", - "Topf;": "\U0001d54b", - "TripleDot;": "\u20db", - "Tscr;": "\U0001d4af", - "Tstrok;": "\u0166", - "Uacute": "\xda", - "Uacute;": "\xda", - "Uarr;": "\u219f", - "Uarrocir;": "\u2949", - "Ubrcy;": "\u040e", - "Ubreve;": "\u016c", - "Ucirc": "\xdb", - "Ucirc;": "\xdb", - "Ucy;": "\u0423", - "Udblac;": "\u0170", - "Ufr;": "\U0001d518", - "Ugrave": "\xd9", - "Ugrave;": "\xd9", - "Umacr;": "\u016a", - "UnderBar;": "_", - "UnderBrace;": "\u23df", - "UnderBracket;": "\u23b5", - "UnderParenthesis;": "\u23dd", - "Union;": "\u22c3", - "UnionPlus;": "\u228e", - "Uogon;": "\u0172", - "Uopf;": "\U0001d54c", - "UpArrow;": "\u2191", - "UpArrowBar;": "\u2912", - "UpArrowDownArrow;": "\u21c5", - "UpDownArrow;": "\u2195", - "UpEquilibrium;": "\u296e", - "UpTee;": "\u22a5", - "UpTeeArrow;": "\u21a5", - "Uparrow;": "\u21d1", - "Updownarrow;": "\u21d5", - "UpperLeftArrow;": "\u2196", - "UpperRightArrow;": "\u2197", - "Upsi;": "\u03d2", - "Upsilon;": "\u03a5", - "Uring;": "\u016e", - "Uscr;": "\U0001d4b0", - "Utilde;": "\u0168", - "Uuml": "\xdc", - "Uuml;": "\xdc", - "VDash;": "\u22ab", - "Vbar;": "\u2aeb", - "Vcy;": "\u0412", - "Vdash;": "\u22a9", - "Vdashl;": "\u2ae6", - "Vee;": "\u22c1", - "Verbar;": "\u2016", - "Vert;": "\u2016", - "VerticalBar;": "\u2223", - "VerticalLine;": "|", - "VerticalSeparator;": "\u2758", - "VerticalTilde;": "\u2240", - "VeryThinSpace;": "\u200a", - "Vfr;": "\U0001d519", - "Vopf;": "\U0001d54d", - "Vscr;": "\U0001d4b1", - "Vvdash;": "\u22aa", - "Wcirc;": "\u0174", - "Wedge;": "\u22c0", - "Wfr;": "\U0001d51a", - "Wopf;": "\U0001d54e", - "Wscr;": "\U0001d4b2", - "Xfr;": "\U0001d51b", - "Xi;": "\u039e", - "Xopf;": "\U0001d54f", - "Xscr;": "\U0001d4b3", - "YAcy;": "\u042f", - "YIcy;": "\u0407", - "YUcy;": "\u042e", - "Yacute": "\xdd", - "Yacute;": "\xdd", - "Ycirc;": "\u0176", - "Ycy;": "\u042b", - "Yfr;": "\U0001d51c", - "Yopf;": "\U0001d550", - "Yscr;": "\U0001d4b4", - "Yuml;": "\u0178", - "ZHcy;": "\u0416", - "Zacute;": "\u0179", - "Zcaron;": "\u017d", - "Zcy;": "\u0417", - "Zdot;": "\u017b", - "ZeroWidthSpace;": "\u200b", - "Zeta;": "\u0396", - "Zfr;": "\u2128", - "Zopf;": "\u2124", - "Zscr;": "\U0001d4b5", - "aacute": "\xe1", - "aacute;": "\xe1", - "abreve;": "\u0103", - "ac;": "\u223e", - "acE;": "\u223e\u0333", - "acd;": "\u223f", - "acirc": "\xe2", - "acirc;": "\xe2", - "acute": "\xb4", - "acute;": "\xb4", - "acy;": "\u0430", - "aelig": "\xe6", - "aelig;": "\xe6", - "af;": "\u2061", - "afr;": "\U0001d51e", - "agrave": "\xe0", - "agrave;": "\xe0", - "alefsym;": "\u2135", - "aleph;": "\u2135", - "alpha;": "\u03b1", - "amacr;": "\u0101", - "amalg;": "\u2a3f", - "amp": "&", - "amp;": "&", - "and;": "\u2227", - "andand;": "\u2a55", - "andd;": "\u2a5c", - "andslope;": "\u2a58", - "andv;": "\u2a5a", - "ang;": "\u2220", - "ange;": "\u29a4", - "angle;": "\u2220", - "angmsd;": "\u2221", - "angmsdaa;": "\u29a8", - "angmsdab;": "\u29a9", - "angmsdac;": "\u29aa", - "angmsdad;": "\u29ab", - "angmsdae;": "\u29ac", - "angmsdaf;": "\u29ad", - "angmsdag;": "\u29ae", - "angmsdah;": "\u29af", - "angrt;": "\u221f", - "angrtvb;": "\u22be", - "angrtvbd;": "\u299d", - "angsph;": "\u2222", - "angst;": "\xc5", - "angzarr;": "\u237c", - "aogon;": "\u0105", - "aopf;": "\U0001d552", - "ap;": "\u2248", - "apE;": "\u2a70", - "apacir;": "\u2a6f", - "ape;": "\u224a", - "apid;": "\u224b", - "apos;": "'", - "approx;": "\u2248", - "approxeq;": "\u224a", - "aring": "\xe5", - "aring;": "\xe5", - "ascr;": "\U0001d4b6", - "ast;": "*", - "asymp;": "\u2248", - "asympeq;": "\u224d", - "atilde": "\xe3", - "atilde;": "\xe3", - "auml": "\xe4", - "auml;": "\xe4", - "awconint;": "\u2233", - "awint;": "\u2a11", - "bNot;": "\u2aed", - "backcong;": "\u224c", - "backepsilon;": "\u03f6", - "backprime;": "\u2035", - "backsim;": "\u223d", - "backsimeq;": "\u22cd", - "barvee;": "\u22bd", - "barwed;": "\u2305", - "barwedge;": "\u2305", - "bbrk;": "\u23b5", - "bbrktbrk;": "\u23b6", - "bcong;": "\u224c", - "bcy;": "\u0431", - "bdquo;": "\u201e", - "becaus;": "\u2235", - "because;": "\u2235", - "bemptyv;": "\u29b0", - "bepsi;": "\u03f6", - "bernou;": "\u212c", - "beta;": "\u03b2", - "beth;": "\u2136", - "between;": "\u226c", - "bfr;": "\U0001d51f", - "bigcap;": "\u22c2", - "bigcirc;": "\u25ef", - "bigcup;": "\u22c3", - "bigodot;": "\u2a00", - "bigoplus;": "\u2a01", - "bigotimes;": "\u2a02", - "bigsqcup;": "\u2a06", - "bigstar;": "\u2605", - "bigtriangledown;": "\u25bd", - "bigtriangleup;": "\u25b3", - "biguplus;": "\u2a04", - "bigvee;": "\u22c1", - "bigwedge;": "\u22c0", - "bkarow;": "\u290d", - "blacklozenge;": "\u29eb", - "blacksquare;": "\u25aa", - "blacktriangle;": "\u25b4", - "blacktriangledown;": "\u25be", - "blacktriangleleft;": "\u25c2", - "blacktriangleright;": "\u25b8", - "blank;": "\u2423", - "blk12;": "\u2592", - "blk14;": "\u2591", - "blk34;": "\u2593", - "block;": "\u2588", - "bne;": "=\u20e5", - "bnequiv;": "\u2261\u20e5", - "bnot;": "\u2310", - "bopf;": "\U0001d553", - "bot;": "\u22a5", - "bottom;": "\u22a5", - "bowtie;": "\u22c8", - "boxDL;": "\u2557", - "boxDR;": "\u2554", - "boxDl;": "\u2556", - "boxDr;": "\u2553", - "boxH;": "\u2550", - "boxHD;": "\u2566", - "boxHU;": "\u2569", - "boxHd;": "\u2564", - "boxHu;": "\u2567", - "boxUL;": "\u255d", - "boxUR;": "\u255a", - "boxUl;": "\u255c", - "boxUr;": "\u2559", - "boxV;": "\u2551", - "boxVH;": "\u256c", - "boxVL;": "\u2563", - "boxVR;": "\u2560", - "boxVh;": "\u256b", - "boxVl;": "\u2562", - "boxVr;": "\u255f", - "boxbox;": "\u29c9", - "boxdL;": "\u2555", - "boxdR;": "\u2552", - "boxdl;": "\u2510", - "boxdr;": "\u250c", - "boxh;": "\u2500", - "boxhD;": "\u2565", - "boxhU;": "\u2568", - "boxhd;": "\u252c", - "boxhu;": "\u2534", - "boxminus;": "\u229f", - "boxplus;": "\u229e", - "boxtimes;": "\u22a0", - "boxuL;": "\u255b", - "boxuR;": "\u2558", - "boxul;": "\u2518", - "boxur;": "\u2514", - "boxv;": "\u2502", - "boxvH;": "\u256a", - "boxvL;": "\u2561", - "boxvR;": "\u255e", - "boxvh;": "\u253c", - "boxvl;": "\u2524", - "boxvr;": "\u251c", - "bprime;": "\u2035", - "breve;": "\u02d8", - "brvbar": "\xa6", - "brvbar;": "\xa6", - "bscr;": "\U0001d4b7", - "bsemi;": "\u204f", - "bsim;": "\u223d", - "bsime;": "\u22cd", - "bsol;": "\\", - "bsolb;": "\u29c5", - "bsolhsub;": "\u27c8", - "bull;": "\u2022", - "bullet;": "\u2022", - "bump;": "\u224e", - "bumpE;": "\u2aae", - "bumpe;": "\u224f", - "bumpeq;": "\u224f", - "cacute;": "\u0107", - "cap;": "\u2229", - "capand;": "\u2a44", - "capbrcup;": "\u2a49", - "capcap;": "\u2a4b", - "capcup;": "\u2a47", - "capdot;": "\u2a40", - "caps;": "\u2229\ufe00", - "caret;": "\u2041", - "caron;": "\u02c7", - "ccaps;": "\u2a4d", - "ccaron;": "\u010d", - "ccedil": "\xe7", - "ccedil;": "\xe7", - "ccirc;": "\u0109", - "ccups;": "\u2a4c", - "ccupssm;": "\u2a50", - "cdot;": "\u010b", - "cedil": "\xb8", - "cedil;": "\xb8", - "cemptyv;": "\u29b2", - "cent": "\xa2", - "cent;": "\xa2", - "centerdot;": "\xb7", - "cfr;": "\U0001d520", - "chcy;": "\u0447", - "check;": "\u2713", - "checkmark;": "\u2713", - "chi;": "\u03c7", - "cir;": "\u25cb", - "cirE;": "\u29c3", - "circ;": "\u02c6", - "circeq;": "\u2257", - "circlearrowleft;": "\u21ba", - "circlearrowright;": "\u21bb", - "circledR;": "\xae", - "circledS;": "\u24c8", - "circledast;": "\u229b", - "circledcirc;": "\u229a", - "circleddash;": "\u229d", - "cire;": "\u2257", - "cirfnint;": "\u2a10", - "cirmid;": "\u2aef", - "cirscir;": "\u29c2", - "clubs;": "\u2663", - "clubsuit;": "\u2663", - "colon;": ":", - "colone;": "\u2254", - "coloneq;": "\u2254", - "comma;": ",", - "commat;": "@", - "comp;": "\u2201", - "compfn;": "\u2218", - "complement;": "\u2201", - "complexes;": "\u2102", - "cong;": "\u2245", - "congdot;": "\u2a6d", - "conint;": "\u222e", - "copf;": "\U0001d554", - "coprod;": "\u2210", - "copy": "\xa9", - "copy;": "\xa9", - "copysr;": "\u2117", - "crarr;": "\u21b5", - "cross;": "\u2717", - "cscr;": "\U0001d4b8", - "csub;": "\u2acf", - "csube;": "\u2ad1", - "csup;": "\u2ad0", - "csupe;": "\u2ad2", - "ctdot;": "\u22ef", - "cudarrl;": "\u2938", - "cudarrr;": "\u2935", - "cuepr;": "\u22de", - "cuesc;": "\u22df", - "cularr;": "\u21b6", - "cularrp;": "\u293d", - "cup;": "\u222a", - "cupbrcap;": "\u2a48", - "cupcap;": "\u2a46", - "cupcup;": "\u2a4a", - "cupdot;": "\u228d", - "cupor;": "\u2a45", - "cups;": "\u222a\ufe00", - "curarr;": "\u21b7", - "curarrm;": "\u293c", - "curlyeqprec;": "\u22de", - "curlyeqsucc;": "\u22df", - "curlyvee;": "\u22ce", - "curlywedge;": "\u22cf", - "curren": "\xa4", - "curren;": "\xa4", - "curvearrowleft;": "\u21b6", - "curvearrowright;": "\u21b7", - "cuvee;": "\u22ce", - "cuwed;": "\u22cf", - "cwconint;": "\u2232", - "cwint;": "\u2231", - "cylcty;": "\u232d", - "dArr;": "\u21d3", - "dHar;": "\u2965", - "dagger;": "\u2020", - "daleth;": "\u2138", - "darr;": "\u2193", - "dash;": "\u2010", - "dashv;": "\u22a3", - "dbkarow;": "\u290f", - "dblac;": "\u02dd", - "dcaron;": "\u010f", - "dcy;": "\u0434", - "dd;": "\u2146", - "ddagger;": "\u2021", - "ddarr;": "\u21ca", - "ddotseq;": "\u2a77", - "deg": "\xb0", - "deg;": "\xb0", - "delta;": "\u03b4", - "demptyv;": "\u29b1", - "dfisht;": "\u297f", - "dfr;": "\U0001d521", - "dharl;": "\u21c3", - "dharr;": "\u21c2", - "diam;": "\u22c4", - "diamond;": "\u22c4", - "diamondsuit;": "\u2666", - "diams;": "\u2666", - "die;": "\xa8", - "digamma;": "\u03dd", - "disin;": "\u22f2", - "div;": "\xf7", - "divide": "\xf7", - "divide;": "\xf7", - "divideontimes;": "\u22c7", - "divonx;": "\u22c7", - "djcy;": "\u0452", - "dlcorn;": "\u231e", - "dlcrop;": "\u230d", - "dollar;": "$", - "dopf;": "\U0001d555", - "dot;": "\u02d9", - "doteq;": "\u2250", - "doteqdot;": "\u2251", - "dotminus;": "\u2238", - "dotplus;": "\u2214", - "dotsquare;": "\u22a1", - "doublebarwedge;": "\u2306", - "downarrow;": "\u2193", - "downdownarrows;": "\u21ca", - "downharpoonleft;": "\u21c3", - "downharpoonright;": "\u21c2", - "drbkarow;": "\u2910", - "drcorn;": "\u231f", - "drcrop;": "\u230c", - "dscr;": "\U0001d4b9", - "dscy;": "\u0455", - "dsol;": "\u29f6", - "dstrok;": "\u0111", - "dtdot;": "\u22f1", - "dtri;": "\u25bf", - "dtrif;": "\u25be", - "duarr;": "\u21f5", - "duhar;": "\u296f", - "dwangle;": "\u29a6", - "dzcy;": "\u045f", - "dzigrarr;": "\u27ff", - "eDDot;": "\u2a77", - "eDot;": "\u2251", - "eacute": "\xe9", - "eacute;": "\xe9", - "easter;": "\u2a6e", - "ecaron;": "\u011b", - "ecir;": "\u2256", - "ecirc": "\xea", - "ecirc;": "\xea", - "ecolon;": "\u2255", - "ecy;": "\u044d", - "edot;": "\u0117", - "ee;": "\u2147", - "efDot;": "\u2252", - "efr;": "\U0001d522", - "eg;": "\u2a9a", - "egrave": "\xe8", - "egrave;": "\xe8", - "egs;": "\u2a96", - "egsdot;": "\u2a98", - "el;": "\u2a99", - "elinters;": "\u23e7", - "ell;": "\u2113", - "els;": "\u2a95", - "elsdot;": "\u2a97", - "emacr;": "\u0113", - "empty;": "\u2205", - "emptyset;": "\u2205", - "emptyv;": "\u2205", - "emsp13;": "\u2004", - "emsp14;": "\u2005", - "emsp;": "\u2003", - "eng;": "\u014b", - "ensp;": "\u2002", - "eogon;": "\u0119", - "eopf;": "\U0001d556", - "epar;": "\u22d5", - "eparsl;": "\u29e3", - "eplus;": "\u2a71", - "epsi;": "\u03b5", - "epsilon;": "\u03b5", - "epsiv;": "\u03f5", - "eqcirc;": "\u2256", - "eqcolon;": "\u2255", - "eqsim;": "\u2242", - "eqslantgtr;": "\u2a96", - "eqslantless;": "\u2a95", - "equals;": "=", - "equest;": "\u225f", - "equiv;": "\u2261", - "equivDD;": "\u2a78", - "eqvparsl;": "\u29e5", - "erDot;": "\u2253", - "erarr;": "\u2971", - "escr;": "\u212f", - "esdot;": "\u2250", - "esim;": "\u2242", - "eta;": "\u03b7", - "eth": "\xf0", - "eth;": "\xf0", - "euml": "\xeb", - "euml;": "\xeb", - "euro;": "\u20ac", - "excl;": "!", - "exist;": "\u2203", - "expectation;": "\u2130", - "exponentiale;": "\u2147", - "fallingdotseq;": "\u2252", - "fcy;": "\u0444", - "female;": "\u2640", - "ffilig;": "\ufb03", - "fflig;": "\ufb00", - "ffllig;": "\ufb04", - "ffr;": "\U0001d523", - "filig;": "\ufb01", - "fjlig;": "fj", - "flat;": "\u266d", - "fllig;": "\ufb02", - "fltns;": "\u25b1", - "fnof;": "\u0192", - "fopf;": "\U0001d557", - "forall;": "\u2200", - "fork;": "\u22d4", - "forkv;": "\u2ad9", - "fpartint;": "\u2a0d", - "frac12": "\xbd", - "frac12;": "\xbd", - "frac13;": "\u2153", - "frac14": "\xbc", - "frac14;": "\xbc", - "frac15;": "\u2155", - "frac16;": "\u2159", - "frac18;": "\u215b", - "frac23;": "\u2154", - "frac25;": "\u2156", - "frac34": "\xbe", - "frac34;": "\xbe", - "frac35;": "\u2157", - "frac38;": "\u215c", - "frac45;": "\u2158", - "frac56;": "\u215a", - "frac58;": "\u215d", - "frac78;": "\u215e", - "frasl;": "\u2044", - "frown;": "\u2322", - "fscr;": "\U0001d4bb", - "gE;": "\u2267", - "gEl;": "\u2a8c", - "gacute;": "\u01f5", - "gamma;": "\u03b3", - "gammad;": "\u03dd", - "gap;": "\u2a86", - "gbreve;": "\u011f", - "gcirc;": "\u011d", - "gcy;": "\u0433", - "gdot;": "\u0121", - "ge;": "\u2265", - "gel;": "\u22db", - "geq;": "\u2265", - "geqq;": "\u2267", - "geqslant;": "\u2a7e", - "ges;": "\u2a7e", - "gescc;": "\u2aa9", - "gesdot;": "\u2a80", - "gesdoto;": "\u2a82", - "gesdotol;": "\u2a84", - "gesl;": "\u22db\ufe00", - "gesles;": "\u2a94", - "gfr;": "\U0001d524", - "gg;": "\u226b", - "ggg;": "\u22d9", - "gimel;": "\u2137", - "gjcy;": "\u0453", - "gl;": "\u2277", - "glE;": "\u2a92", - "gla;": "\u2aa5", - "glj;": "\u2aa4", - "gnE;": "\u2269", - "gnap;": "\u2a8a", - "gnapprox;": "\u2a8a", - "gne;": "\u2a88", - "gneq;": "\u2a88", - "gneqq;": "\u2269", - "gnsim;": "\u22e7", - "gopf;": "\U0001d558", - "grave;": "`", - "gscr;": "\u210a", - "gsim;": "\u2273", - "gsime;": "\u2a8e", - "gsiml;": "\u2a90", - "gt": ">", - "gt;": ">", - "gtcc;": "\u2aa7", - "gtcir;": "\u2a7a", - "gtdot;": "\u22d7", - "gtlPar;": "\u2995", - "gtquest;": "\u2a7c", - "gtrapprox;": "\u2a86", - "gtrarr;": "\u2978", - "gtrdot;": "\u22d7", - "gtreqless;": "\u22db", - "gtreqqless;": "\u2a8c", - "gtrless;": "\u2277", - "gtrsim;": "\u2273", - "gvertneqq;": "\u2269\ufe00", - "gvnE;": "\u2269\ufe00", - "hArr;": "\u21d4", - "hairsp;": "\u200a", - "half;": "\xbd", - "hamilt;": "\u210b", - "hardcy;": "\u044a", - "harr;": "\u2194", - "harrcir;": "\u2948", - "harrw;": "\u21ad", - "hbar;": "\u210f", - "hcirc;": "\u0125", - "hearts;": "\u2665", - "heartsuit;": "\u2665", - "hellip;": "\u2026", - "hercon;": "\u22b9", - "hfr;": "\U0001d525", - "hksearow;": "\u2925", - "hkswarow;": "\u2926", - "hoarr;": "\u21ff", - "homtht;": "\u223b", - "hookleftarrow;": "\u21a9", - "hookrightarrow;": "\u21aa", - "hopf;": "\U0001d559", - "horbar;": "\u2015", - "hscr;": "\U0001d4bd", - "hslash;": "\u210f", - "hstrok;": "\u0127", - "hybull;": "\u2043", - "hyphen;": "\u2010", - "iacute": "\xed", - "iacute;": "\xed", - "ic;": "\u2063", - "icirc": "\xee", - "icirc;": "\xee", - "icy;": "\u0438", - "iecy;": "\u0435", - "iexcl": "\xa1", - "iexcl;": "\xa1", - "iff;": "\u21d4", - "ifr;": "\U0001d526", - "igrave": "\xec", - "igrave;": "\xec", - "ii;": "\u2148", - "iiiint;": "\u2a0c", - "iiint;": "\u222d", - "iinfin;": "\u29dc", - "iiota;": "\u2129", - "ijlig;": "\u0133", - "imacr;": "\u012b", - "image;": "\u2111", - "imagline;": "\u2110", - "imagpart;": "\u2111", - "imath;": "\u0131", - "imof;": "\u22b7", - "imped;": "\u01b5", - "in;": "\u2208", - "incare;": "\u2105", - "infin;": "\u221e", - "infintie;": "\u29dd", - "inodot;": "\u0131", - "int;": "\u222b", - "intcal;": "\u22ba", - "integers;": "\u2124", - "intercal;": "\u22ba", - "intlarhk;": "\u2a17", - "intprod;": "\u2a3c", - "iocy;": "\u0451", - "iogon;": "\u012f", - "iopf;": "\U0001d55a", - "iota;": "\u03b9", - "iprod;": "\u2a3c", - "iquest": "\xbf", - "iquest;": "\xbf", - "iscr;": "\U0001d4be", - "isin;": "\u2208", - "isinE;": "\u22f9", - "isindot;": "\u22f5", - "isins;": "\u22f4", - "isinsv;": "\u22f3", - "isinv;": "\u2208", - "it;": "\u2062", - "itilde;": "\u0129", - "iukcy;": "\u0456", - "iuml": "\xef", - "iuml;": "\xef", - "jcirc;": "\u0135", - "jcy;": "\u0439", - "jfr;": "\U0001d527", - "jmath;": "\u0237", - "jopf;": "\U0001d55b", - "jscr;": "\U0001d4bf", - "jsercy;": "\u0458", - "jukcy;": "\u0454", - "kappa;": "\u03ba", - "kappav;": "\u03f0", - "kcedil;": "\u0137", - "kcy;": "\u043a", - "kfr;": "\U0001d528", - "kgreen;": "\u0138", - "khcy;": "\u0445", - "kjcy;": "\u045c", - "kopf;": "\U0001d55c", - "kscr;": "\U0001d4c0", - "lAarr;": "\u21da", - "lArr;": "\u21d0", - "lAtail;": "\u291b", - "lBarr;": "\u290e", - "lE;": "\u2266", - "lEg;": "\u2a8b", - "lHar;": "\u2962", - "lacute;": "\u013a", - "laemptyv;": "\u29b4", - "lagran;": "\u2112", - "lambda;": "\u03bb", - "lang;": "\u27e8", - "langd;": "\u2991", - "langle;": "\u27e8", - "lap;": "\u2a85", - "laquo": "\xab", - "laquo;": "\xab", - "larr;": "\u2190", - "larrb;": "\u21e4", - "larrbfs;": "\u291f", - "larrfs;": "\u291d", - "larrhk;": "\u21a9", - "larrlp;": "\u21ab", - "larrpl;": "\u2939", - "larrsim;": "\u2973", - "larrtl;": "\u21a2", - "lat;": "\u2aab", - "latail;": "\u2919", - "late;": "\u2aad", - "lates;": "\u2aad\ufe00", - "lbarr;": "\u290c", - "lbbrk;": "\u2772", - "lbrace;": "{", - "lbrack;": "[", - "lbrke;": "\u298b", - "lbrksld;": "\u298f", - "lbrkslu;": "\u298d", - "lcaron;": "\u013e", - "lcedil;": "\u013c", - "lceil;": "\u2308", - "lcub;": "{", - "lcy;": "\u043b", - "ldca;": "\u2936", - "ldquo;": "\u201c", - "ldquor;": "\u201e", - "ldrdhar;": "\u2967", - "ldrushar;": "\u294b", - "ldsh;": "\u21b2", - "le;": "\u2264", - "leftarrow;": "\u2190", - "leftarrowtail;": "\u21a2", - "leftharpoondown;": "\u21bd", - "leftharpoonup;": "\u21bc", - "leftleftarrows;": "\u21c7", - "leftrightarrow;": "\u2194", - "leftrightarrows;": "\u21c6", - "leftrightharpoons;": "\u21cb", - "leftrightsquigarrow;": "\u21ad", - "leftthreetimes;": "\u22cb", - "leg;": "\u22da", - "leq;": "\u2264", - "leqq;": "\u2266", - "leqslant;": "\u2a7d", - "les;": "\u2a7d", - "lescc;": "\u2aa8", - "lesdot;": "\u2a7f", - "lesdoto;": "\u2a81", - "lesdotor;": "\u2a83", - "lesg;": "\u22da\ufe00", - "lesges;": "\u2a93", - "lessapprox;": "\u2a85", - "lessdot;": "\u22d6", - "lesseqgtr;": "\u22da", - "lesseqqgtr;": "\u2a8b", - "lessgtr;": "\u2276", - "lesssim;": "\u2272", - "lfisht;": "\u297c", - "lfloor;": "\u230a", - "lfr;": "\U0001d529", - "lg;": "\u2276", - "lgE;": "\u2a91", - "lhard;": "\u21bd", - "lharu;": "\u21bc", - "lharul;": "\u296a", - "lhblk;": "\u2584", - "ljcy;": "\u0459", - "ll;": "\u226a", - "llarr;": "\u21c7", - "llcorner;": "\u231e", - "llhard;": "\u296b", - "lltri;": "\u25fa", - "lmidot;": "\u0140", - "lmoust;": "\u23b0", - "lmoustache;": "\u23b0", - "lnE;": "\u2268", - "lnap;": "\u2a89", - "lnapprox;": "\u2a89", - "lne;": "\u2a87", - "lneq;": "\u2a87", - "lneqq;": "\u2268", - "lnsim;": "\u22e6", - "loang;": "\u27ec", - "loarr;": "\u21fd", - "lobrk;": "\u27e6", - "longleftarrow;": "\u27f5", - "longleftrightarrow;": "\u27f7", - "longmapsto;": "\u27fc", - "longrightarrow;": "\u27f6", - "looparrowleft;": "\u21ab", - "looparrowright;": "\u21ac", - "lopar;": "\u2985", - "lopf;": "\U0001d55d", - "loplus;": "\u2a2d", - "lotimes;": "\u2a34", - "lowast;": "\u2217", - "lowbar;": "_", - "loz;": "\u25ca", - "lozenge;": "\u25ca", - "lozf;": "\u29eb", - "lpar;": "(", - "lparlt;": "\u2993", - "lrarr;": "\u21c6", - "lrcorner;": "\u231f", - "lrhar;": "\u21cb", - "lrhard;": "\u296d", - "lrm;": "\u200e", - "lrtri;": "\u22bf", - "lsaquo;": "\u2039", - "lscr;": "\U0001d4c1", - "lsh;": "\u21b0", - "lsim;": "\u2272", - "lsime;": "\u2a8d", - "lsimg;": "\u2a8f", - "lsqb;": "[", - "lsquo;": "\u2018", - "lsquor;": "\u201a", - "lstrok;": "\u0142", - "lt": "<", - "lt;": "<", - "ltcc;": "\u2aa6", - "ltcir;": "\u2a79", - "ltdot;": "\u22d6", - "lthree;": "\u22cb", - "ltimes;": "\u22c9", - "ltlarr;": "\u2976", - "ltquest;": "\u2a7b", - "ltrPar;": "\u2996", - "ltri;": "\u25c3", - "ltrie;": "\u22b4", - "ltrif;": "\u25c2", - "lurdshar;": "\u294a", - "luruhar;": "\u2966", - "lvertneqq;": "\u2268\ufe00", - "lvnE;": "\u2268\ufe00", - "mDDot;": "\u223a", - "macr": "\xaf", - "macr;": "\xaf", - "male;": "\u2642", - "malt;": "\u2720", - "maltese;": "\u2720", - "map;": "\u21a6", - "mapsto;": "\u21a6", - "mapstodown;": "\u21a7", - "mapstoleft;": "\u21a4", - "mapstoup;": "\u21a5", - "marker;": "\u25ae", - "mcomma;": "\u2a29", - "mcy;": "\u043c", - "mdash;": "\u2014", - "measuredangle;": "\u2221", - "mfr;": "\U0001d52a", - "mho;": "\u2127", - "micro": "\xb5", - "micro;": "\xb5", - "mid;": "\u2223", - "midast;": "*", - "midcir;": "\u2af0", - "middot": "\xb7", - "middot;": "\xb7", - "minus;": "\u2212", - "minusb;": "\u229f", - "minusd;": "\u2238", - "minusdu;": "\u2a2a", - "mlcp;": "\u2adb", - "mldr;": "\u2026", - "mnplus;": "\u2213", - "models;": "\u22a7", - "mopf;": "\U0001d55e", - "mp;": "\u2213", - "mscr;": "\U0001d4c2", - "mstpos;": "\u223e", - "mu;": "\u03bc", - "multimap;": "\u22b8", - "mumap;": "\u22b8", - "nGg;": "\u22d9\u0338", - "nGt;": "\u226b\u20d2", - "nGtv;": "\u226b\u0338", - "nLeftarrow;": "\u21cd", - "nLeftrightarrow;": "\u21ce", - "nLl;": "\u22d8\u0338", - "nLt;": "\u226a\u20d2", - "nLtv;": "\u226a\u0338", - "nRightarrow;": "\u21cf", - "nVDash;": "\u22af", - "nVdash;": "\u22ae", - "nabla;": "\u2207", - "nacute;": "\u0144", - "nang;": "\u2220\u20d2", - "nap;": "\u2249", - "napE;": "\u2a70\u0338", - "napid;": "\u224b\u0338", - "napos;": "\u0149", - "napprox;": "\u2249", - "natur;": "\u266e", - "natural;": "\u266e", - "naturals;": "\u2115", - "nbsp": "\xa0", - "nbsp;": "\xa0", - "nbump;": "\u224e\u0338", - "nbumpe;": "\u224f\u0338", - "ncap;": "\u2a43", - "ncaron;": "\u0148", - "ncedil;": "\u0146", - "ncong;": "\u2247", - "ncongdot;": "\u2a6d\u0338", - "ncup;": "\u2a42", - "ncy;": "\u043d", - "ndash;": "\u2013", - "ne;": "\u2260", - "neArr;": "\u21d7", - "nearhk;": "\u2924", - "nearr;": "\u2197", - "nearrow;": "\u2197", - "nedot;": "\u2250\u0338", - "nequiv;": "\u2262", - "nesear;": "\u2928", - "nesim;": "\u2242\u0338", - "nexist;": "\u2204", - "nexists;": "\u2204", - "nfr;": "\U0001d52b", - "ngE;": "\u2267\u0338", - "nge;": "\u2271", - "ngeq;": "\u2271", - "ngeqq;": "\u2267\u0338", - "ngeqslant;": "\u2a7e\u0338", - "nges;": "\u2a7e\u0338", - "ngsim;": "\u2275", - "ngt;": "\u226f", - "ngtr;": "\u226f", - "nhArr;": "\u21ce", - "nharr;": "\u21ae", - "nhpar;": "\u2af2", - "ni;": "\u220b", - "nis;": "\u22fc", - "nisd;": "\u22fa", - "niv;": "\u220b", - "njcy;": "\u045a", - "nlArr;": "\u21cd", - "nlE;": "\u2266\u0338", - "nlarr;": "\u219a", - "nldr;": "\u2025", - "nle;": "\u2270", - "nleftarrow;": "\u219a", - "nleftrightarrow;": "\u21ae", - "nleq;": "\u2270", - "nleqq;": "\u2266\u0338", - "nleqslant;": "\u2a7d\u0338", - "nles;": "\u2a7d\u0338", - "nless;": "\u226e", - "nlsim;": "\u2274", - "nlt;": "\u226e", - "nltri;": "\u22ea", - "nltrie;": "\u22ec", - "nmid;": "\u2224", - "nopf;": "\U0001d55f", - "not": "\xac", - "not;": "\xac", - "notin;": "\u2209", - "notinE;": "\u22f9\u0338", - "notindot;": "\u22f5\u0338", - "notinva;": "\u2209", - "notinvb;": "\u22f7", - "notinvc;": "\u22f6", - "notni;": "\u220c", - "notniva;": "\u220c", - "notnivb;": "\u22fe", - "notnivc;": "\u22fd", - "npar;": "\u2226", - "nparallel;": "\u2226", - "nparsl;": "\u2afd\u20e5", - "npart;": "\u2202\u0338", - "npolint;": "\u2a14", - "npr;": "\u2280", - "nprcue;": "\u22e0", - "npre;": "\u2aaf\u0338", - "nprec;": "\u2280", - "npreceq;": "\u2aaf\u0338", - "nrArr;": "\u21cf", - "nrarr;": "\u219b", - "nrarrc;": "\u2933\u0338", - "nrarrw;": "\u219d\u0338", - "nrightarrow;": "\u219b", - "nrtri;": "\u22eb", - "nrtrie;": "\u22ed", - "nsc;": "\u2281", - "nsccue;": "\u22e1", - "nsce;": "\u2ab0\u0338", - "nscr;": "\U0001d4c3", - "nshortmid;": "\u2224", - "nshortparallel;": "\u2226", - "nsim;": "\u2241", - "nsime;": "\u2244", - "nsimeq;": "\u2244", - "nsmid;": "\u2224", - "nspar;": "\u2226", - "nsqsube;": "\u22e2", - "nsqsupe;": "\u22e3", - "nsub;": "\u2284", - "nsubE;": "\u2ac5\u0338", - "nsube;": "\u2288", - "nsubset;": "\u2282\u20d2", - "nsubseteq;": "\u2288", - "nsubseteqq;": "\u2ac5\u0338", - "nsucc;": "\u2281", - "nsucceq;": "\u2ab0\u0338", - "nsup;": "\u2285", - "nsupE;": "\u2ac6\u0338", - "nsupe;": "\u2289", - "nsupset;": "\u2283\u20d2", - "nsupseteq;": "\u2289", - "nsupseteqq;": "\u2ac6\u0338", - "ntgl;": "\u2279", - "ntilde": "\xf1", - "ntilde;": "\xf1", - "ntlg;": "\u2278", - "ntriangleleft;": "\u22ea", - "ntrianglelefteq;": "\u22ec", - "ntriangleright;": "\u22eb", - "ntrianglerighteq;": "\u22ed", - "nu;": "\u03bd", - "num;": "#", - "numero;": "\u2116", - "numsp;": "\u2007", - "nvDash;": "\u22ad", - "nvHarr;": "\u2904", - "nvap;": "\u224d\u20d2", - "nvdash;": "\u22ac", - "nvge;": "\u2265\u20d2", - "nvgt;": ">\u20d2", - "nvinfin;": "\u29de", - "nvlArr;": "\u2902", - "nvle;": "\u2264\u20d2", - "nvlt;": "<\u20d2", - "nvltrie;": "\u22b4\u20d2", - "nvrArr;": "\u2903", - "nvrtrie;": "\u22b5\u20d2", - "nvsim;": "\u223c\u20d2", - "nwArr;": "\u21d6", - "nwarhk;": "\u2923", - "nwarr;": "\u2196", - "nwarrow;": "\u2196", - "nwnear;": "\u2927", - "oS;": "\u24c8", - "oacute": "\xf3", - "oacute;": "\xf3", - "oast;": "\u229b", - "ocir;": "\u229a", - "ocirc": "\xf4", - "ocirc;": "\xf4", - "ocy;": "\u043e", - "odash;": "\u229d", - "odblac;": "\u0151", - "odiv;": "\u2a38", - "odot;": "\u2299", - "odsold;": "\u29bc", - "oelig;": "\u0153", - "ofcir;": "\u29bf", - "ofr;": "\U0001d52c", - "ogon;": "\u02db", - "ograve": "\xf2", - "ograve;": "\xf2", - "ogt;": "\u29c1", - "ohbar;": "\u29b5", - "ohm;": "\u03a9", - "oint;": "\u222e", - "olarr;": "\u21ba", - "olcir;": "\u29be", - "olcross;": "\u29bb", - "oline;": "\u203e", - "olt;": "\u29c0", - "omacr;": "\u014d", - "omega;": "\u03c9", - "omicron;": "\u03bf", - "omid;": "\u29b6", - "ominus;": "\u2296", - "oopf;": "\U0001d560", - "opar;": "\u29b7", - "operp;": "\u29b9", - "oplus;": "\u2295", - "or;": "\u2228", - "orarr;": "\u21bb", - "ord;": "\u2a5d", - "order;": "\u2134", - "orderof;": "\u2134", - "ordf": "\xaa", - "ordf;": "\xaa", - "ordm": "\xba", - "ordm;": "\xba", - "origof;": "\u22b6", - "oror;": "\u2a56", - "orslope;": "\u2a57", - "orv;": "\u2a5b", - "oscr;": "\u2134", - "oslash": "\xf8", - "oslash;": "\xf8", - "osol;": "\u2298", - "otilde": "\xf5", - "otilde;": "\xf5", - "otimes;": "\u2297", - "otimesas;": "\u2a36", - "ouml": "\xf6", - "ouml;": "\xf6", - "ovbar;": "\u233d", - "par;": "\u2225", - "para": "\xb6", - "para;": "\xb6", - "parallel;": "\u2225", - "parsim;": "\u2af3", - "parsl;": "\u2afd", - "part;": "\u2202", - "pcy;": "\u043f", - "percnt;": "%", - "period;": ".", - "permil;": "\u2030", - "perp;": "\u22a5", - "pertenk;": "\u2031", - "pfr;": "\U0001d52d", - "phi;": "\u03c6", - "phiv;": "\u03d5", - "phmmat;": "\u2133", - "phone;": "\u260e", - "pi;": "\u03c0", - "pitchfork;": "\u22d4", - "piv;": "\u03d6", - "planck;": "\u210f", - "planckh;": "\u210e", - "plankv;": "\u210f", - "plus;": "+", - "plusacir;": "\u2a23", - "plusb;": "\u229e", - "pluscir;": "\u2a22", - "plusdo;": "\u2214", - "plusdu;": "\u2a25", - "pluse;": "\u2a72", - "plusmn": "\xb1", - "plusmn;": "\xb1", - "plussim;": "\u2a26", - "plustwo;": "\u2a27", - "pm;": "\xb1", - "pointint;": "\u2a15", - "popf;": "\U0001d561", - "pound": "\xa3", - "pound;": "\xa3", - "pr;": "\u227a", - "prE;": "\u2ab3", - "prap;": "\u2ab7", - "prcue;": "\u227c", - "pre;": "\u2aaf", - "prec;": "\u227a", - "precapprox;": "\u2ab7", - "preccurlyeq;": "\u227c", - "preceq;": "\u2aaf", - "precnapprox;": "\u2ab9", - "precneqq;": "\u2ab5", - "precnsim;": "\u22e8", - "precsim;": "\u227e", - "prime;": "\u2032", - "primes;": "\u2119", - "prnE;": "\u2ab5", - "prnap;": "\u2ab9", - "prnsim;": "\u22e8", - "prod;": "\u220f", - "profalar;": "\u232e", - "profline;": "\u2312", - "profsurf;": "\u2313", - "prop;": "\u221d", - "propto;": "\u221d", - "prsim;": "\u227e", - "prurel;": "\u22b0", - "pscr;": "\U0001d4c5", - "psi;": "\u03c8", - "puncsp;": "\u2008", - "qfr;": "\U0001d52e", - "qint;": "\u2a0c", - "qopf;": "\U0001d562", - "qprime;": "\u2057", - "qscr;": "\U0001d4c6", - "quaternions;": "\u210d", - "quatint;": "\u2a16", - "quest;": "?", - "questeq;": "\u225f", - "quot": "\"", - "quot;": "\"", - "rAarr;": "\u21db", - "rArr;": "\u21d2", - "rAtail;": "\u291c", - "rBarr;": "\u290f", - "rHar;": "\u2964", - "race;": "\u223d\u0331", - "racute;": "\u0155", - "radic;": "\u221a", - "raemptyv;": "\u29b3", - "rang;": "\u27e9", - "rangd;": "\u2992", - "range;": "\u29a5", - "rangle;": "\u27e9", - "raquo": "\xbb", - "raquo;": "\xbb", - "rarr;": "\u2192", - "rarrap;": "\u2975", - "rarrb;": "\u21e5", - "rarrbfs;": "\u2920", - "rarrc;": "\u2933", - "rarrfs;": "\u291e", - "rarrhk;": "\u21aa", - "rarrlp;": "\u21ac", - "rarrpl;": "\u2945", - "rarrsim;": "\u2974", - "rarrtl;": "\u21a3", - "rarrw;": "\u219d", - "ratail;": "\u291a", - "ratio;": "\u2236", - "rationals;": "\u211a", - "rbarr;": "\u290d", - "rbbrk;": "\u2773", - "rbrace;": "}", - "rbrack;": "]", - "rbrke;": "\u298c", - "rbrksld;": "\u298e", - "rbrkslu;": "\u2990", - "rcaron;": "\u0159", - "rcedil;": "\u0157", - "rceil;": "\u2309", - "rcub;": "}", - "rcy;": "\u0440", - "rdca;": "\u2937", - "rdldhar;": "\u2969", - "rdquo;": "\u201d", - "rdquor;": "\u201d", - "rdsh;": "\u21b3", - "real;": "\u211c", - "realine;": "\u211b", - "realpart;": "\u211c", - "reals;": "\u211d", - "rect;": "\u25ad", - "reg": "\xae", - "reg;": "\xae", - "rfisht;": "\u297d", - "rfloor;": "\u230b", - "rfr;": "\U0001d52f", - "rhard;": "\u21c1", - "rharu;": "\u21c0", - "rharul;": "\u296c", - "rho;": "\u03c1", - "rhov;": "\u03f1", - "rightarrow;": "\u2192", - "rightarrowtail;": "\u21a3", - "rightharpoondown;": "\u21c1", - "rightharpoonup;": "\u21c0", - "rightleftarrows;": "\u21c4", - "rightleftharpoons;": "\u21cc", - "rightrightarrows;": "\u21c9", - "rightsquigarrow;": "\u219d", - "rightthreetimes;": "\u22cc", - "ring;": "\u02da", - "risingdotseq;": "\u2253", - "rlarr;": "\u21c4", - "rlhar;": "\u21cc", - "rlm;": "\u200f", - "rmoust;": "\u23b1", - "rmoustache;": "\u23b1", - "rnmid;": "\u2aee", - "roang;": "\u27ed", - "roarr;": "\u21fe", - "robrk;": "\u27e7", - "ropar;": "\u2986", - "ropf;": "\U0001d563", - "roplus;": "\u2a2e", - "rotimes;": "\u2a35", - "rpar;": ")", - "rpargt;": "\u2994", - "rppolint;": "\u2a12", - "rrarr;": "\u21c9", - "rsaquo;": "\u203a", - "rscr;": "\U0001d4c7", - "rsh;": "\u21b1", - "rsqb;": "]", - "rsquo;": "\u2019", - "rsquor;": "\u2019", - "rthree;": "\u22cc", - "rtimes;": "\u22ca", - "rtri;": "\u25b9", - "rtrie;": "\u22b5", - "rtrif;": "\u25b8", - "rtriltri;": "\u29ce", - "ruluhar;": "\u2968", - "rx;": "\u211e", - "sacute;": "\u015b", - "sbquo;": "\u201a", - "sc;": "\u227b", - "scE;": "\u2ab4", - "scap;": "\u2ab8", - "scaron;": "\u0161", - "sccue;": "\u227d", - "sce;": "\u2ab0", - "scedil;": "\u015f", - "scirc;": "\u015d", - "scnE;": "\u2ab6", - "scnap;": "\u2aba", - "scnsim;": "\u22e9", - "scpolint;": "\u2a13", - "scsim;": "\u227f", - "scy;": "\u0441", - "sdot;": "\u22c5", - "sdotb;": "\u22a1", - "sdote;": "\u2a66", - "seArr;": "\u21d8", - "searhk;": "\u2925", - "searr;": "\u2198", - "searrow;": "\u2198", - "sect": "\xa7", - "sect;": "\xa7", - "semi;": ";", - "seswar;": "\u2929", - "setminus;": "\u2216", - "setmn;": "\u2216", - "sext;": "\u2736", - "sfr;": "\U0001d530", - "sfrown;": "\u2322", - "sharp;": "\u266f", - "shchcy;": "\u0449", - "shcy;": "\u0448", - "shortmid;": "\u2223", - "shortparallel;": "\u2225", - "shy": "\xad", - "shy;": "\xad", - "sigma;": "\u03c3", - "sigmaf;": "\u03c2", - "sigmav;": "\u03c2", - "sim;": "\u223c", - "simdot;": "\u2a6a", - "sime;": "\u2243", - "simeq;": "\u2243", - "simg;": "\u2a9e", - "simgE;": "\u2aa0", - "siml;": "\u2a9d", - "simlE;": "\u2a9f", - "simne;": "\u2246", - "simplus;": "\u2a24", - "simrarr;": "\u2972", - "slarr;": "\u2190", - "smallsetminus;": "\u2216", - "smashp;": "\u2a33", - "smeparsl;": "\u29e4", - "smid;": "\u2223", - "smile;": "\u2323", - "smt;": "\u2aaa", - "smte;": "\u2aac", - "smtes;": "\u2aac\ufe00", - "softcy;": "\u044c", - "sol;": "/", - "solb;": "\u29c4", - "solbar;": "\u233f", - "sopf;": "\U0001d564", - "spades;": "\u2660", - "spadesuit;": "\u2660", - "spar;": "\u2225", - "sqcap;": "\u2293", - "sqcaps;": "\u2293\ufe00", - "sqcup;": "\u2294", - "sqcups;": "\u2294\ufe00", - "sqsub;": "\u228f", - "sqsube;": "\u2291", - "sqsubset;": "\u228f", - "sqsubseteq;": "\u2291", - "sqsup;": "\u2290", - "sqsupe;": "\u2292", - "sqsupset;": "\u2290", - "sqsupseteq;": "\u2292", - "squ;": "\u25a1", - "square;": "\u25a1", - "squarf;": "\u25aa", - "squf;": "\u25aa", - "srarr;": "\u2192", - "sscr;": "\U0001d4c8", - "ssetmn;": "\u2216", - "ssmile;": "\u2323", - "sstarf;": "\u22c6", - "star;": "\u2606", - "starf;": "\u2605", - "straightepsilon;": "\u03f5", - "straightphi;": "\u03d5", - "strns;": "\xaf", - "sub;": "\u2282", - "subE;": "\u2ac5", - "subdot;": "\u2abd", - "sube;": "\u2286", - "subedot;": "\u2ac3", - "submult;": "\u2ac1", - "subnE;": "\u2acb", - "subne;": "\u228a", - "subplus;": "\u2abf", - "subrarr;": "\u2979", - "subset;": "\u2282", - "subseteq;": "\u2286", - "subseteqq;": "\u2ac5", - "subsetneq;": "\u228a", - "subsetneqq;": "\u2acb", - "subsim;": "\u2ac7", - "subsub;": "\u2ad5", - "subsup;": "\u2ad3", - "succ;": "\u227b", - "succapprox;": "\u2ab8", - "succcurlyeq;": "\u227d", - "succeq;": "\u2ab0", - "succnapprox;": "\u2aba", - "succneqq;": "\u2ab6", - "succnsim;": "\u22e9", - "succsim;": "\u227f", - "sum;": "\u2211", - "sung;": "\u266a", - "sup1": "\xb9", - "sup1;": "\xb9", - "sup2": "\xb2", - "sup2;": "\xb2", - "sup3": "\xb3", - "sup3;": "\xb3", - "sup;": "\u2283", - "supE;": "\u2ac6", - "supdot;": "\u2abe", - "supdsub;": "\u2ad8", - "supe;": "\u2287", - "supedot;": "\u2ac4", - "suphsol;": "\u27c9", - "suphsub;": "\u2ad7", - "suplarr;": "\u297b", - "supmult;": "\u2ac2", - "supnE;": "\u2acc", - "supne;": "\u228b", - "supplus;": "\u2ac0", - "supset;": "\u2283", - "supseteq;": "\u2287", - "supseteqq;": "\u2ac6", - "supsetneq;": "\u228b", - "supsetneqq;": "\u2acc", - "supsim;": "\u2ac8", - "supsub;": "\u2ad4", - "supsup;": "\u2ad6", - "swArr;": "\u21d9", - "swarhk;": "\u2926", - "swarr;": "\u2199", - "swarrow;": "\u2199", - "swnwar;": "\u292a", - "szlig": "\xdf", - "szlig;": "\xdf", - "target;": "\u2316", - "tau;": "\u03c4", - "tbrk;": "\u23b4", - "tcaron;": "\u0165", - "tcedil;": "\u0163", - "tcy;": "\u0442", - "tdot;": "\u20db", - "telrec;": "\u2315", - "tfr;": "\U0001d531", - "there4;": "\u2234", - "therefore;": "\u2234", - "theta;": "\u03b8", - "thetasym;": "\u03d1", - "thetav;": "\u03d1", - "thickapprox;": "\u2248", - "thicksim;": "\u223c", - "thinsp;": "\u2009", - "thkap;": "\u2248", - "thksim;": "\u223c", - "thorn": "\xfe", - "thorn;": "\xfe", - "tilde;": "\u02dc", - "times": "\xd7", - "times;": "\xd7", - "timesb;": "\u22a0", - "timesbar;": "\u2a31", - "timesd;": "\u2a30", - "tint;": "\u222d", - "toea;": "\u2928", - "top;": "\u22a4", - "topbot;": "\u2336", - "topcir;": "\u2af1", - "topf;": "\U0001d565", - "topfork;": "\u2ada", - "tosa;": "\u2929", - "tprime;": "\u2034", - "trade;": "\u2122", - "triangle;": "\u25b5", - "triangledown;": "\u25bf", - "triangleleft;": "\u25c3", - "trianglelefteq;": "\u22b4", - "triangleq;": "\u225c", - "triangleright;": "\u25b9", - "trianglerighteq;": "\u22b5", - "tridot;": "\u25ec", - "trie;": "\u225c", - "triminus;": "\u2a3a", - "triplus;": "\u2a39", - "trisb;": "\u29cd", - "tritime;": "\u2a3b", - "trpezium;": "\u23e2", - "tscr;": "\U0001d4c9", - "tscy;": "\u0446", - "tshcy;": "\u045b", - "tstrok;": "\u0167", - "twixt;": "\u226c", - "twoheadleftarrow;": "\u219e", - "twoheadrightarrow;": "\u21a0", - "uArr;": "\u21d1", - "uHar;": "\u2963", - "uacute": "\xfa", - "uacute;": "\xfa", - "uarr;": "\u2191", - "ubrcy;": "\u045e", - "ubreve;": "\u016d", - "ucirc": "\xfb", - "ucirc;": "\xfb", - "ucy;": "\u0443", - "udarr;": "\u21c5", - "udblac;": "\u0171", - "udhar;": "\u296e", - "ufisht;": "\u297e", - "ufr;": "\U0001d532", - "ugrave": "\xf9", - "ugrave;": "\xf9", - "uharl;": "\u21bf", - "uharr;": "\u21be", - "uhblk;": "\u2580", - "ulcorn;": "\u231c", - "ulcorner;": "\u231c", - "ulcrop;": "\u230f", - "ultri;": "\u25f8", - "umacr;": "\u016b", - "uml": "\xa8", - "uml;": "\xa8", - "uogon;": "\u0173", - "uopf;": "\U0001d566", - "uparrow;": "\u2191", - "updownarrow;": "\u2195", - "upharpoonleft;": "\u21bf", - "upharpoonright;": "\u21be", - "uplus;": "\u228e", - "upsi;": "\u03c5", - "upsih;": "\u03d2", - "upsilon;": "\u03c5", - "upuparrows;": "\u21c8", - "urcorn;": "\u231d", - "urcorner;": "\u231d", - "urcrop;": "\u230e", - "uring;": "\u016f", - "urtri;": "\u25f9", - "uscr;": "\U0001d4ca", - "utdot;": "\u22f0", - "utilde;": "\u0169", - "utri;": "\u25b5", - "utrif;": "\u25b4", - "uuarr;": "\u21c8", - "uuml": "\xfc", - "uuml;": "\xfc", - "uwangle;": "\u29a7", - "vArr;": "\u21d5", - "vBar;": "\u2ae8", - "vBarv;": "\u2ae9", - "vDash;": "\u22a8", - "vangrt;": "\u299c", - "varepsilon;": "\u03f5", - "varkappa;": "\u03f0", - "varnothing;": "\u2205", - "varphi;": "\u03d5", - "varpi;": "\u03d6", - "varpropto;": "\u221d", - "varr;": "\u2195", - "varrho;": "\u03f1", - "varsigma;": "\u03c2", - "varsubsetneq;": "\u228a\ufe00", - "varsubsetneqq;": "\u2acb\ufe00", - "varsupsetneq;": "\u228b\ufe00", - "varsupsetneqq;": "\u2acc\ufe00", - "vartheta;": "\u03d1", - "vartriangleleft;": "\u22b2", - "vartriangleright;": "\u22b3", - "vcy;": "\u0432", - "vdash;": "\u22a2", - "vee;": "\u2228", - "veebar;": "\u22bb", - "veeeq;": "\u225a", - "vellip;": "\u22ee", - "verbar;": "|", - "vert;": "|", - "vfr;": "\U0001d533", - "vltri;": "\u22b2", - "vnsub;": "\u2282\u20d2", - "vnsup;": "\u2283\u20d2", - "vopf;": "\U0001d567", - "vprop;": "\u221d", - "vrtri;": "\u22b3", - "vscr;": "\U0001d4cb", - "vsubnE;": "\u2acb\ufe00", - "vsubne;": "\u228a\ufe00", - "vsupnE;": "\u2acc\ufe00", - "vsupne;": "\u228b\ufe00", - "vzigzag;": "\u299a", - "wcirc;": "\u0175", - "wedbar;": "\u2a5f", - "wedge;": "\u2227", - "wedgeq;": "\u2259", - "weierp;": "\u2118", - "wfr;": "\U0001d534", - "wopf;": "\U0001d568", - "wp;": "\u2118", - "wr;": "\u2240", - "wreath;": "\u2240", - "wscr;": "\U0001d4cc", - "xcap;": "\u22c2", - "xcirc;": "\u25ef", - "xcup;": "\u22c3", - "xdtri;": "\u25bd", - "xfr;": "\U0001d535", - "xhArr;": "\u27fa", - "xharr;": "\u27f7", - "xi;": "\u03be", - "xlArr;": "\u27f8", - "xlarr;": "\u27f5", - "xmap;": "\u27fc", - "xnis;": "\u22fb", - "xodot;": "\u2a00", - "xopf;": "\U0001d569", - "xoplus;": "\u2a01", - "xotime;": "\u2a02", - "xrArr;": "\u27f9", - "xrarr;": "\u27f6", - "xscr;": "\U0001d4cd", - "xsqcup;": "\u2a06", - "xuplus;": "\u2a04", - "xutri;": "\u25b3", - "xvee;": "\u22c1", - "xwedge;": "\u22c0", - "yacute": "\xfd", - "yacute;": "\xfd", - "yacy;": "\u044f", - "ycirc;": "\u0177", - "ycy;": "\u044b", - "yen": "\xa5", - "yen;": "\xa5", - "yfr;": "\U0001d536", - "yicy;": "\u0457", - "yopf;": "\U0001d56a", - "yscr;": "\U0001d4ce", - "yucy;": "\u044e", - "yuml": "\xff", - "yuml;": "\xff", - "zacute;": "\u017a", - "zcaron;": "\u017e", - "zcy;": "\u0437", - "zdot;": "\u017c", - "zeetrf;": "\u2128", - "zeta;": "\u03b6", - "zfr;": "\U0001d537", - "zhcy;": "\u0436", - "zigrarr;": "\u21dd", - "zopf;": "\U0001d56b", - "zscr;": "\U0001d4cf", - "zwj;": "\u200d", - "zwnj;": "\u200c", -} - -replacementCharacters = { - 0x0: "\uFFFD", - 0x0d: "\u000D", - 0x80: "\u20AC", - 0x81: "\u0081", - 0x82: "\u201A", - 0x83: "\u0192", - 0x84: "\u201E", - 0x85: "\u2026", - 0x86: "\u2020", - 0x87: "\u2021", - 0x88: "\u02C6", - 0x89: "\u2030", - 0x8A: "\u0160", - 0x8B: "\u2039", - 0x8C: "\u0152", - 0x8D: "\u008D", - 0x8E: "\u017D", - 0x8F: "\u008F", - 0x90: "\u0090", - 0x91: "\u2018", - 0x92: "\u2019", - 0x93: "\u201C", - 0x94: "\u201D", - 0x95: "\u2022", - 0x96: "\u2013", - 0x97: "\u2014", - 0x98: "\u02DC", - 0x99: "\u2122", - 0x9A: "\u0161", - 0x9B: "\u203A", - 0x9C: "\u0153", - 0x9D: "\u009D", - 0x9E: "\u017E", - 0x9F: "\u0178", -} - -tokenTypes = { - "Doctype": 0, - "Characters": 1, - "SpaceCharacters": 2, - "StartTag": 3, - "EndTag": 4, - "EmptyTag": 5, - "Comment": 6, - "ParseError": 7 -} - -tagTokenTypes = frozenset([tokenTypes["StartTag"], tokenTypes["EndTag"], - tokenTypes["EmptyTag"]]) - - -prefixes = dict([(v, k) for k, v in namespaces.items()]) -prefixes["http://www.w3.org/1998/Math/MathML"] = "math" - - -class DataLossWarning(UserWarning): - """Raised when the current tree is unable to represent the input data""" - pass - - -class _ReparseException(Exception): - pass diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/__init__.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/alphabeticalattributes.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/alphabeticalattributes.py deleted file mode 100644 index d9e234a..0000000 --- a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/alphabeticalattributes.py +++ /dev/null @@ -1,29 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -from . import base - -from collections import OrderedDict - - -def _attr_key(attr): - """Return an appropriate key for an attribute for sorting - - Attributes have a namespace that can be either ``None`` or a string. We - can't compare the two because they're different types, so we convert - ``None`` to an empty string first. - - """ - return (attr[0][0] or ''), attr[0][1] - - -class Filter(base.Filter): - """Alphabetizes attributes for elements""" - def __iter__(self): - for token in base.Filter.__iter__(self): - if token["type"] in ("StartTag", "EmptyTag"): - attrs = OrderedDict() - for name, value in sorted(token["data"].items(), - key=_attr_key): - attrs[name] = value - token["data"] = attrs - yield token diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/base.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/base.py deleted file mode 100644 index f5aa523..0000000 --- a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/base.py +++ /dev/null @@ -1,12 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - - -class Filter(object): - def __init__(self, source): - self.source = source - - def __iter__(self): - return iter(self.source) - - def __getattr__(self, name): - return getattr(self.source, name) diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/inject_meta_charset.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/inject_meta_charset.py deleted file mode 100644 index 2f8ec4f..0000000 --- a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/inject_meta_charset.py +++ /dev/null @@ -1,73 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -from . import base - - -class Filter(base.Filter): - """Injects ```` tag into head of document""" - def __init__(self, source, encoding): - """Creates a Filter - - :arg source: the source token stream - - :arg encoding: the encoding to set - - """ - base.Filter.__init__(self, source) - self.encoding = encoding - - def __iter__(self): - state = "pre_head" - meta_found = (self.encoding is None) - pending = [] - - for token in base.Filter.__iter__(self): - type = token["type"] - if type == "StartTag": - if token["name"].lower() == "head": - state = "in_head" - - elif type == "EmptyTag": - if token["name"].lower() == "meta": - # replace charset with actual encoding - has_http_equiv_content_type = False - for (namespace, name), value in token["data"].items(): - if namespace is not None: - continue - elif name.lower() == 'charset': - token["data"][(namespace, name)] = self.encoding - meta_found = True - break - elif name == 'http-equiv' and value.lower() == 'content-type': - has_http_equiv_content_type = True - else: - if has_http_equiv_content_type and (None, "content") in token["data"]: - token["data"][(None, "content")] = 'text/html; charset=%s' % self.encoding - meta_found = True - - elif token["name"].lower() == "head" and not meta_found: - # insert meta into empty head - yield {"type": "StartTag", "name": "head", - "data": token["data"]} - yield {"type": "EmptyTag", "name": "meta", - "data": {(None, "charset"): self.encoding}} - yield {"type": "EndTag", "name": "head"} - meta_found = True - continue - - elif type == "EndTag": - if token["name"].lower() == "head" and pending: - # insert meta into head (if necessary) and flush pending queue - yield pending.pop(0) - if not meta_found: - yield {"type": "EmptyTag", "name": "meta", - "data": {(None, "charset"): self.encoding}} - while pending: - yield pending.pop(0) - meta_found = True - state = "post_head" - - if state == "in_head": - pending.append(token) - else: - yield token diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/lint.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/lint.py deleted file mode 100644 index b5bbd97..0000000 --- a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/lint.py +++ /dev/null @@ -1,93 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -from pip._vendor.six import text_type - -from . import base -from ..constants import namespaces, voidElements - -from ..constants import spaceCharacters -spaceCharacters = "".join(spaceCharacters) - - -class Filter(base.Filter): - """Lints the token stream for errors - - If it finds any errors, it'll raise an ``AssertionError``. - - """ - def __init__(self, source, require_matching_tags=True): - """Creates a Filter - - :arg source: the source token stream - - :arg require_matching_tags: whether or not to require matching tags - - """ - super(Filter, self).__init__(source) - self.require_matching_tags = require_matching_tags - - def __iter__(self): - open_elements = [] - for token in base.Filter.__iter__(self): - type = token["type"] - if type in ("StartTag", "EmptyTag"): - namespace = token["namespace"] - name = token["name"] - assert namespace is None or isinstance(namespace, text_type) - assert namespace != "" - assert isinstance(name, text_type) - assert name != "" - assert isinstance(token["data"], dict) - if (not namespace or namespace == namespaces["html"]) and name in voidElements: - assert type == "EmptyTag" - else: - assert type == "StartTag" - if type == "StartTag" and self.require_matching_tags: - open_elements.append((namespace, name)) - for (namespace, name), value in token["data"].items(): - assert namespace is None or isinstance(namespace, text_type) - assert namespace != "" - assert isinstance(name, text_type) - assert name != "" - assert isinstance(value, text_type) - - elif type == "EndTag": - namespace = token["namespace"] - name = token["name"] - assert namespace is None or isinstance(namespace, text_type) - assert namespace != "" - assert isinstance(name, text_type) - assert name != "" - if (not namespace or namespace == namespaces["html"]) and name in voidElements: - assert False, "Void element reported as EndTag token: %(tag)s" % {"tag": name} - elif self.require_matching_tags: - start = open_elements.pop() - assert start == (namespace, name) - - elif type == "Comment": - data = token["data"] - assert isinstance(data, text_type) - - elif type in ("Characters", "SpaceCharacters"): - data = token["data"] - assert isinstance(data, text_type) - assert data != "" - if type == "SpaceCharacters": - assert data.strip(spaceCharacters) == "" - - elif type == "Doctype": - name = token["name"] - assert name is None or isinstance(name, text_type) - assert token["publicId"] is None or isinstance(name, text_type) - assert token["systemId"] is None or isinstance(name, text_type) - - elif type == "Entity": - assert isinstance(token["name"], text_type) - - elif type == "SerializerError": - assert isinstance(token["data"], text_type) - - else: - assert False, "Unknown token type: %(type)s" % {"type": type} - - yield token diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/optionaltags.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/optionaltags.py deleted file mode 100644 index c8d5e54..0000000 --- a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/optionaltags.py +++ /dev/null @@ -1,207 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -from . import base - - -class Filter(base.Filter): - """Removes optional tags from the token stream""" - def slider(self): - previous1 = previous2 = None - for token in self.source: - if previous1 is not None: - yield previous2, previous1, token - previous2 = previous1 - previous1 = token - if previous1 is not None: - yield previous2, previous1, None - - def __iter__(self): - for previous, token, next in self.slider(): - type = token["type"] - if type == "StartTag": - if (token["data"] or - not self.is_optional_start(token["name"], previous, next)): - yield token - elif type == "EndTag": - if not self.is_optional_end(token["name"], next): - yield token - else: - yield token - - def is_optional_start(self, tagname, previous, next): - type = next and next["type"] or None - if tagname in 'html': - # An html element's start tag may be omitted if the first thing - # inside the html element is not a space character or a comment. - return type not in ("Comment", "SpaceCharacters") - elif tagname == 'head': - # A head element's start tag may be omitted if the first thing - # inside the head element is an element. - # XXX: we also omit the start tag if the head element is empty - if type in ("StartTag", "EmptyTag"): - return True - elif type == "EndTag": - return next["name"] == "head" - elif tagname == 'body': - # A body element's start tag may be omitted if the first thing - # inside the body element is not a space character or a comment, - # except if the first thing inside the body element is a script - # or style element and the node immediately preceding the body - # element is a head element whose end tag has been omitted. - if type in ("Comment", "SpaceCharacters"): - return False - elif type == "StartTag": - # XXX: we do not look at the preceding event, so we never omit - # the body element's start tag if it's followed by a script or - # a style element. - return next["name"] not in ('script', 'style') - else: - return True - elif tagname == 'colgroup': - # A colgroup element's start tag may be omitted if the first thing - # inside the colgroup element is a col element, and if the element - # is not immediately preceded by another colgroup element whose - # end tag has been omitted. - if type in ("StartTag", "EmptyTag"): - # XXX: we do not look at the preceding event, so instead we never - # omit the colgroup element's end tag when it is immediately - # followed by another colgroup element. See is_optional_end. - return next["name"] == "col" - else: - return False - elif tagname == 'tbody': - # A tbody element's start tag may be omitted if the first thing - # inside the tbody element is a tr element, and if the element is - # not immediately preceded by a tbody, thead, or tfoot element - # whose end tag has been omitted. - if type == "StartTag": - # omit the thead and tfoot elements' end tag when they are - # immediately followed by a tbody element. See is_optional_end. - if previous and previous['type'] == 'EndTag' and \ - previous['name'] in ('tbody', 'thead', 'tfoot'): - return False - return next["name"] == 'tr' - else: - return False - return False - - def is_optional_end(self, tagname, next): - type = next and next["type"] or None - if tagname in ('html', 'head', 'body'): - # An html element's end tag may be omitted if the html element - # is not immediately followed by a space character or a comment. - return type not in ("Comment", "SpaceCharacters") - elif tagname in ('li', 'optgroup', 'tr'): - # A li element's end tag may be omitted if the li element is - # immediately followed by another li element or if there is - # no more content in the parent element. - # An optgroup element's end tag may be omitted if the optgroup - # element is immediately followed by another optgroup element, - # or if there is no more content in the parent element. - # A tr element's end tag may be omitted if the tr element is - # immediately followed by another tr element, or if there is - # no more content in the parent element. - if type == "StartTag": - return next["name"] == tagname - else: - return type == "EndTag" or type is None - elif tagname in ('dt', 'dd'): - # A dt element's end tag may be omitted if the dt element is - # immediately followed by another dt element or a dd element. - # A dd element's end tag may be omitted if the dd element is - # immediately followed by another dd element or a dt element, - # or if there is no more content in the parent element. - if type == "StartTag": - return next["name"] in ('dt', 'dd') - elif tagname == 'dd': - return type == "EndTag" or type is None - else: - return False - elif tagname == 'p': - # A p element's end tag may be omitted if the p element is - # immediately followed by an address, article, aside, - # blockquote, datagrid, dialog, dir, div, dl, fieldset, - # footer, form, h1, h2, h3, h4, h5, h6, header, hr, menu, - # nav, ol, p, pre, section, table, or ul, element, or if - # there is no more content in the parent element. - if type in ("StartTag", "EmptyTag"): - return next["name"] in ('address', 'article', 'aside', - 'blockquote', 'datagrid', 'dialog', - 'dir', 'div', 'dl', 'fieldset', 'footer', - 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', - 'header', 'hr', 'menu', 'nav', 'ol', - 'p', 'pre', 'section', 'table', 'ul') - else: - return type == "EndTag" or type is None - elif tagname == 'option': - # An option element's end tag may be omitted if the option - # element is immediately followed by another option element, - # or if it is immediately followed by an optgroup - # element, or if there is no more content in the parent - # element. - if type == "StartTag": - return next["name"] in ('option', 'optgroup') - else: - return type == "EndTag" or type is None - elif tagname in ('rt', 'rp'): - # An rt element's end tag may be omitted if the rt element is - # immediately followed by an rt or rp element, or if there is - # no more content in the parent element. - # An rp element's end tag may be omitted if the rp element is - # immediately followed by an rt or rp element, or if there is - # no more content in the parent element. - if type == "StartTag": - return next["name"] in ('rt', 'rp') - else: - return type == "EndTag" or type is None - elif tagname == 'colgroup': - # A colgroup element's end tag may be omitted if the colgroup - # element is not immediately followed by a space character or - # a comment. - if type in ("Comment", "SpaceCharacters"): - return False - elif type == "StartTag": - # XXX: we also look for an immediately following colgroup - # element. See is_optional_start. - return next["name"] != 'colgroup' - else: - return True - elif tagname in ('thead', 'tbody'): - # A thead element's end tag may be omitted if the thead element - # is immediately followed by a tbody or tfoot element. - # A tbody element's end tag may be omitted if the tbody element - # is immediately followed by a tbody or tfoot element, or if - # there is no more content in the parent element. - # A tfoot element's end tag may be omitted if the tfoot element - # is immediately followed by a tbody element, or if there is no - # more content in the parent element. - # XXX: we never omit the end tag when the following element is - # a tbody. See is_optional_start. - if type == "StartTag": - return next["name"] in ['tbody', 'tfoot'] - elif tagname == 'tbody': - return type == "EndTag" or type is None - else: - return False - elif tagname == 'tfoot': - # A tfoot element's end tag may be omitted if the tfoot element - # is immediately followed by a tbody element, or if there is no - # more content in the parent element. - # XXX: we never omit the end tag when the following element is - # a tbody. See is_optional_start. - if type == "StartTag": - return next["name"] == 'tbody' - else: - return type == "EndTag" or type is None - elif tagname in ('td', 'th'): - # A td element's end tag may be omitted if the td element is - # immediately followed by a td or th element, or if there is - # no more content in the parent element. - # A th element's end tag may be omitted if the th element is - # immediately followed by a td or th element, or if there is - # no more content in the parent element. - if type == "StartTag": - return next["name"] in ('td', 'th') - else: - return type == "EndTag" or type is None - return False diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/sanitizer.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/sanitizer.py deleted file mode 100644 index c3199a5..0000000 --- a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/sanitizer.py +++ /dev/null @@ -1,896 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -import re -from xml.sax.saxutils import escape, unescape - -from pip._vendor.six.moves import urllib_parse as urlparse - -from . import base -from ..constants import namespaces, prefixes - -__all__ = ["Filter"] - - -allowed_elements = frozenset(( - (namespaces['html'], 'a'), - (namespaces['html'], 'abbr'), - (namespaces['html'], 'acronym'), - (namespaces['html'], 'address'), - (namespaces['html'], 'area'), - (namespaces['html'], 'article'), - (namespaces['html'], 'aside'), - (namespaces['html'], 'audio'), - (namespaces['html'], 'b'), - (namespaces['html'], 'big'), - (namespaces['html'], 'blockquote'), - (namespaces['html'], 'br'), - (namespaces['html'], 'button'), - (namespaces['html'], 'canvas'), - (namespaces['html'], 'caption'), - (namespaces['html'], 'center'), - (namespaces['html'], 'cite'), - (namespaces['html'], 'code'), - (namespaces['html'], 'col'), - (namespaces['html'], 'colgroup'), - (namespaces['html'], 'command'), - (namespaces['html'], 'datagrid'), - (namespaces['html'], 'datalist'), - (namespaces['html'], 'dd'), - (namespaces['html'], 'del'), - (namespaces['html'], 'details'), - (namespaces['html'], 'dfn'), - (namespaces['html'], 'dialog'), - (namespaces['html'], 'dir'), - (namespaces['html'], 'div'), - (namespaces['html'], 'dl'), - (namespaces['html'], 'dt'), - (namespaces['html'], 'em'), - (namespaces['html'], 'event-source'), - (namespaces['html'], 'fieldset'), - (namespaces['html'], 'figcaption'), - (namespaces['html'], 'figure'), - (namespaces['html'], 'footer'), - (namespaces['html'], 'font'), - (namespaces['html'], 'form'), - (namespaces['html'], 'header'), - (namespaces['html'], 'h1'), - (namespaces['html'], 'h2'), - (namespaces['html'], 'h3'), - (namespaces['html'], 'h4'), - (namespaces['html'], 'h5'), - (namespaces['html'], 'h6'), - (namespaces['html'], 'hr'), - (namespaces['html'], 'i'), - (namespaces['html'], 'img'), - (namespaces['html'], 'input'), - (namespaces['html'], 'ins'), - (namespaces['html'], 'keygen'), - (namespaces['html'], 'kbd'), - (namespaces['html'], 'label'), - (namespaces['html'], 'legend'), - (namespaces['html'], 'li'), - (namespaces['html'], 'm'), - (namespaces['html'], 'map'), - (namespaces['html'], 'menu'), - (namespaces['html'], 'meter'), - (namespaces['html'], 'multicol'), - (namespaces['html'], 'nav'), - (namespaces['html'], 'nextid'), - (namespaces['html'], 'ol'), - (namespaces['html'], 'output'), - (namespaces['html'], 'optgroup'), - (namespaces['html'], 'option'), - (namespaces['html'], 'p'), - (namespaces['html'], 'pre'), - (namespaces['html'], 'progress'), - (namespaces['html'], 'q'), - (namespaces['html'], 's'), - (namespaces['html'], 'samp'), - (namespaces['html'], 'section'), - (namespaces['html'], 'select'), - (namespaces['html'], 'small'), - (namespaces['html'], 'sound'), - (namespaces['html'], 'source'), - (namespaces['html'], 'spacer'), - (namespaces['html'], 'span'), - (namespaces['html'], 'strike'), - (namespaces['html'], 'strong'), - (namespaces['html'], 'sub'), - (namespaces['html'], 'sup'), - (namespaces['html'], 'table'), - (namespaces['html'], 'tbody'), - (namespaces['html'], 'td'), - (namespaces['html'], 'textarea'), - (namespaces['html'], 'time'), - (namespaces['html'], 'tfoot'), - (namespaces['html'], 'th'), - (namespaces['html'], 'thead'), - (namespaces['html'], 'tr'), - (namespaces['html'], 'tt'), - (namespaces['html'], 'u'), - (namespaces['html'], 'ul'), - (namespaces['html'], 'var'), - (namespaces['html'], 'video'), - (namespaces['mathml'], 'maction'), - (namespaces['mathml'], 'math'), - (namespaces['mathml'], 'merror'), - (namespaces['mathml'], 'mfrac'), - (namespaces['mathml'], 'mi'), - (namespaces['mathml'], 'mmultiscripts'), - (namespaces['mathml'], 'mn'), - (namespaces['mathml'], 'mo'), - (namespaces['mathml'], 'mover'), - (namespaces['mathml'], 'mpadded'), - (namespaces['mathml'], 'mphantom'), - (namespaces['mathml'], 'mprescripts'), - (namespaces['mathml'], 'mroot'), - (namespaces['mathml'], 'mrow'), - (namespaces['mathml'], 'mspace'), - (namespaces['mathml'], 'msqrt'), - (namespaces['mathml'], 'mstyle'), - (namespaces['mathml'], 'msub'), - (namespaces['mathml'], 'msubsup'), - (namespaces['mathml'], 'msup'), - (namespaces['mathml'], 'mtable'), - (namespaces['mathml'], 'mtd'), - (namespaces['mathml'], 'mtext'), - (namespaces['mathml'], 'mtr'), - (namespaces['mathml'], 'munder'), - (namespaces['mathml'], 'munderover'), - (namespaces['mathml'], 'none'), - (namespaces['svg'], 'a'), - (namespaces['svg'], 'animate'), - (namespaces['svg'], 'animateColor'), - (namespaces['svg'], 'animateMotion'), - (namespaces['svg'], 'animateTransform'), - (namespaces['svg'], 'clipPath'), - (namespaces['svg'], 'circle'), - (namespaces['svg'], 'defs'), - (namespaces['svg'], 'desc'), - (namespaces['svg'], 'ellipse'), - (namespaces['svg'], 'font-face'), - (namespaces['svg'], 'font-face-name'), - (namespaces['svg'], 'font-face-src'), - (namespaces['svg'], 'g'), - (namespaces['svg'], 'glyph'), - (namespaces['svg'], 'hkern'), - (namespaces['svg'], 'linearGradient'), - (namespaces['svg'], 'line'), - (namespaces['svg'], 'marker'), - (namespaces['svg'], 'metadata'), - (namespaces['svg'], 'missing-glyph'), - (namespaces['svg'], 'mpath'), - (namespaces['svg'], 'path'), - (namespaces['svg'], 'polygon'), - (namespaces['svg'], 'polyline'), - (namespaces['svg'], 'radialGradient'), - (namespaces['svg'], 'rect'), - (namespaces['svg'], 'set'), - (namespaces['svg'], 'stop'), - (namespaces['svg'], 'svg'), - (namespaces['svg'], 'switch'), - (namespaces['svg'], 'text'), - (namespaces['svg'], 'title'), - (namespaces['svg'], 'tspan'), - (namespaces['svg'], 'use'), -)) - -allowed_attributes = frozenset(( - # HTML attributes - (None, 'abbr'), - (None, 'accept'), - (None, 'accept-charset'), - (None, 'accesskey'), - (None, 'action'), - (None, 'align'), - (None, 'alt'), - (None, 'autocomplete'), - (None, 'autofocus'), - (None, 'axis'), - (None, 'background'), - (None, 'balance'), - (None, 'bgcolor'), - (None, 'bgproperties'), - (None, 'border'), - (None, 'bordercolor'), - (None, 'bordercolordark'), - (None, 'bordercolorlight'), - (None, 'bottompadding'), - (None, 'cellpadding'), - (None, 'cellspacing'), - (None, 'ch'), - (None, 'challenge'), - (None, 'char'), - (None, 'charoff'), - (None, 'choff'), - (None, 'charset'), - (None, 'checked'), - (None, 'cite'), - (None, 'class'), - (None, 'clear'), - (None, 'color'), - (None, 'cols'), - (None, 'colspan'), - (None, 'compact'), - (None, 'contenteditable'), - (None, 'controls'), - (None, 'coords'), - (None, 'data'), - (None, 'datafld'), - (None, 'datapagesize'), - (None, 'datasrc'), - (None, 'datetime'), - (None, 'default'), - (None, 'delay'), - (None, 'dir'), - (None, 'disabled'), - (None, 'draggable'), - (None, 'dynsrc'), - (None, 'enctype'), - (None, 'end'), - (None, 'face'), - (None, 'for'), - (None, 'form'), - (None, 'frame'), - (None, 'galleryimg'), - (None, 'gutter'), - (None, 'headers'), - (None, 'height'), - (None, 'hidefocus'), - (None, 'hidden'), - (None, 'high'), - (None, 'href'), - (None, 'hreflang'), - (None, 'hspace'), - (None, 'icon'), - (None, 'id'), - (None, 'inputmode'), - (None, 'ismap'), - (None, 'keytype'), - (None, 'label'), - (None, 'leftspacing'), - (None, 'lang'), - (None, 'list'), - (None, 'longdesc'), - (None, 'loop'), - (None, 'loopcount'), - (None, 'loopend'), - (None, 'loopstart'), - (None, 'low'), - (None, 'lowsrc'), - (None, 'max'), - (None, 'maxlength'), - (None, 'media'), - (None, 'method'), - (None, 'min'), - (None, 'multiple'), - (None, 'name'), - (None, 'nohref'), - (None, 'noshade'), - (None, 'nowrap'), - (None, 'open'), - (None, 'optimum'), - (None, 'pattern'), - (None, 'ping'), - (None, 'point-size'), - (None, 'poster'), - (None, 'pqg'), - (None, 'preload'), - (None, 'prompt'), - (None, 'radiogroup'), - (None, 'readonly'), - (None, 'rel'), - (None, 'repeat-max'), - (None, 'repeat-min'), - (None, 'replace'), - (None, 'required'), - (None, 'rev'), - (None, 'rightspacing'), - (None, 'rows'), - (None, 'rowspan'), - (None, 'rules'), - (None, 'scope'), - (None, 'selected'), - (None, 'shape'), - (None, 'size'), - (None, 'span'), - (None, 'src'), - (None, 'start'), - (None, 'step'), - (None, 'style'), - (None, 'summary'), - (None, 'suppress'), - (None, 'tabindex'), - (None, 'target'), - (None, 'template'), - (None, 'title'), - (None, 'toppadding'), - (None, 'type'), - (None, 'unselectable'), - (None, 'usemap'), - (None, 'urn'), - (None, 'valign'), - (None, 'value'), - (None, 'variable'), - (None, 'volume'), - (None, 'vspace'), - (None, 'vrml'), - (None, 'width'), - (None, 'wrap'), - (namespaces['xml'], 'lang'), - # MathML attributes - (None, 'actiontype'), - (None, 'align'), - (None, 'columnalign'), - (None, 'columnalign'), - (None, 'columnalign'), - (None, 'columnlines'), - (None, 'columnspacing'), - (None, 'columnspan'), - (None, 'depth'), - (None, 'display'), - (None, 'displaystyle'), - (None, 'equalcolumns'), - (None, 'equalrows'), - (None, 'fence'), - (None, 'fontstyle'), - (None, 'fontweight'), - (None, 'frame'), - (None, 'height'), - (None, 'linethickness'), - (None, 'lspace'), - (None, 'mathbackground'), - (None, 'mathcolor'), - (None, 'mathvariant'), - (None, 'mathvariant'), - (None, 'maxsize'), - (None, 'minsize'), - (None, 'other'), - (None, 'rowalign'), - (None, 'rowalign'), - (None, 'rowalign'), - (None, 'rowlines'), - (None, 'rowspacing'), - (None, 'rowspan'), - (None, 'rspace'), - (None, 'scriptlevel'), - (None, 'selection'), - (None, 'separator'), - (None, 'stretchy'), - (None, 'width'), - (None, 'width'), - (namespaces['xlink'], 'href'), - (namespaces['xlink'], 'show'), - (namespaces['xlink'], 'type'), - # SVG attributes - (None, 'accent-height'), - (None, 'accumulate'), - (None, 'additive'), - (None, 'alphabetic'), - (None, 'arabic-form'), - (None, 'ascent'), - (None, 'attributeName'), - (None, 'attributeType'), - (None, 'baseProfile'), - (None, 'bbox'), - (None, 'begin'), - (None, 'by'), - (None, 'calcMode'), - (None, 'cap-height'), - (None, 'class'), - (None, 'clip-path'), - (None, 'color'), - (None, 'color-rendering'), - (None, 'content'), - (None, 'cx'), - (None, 'cy'), - (None, 'd'), - (None, 'dx'), - (None, 'dy'), - (None, 'descent'), - (None, 'display'), - (None, 'dur'), - (None, 'end'), - (None, 'fill'), - (None, 'fill-opacity'), - (None, 'fill-rule'), - (None, 'font-family'), - (None, 'font-size'), - (None, 'font-stretch'), - (None, 'font-style'), - (None, 'font-variant'), - (None, 'font-weight'), - (None, 'from'), - (None, 'fx'), - (None, 'fy'), - (None, 'g1'), - (None, 'g2'), - (None, 'glyph-name'), - (None, 'gradientUnits'), - (None, 'hanging'), - (None, 'height'), - (None, 'horiz-adv-x'), - (None, 'horiz-origin-x'), - (None, 'id'), - (None, 'ideographic'), - (None, 'k'), - (None, 'keyPoints'), - (None, 'keySplines'), - (None, 'keyTimes'), - (None, 'lang'), - (None, 'marker-end'), - (None, 'marker-mid'), - (None, 'marker-start'), - (None, 'markerHeight'), - (None, 'markerUnits'), - (None, 'markerWidth'), - (None, 'mathematical'), - (None, 'max'), - (None, 'min'), - (None, 'name'), - (None, 'offset'), - (None, 'opacity'), - (None, 'orient'), - (None, 'origin'), - (None, 'overline-position'), - (None, 'overline-thickness'), - (None, 'panose-1'), - (None, 'path'), - (None, 'pathLength'), - (None, 'points'), - (None, 'preserveAspectRatio'), - (None, 'r'), - (None, 'refX'), - (None, 'refY'), - (None, 'repeatCount'), - (None, 'repeatDur'), - (None, 'requiredExtensions'), - (None, 'requiredFeatures'), - (None, 'restart'), - (None, 'rotate'), - (None, 'rx'), - (None, 'ry'), - (None, 'slope'), - (None, 'stemh'), - (None, 'stemv'), - (None, 'stop-color'), - (None, 'stop-opacity'), - (None, 'strikethrough-position'), - (None, 'strikethrough-thickness'), - (None, 'stroke'), - (None, 'stroke-dasharray'), - (None, 'stroke-dashoffset'), - (None, 'stroke-linecap'), - (None, 'stroke-linejoin'), - (None, 'stroke-miterlimit'), - (None, 'stroke-opacity'), - (None, 'stroke-width'), - (None, 'systemLanguage'), - (None, 'target'), - (None, 'text-anchor'), - (None, 'to'), - (None, 'transform'), - (None, 'type'), - (None, 'u1'), - (None, 'u2'), - (None, 'underline-position'), - (None, 'underline-thickness'), - (None, 'unicode'), - (None, 'unicode-range'), - (None, 'units-per-em'), - (None, 'values'), - (None, 'version'), - (None, 'viewBox'), - (None, 'visibility'), - (None, 'width'), - (None, 'widths'), - (None, 'x'), - (None, 'x-height'), - (None, 'x1'), - (None, 'x2'), - (namespaces['xlink'], 'actuate'), - (namespaces['xlink'], 'arcrole'), - (namespaces['xlink'], 'href'), - (namespaces['xlink'], 'role'), - (namespaces['xlink'], 'show'), - (namespaces['xlink'], 'title'), - (namespaces['xlink'], 'type'), - (namespaces['xml'], 'base'), - (namespaces['xml'], 'lang'), - (namespaces['xml'], 'space'), - (None, 'y'), - (None, 'y1'), - (None, 'y2'), - (None, 'zoomAndPan'), -)) - -attr_val_is_uri = frozenset(( - (None, 'href'), - (None, 'src'), - (None, 'cite'), - (None, 'action'), - (None, 'longdesc'), - (None, 'poster'), - (None, 'background'), - (None, 'datasrc'), - (None, 'dynsrc'), - (None, 'lowsrc'), - (None, 'ping'), - (namespaces['xlink'], 'href'), - (namespaces['xml'], 'base'), -)) - -svg_attr_val_allows_ref = frozenset(( - (None, 'clip-path'), - (None, 'color-profile'), - (None, 'cursor'), - (None, 'fill'), - (None, 'filter'), - (None, 'marker'), - (None, 'marker-start'), - (None, 'marker-mid'), - (None, 'marker-end'), - (None, 'mask'), - (None, 'stroke'), -)) - -svg_allow_local_href = frozenset(( - (None, 'altGlyph'), - (None, 'animate'), - (None, 'animateColor'), - (None, 'animateMotion'), - (None, 'animateTransform'), - (None, 'cursor'), - (None, 'feImage'), - (None, 'filter'), - (None, 'linearGradient'), - (None, 'pattern'), - (None, 'radialGradient'), - (None, 'textpath'), - (None, 'tref'), - (None, 'set'), - (None, 'use') -)) - -allowed_css_properties = frozenset(( - 'azimuth', - 'background-color', - 'border-bottom-color', - 'border-collapse', - 'border-color', - 'border-left-color', - 'border-right-color', - 'border-top-color', - 'clear', - 'color', - 'cursor', - 'direction', - 'display', - 'elevation', - 'float', - 'font', - 'font-family', - 'font-size', - 'font-style', - 'font-variant', - 'font-weight', - 'height', - 'letter-spacing', - 'line-height', - 'overflow', - 'pause', - 'pause-after', - 'pause-before', - 'pitch', - 'pitch-range', - 'richness', - 'speak', - 'speak-header', - 'speak-numeral', - 'speak-punctuation', - 'speech-rate', - 'stress', - 'text-align', - 'text-decoration', - 'text-indent', - 'unicode-bidi', - 'vertical-align', - 'voice-family', - 'volume', - 'white-space', - 'width', -)) - -allowed_css_keywords = frozenset(( - 'auto', - 'aqua', - 'black', - 'block', - 'blue', - 'bold', - 'both', - 'bottom', - 'brown', - 'center', - 'collapse', - 'dashed', - 'dotted', - 'fuchsia', - 'gray', - 'green', - '!important', - 'italic', - 'left', - 'lime', - 'maroon', - 'medium', - 'none', - 'navy', - 'normal', - 'nowrap', - 'olive', - 'pointer', - 'purple', - 'red', - 'right', - 'solid', - 'silver', - 'teal', - 'top', - 'transparent', - 'underline', - 'white', - 'yellow', -)) - -allowed_svg_properties = frozenset(( - 'fill', - 'fill-opacity', - 'fill-rule', - 'stroke', - 'stroke-width', - 'stroke-linecap', - 'stroke-linejoin', - 'stroke-opacity', -)) - -allowed_protocols = frozenset(( - 'ed2k', - 'ftp', - 'http', - 'https', - 'irc', - 'mailto', - 'news', - 'gopher', - 'nntp', - 'telnet', - 'webcal', - 'xmpp', - 'callto', - 'feed', - 'urn', - 'aim', - 'rsync', - 'tag', - 'ssh', - 'sftp', - 'rtsp', - 'afs', - 'data', -)) - -allowed_content_types = frozenset(( - 'image/png', - 'image/jpeg', - 'image/gif', - 'image/webp', - 'image/bmp', - 'text/plain', -)) - - -data_content_type = re.compile(r''' - ^ - # Match a content type / - (?P[-a-zA-Z0-9.]+/[-a-zA-Z0-9.]+) - # Match any character set and encoding - (?:(?:;charset=(?:[-a-zA-Z0-9]+)(?:;(?:base64))?) - |(?:;(?:base64))?(?:;charset=(?:[-a-zA-Z0-9]+))?) - # Assume the rest is data - ,.* - $ - ''', - re.VERBOSE) - - -class Filter(base.Filter): - """Sanitizes token stream of XHTML+MathML+SVG and of inline style attributes""" - def __init__(self, - source, - allowed_elements=allowed_elements, - allowed_attributes=allowed_attributes, - allowed_css_properties=allowed_css_properties, - allowed_css_keywords=allowed_css_keywords, - allowed_svg_properties=allowed_svg_properties, - allowed_protocols=allowed_protocols, - allowed_content_types=allowed_content_types, - attr_val_is_uri=attr_val_is_uri, - svg_attr_val_allows_ref=svg_attr_val_allows_ref, - svg_allow_local_href=svg_allow_local_href): - """Creates a Filter - - :arg allowed_elements: set of elements to allow--everything else will - be escaped - - :arg allowed_attributes: set of attributes to allow in - elements--everything else will be stripped - - :arg allowed_css_properties: set of CSS properties to allow--everything - else will be stripped - - :arg allowed_css_keywords: set of CSS keywords to allow--everything - else will be stripped - - :arg allowed_svg_properties: set of SVG properties to allow--everything - else will be removed - - :arg allowed_protocols: set of allowed protocols for URIs - - :arg allowed_content_types: set of allowed content types for ``data`` URIs. - - :arg attr_val_is_uri: set of attributes that have URI values--values - that have a scheme not listed in ``allowed_protocols`` are removed - - :arg svg_attr_val_allows_ref: set of SVG attributes that can have - references - - :arg svg_allow_local_href: set of SVG elements that can have local - hrefs--these are removed - - """ - super(Filter, self).__init__(source) - self.allowed_elements = allowed_elements - self.allowed_attributes = allowed_attributes - self.allowed_css_properties = allowed_css_properties - self.allowed_css_keywords = allowed_css_keywords - self.allowed_svg_properties = allowed_svg_properties - self.allowed_protocols = allowed_protocols - self.allowed_content_types = allowed_content_types - self.attr_val_is_uri = attr_val_is_uri - self.svg_attr_val_allows_ref = svg_attr_val_allows_ref - self.svg_allow_local_href = svg_allow_local_href - - def __iter__(self): - for token in base.Filter.__iter__(self): - token = self.sanitize_token(token) - if token: - yield token - - # Sanitize the +html+, escaping all elements not in ALLOWED_ELEMENTS, and - # stripping out all attributes not in ALLOWED_ATTRIBUTES. Style attributes - # are parsed, and a restricted set, specified by ALLOWED_CSS_PROPERTIES and - # ALLOWED_CSS_KEYWORDS, are allowed through. attributes in ATTR_VAL_IS_URI - # are scanned, and only URI schemes specified in ALLOWED_PROTOCOLS are - # allowed. - # - # sanitize_html('') - # => <script> do_nasty_stuff() </script> - # sanitize_html('Click here for $100') - # => Click here for $100 - def sanitize_token(self, token): - - # accommodate filters which use token_type differently - token_type = token["type"] - if token_type in ("StartTag", "EndTag", "EmptyTag"): - name = token["name"] - namespace = token["namespace"] - if ((namespace, name) in self.allowed_elements or - (namespace is None and - (namespaces["html"], name) in self.allowed_elements)): - return self.allowed_token(token) - else: - return self.disallowed_token(token) - elif token_type == "Comment": - pass - else: - return token - - def allowed_token(self, token): - if "data" in token: - attrs = token["data"] - attr_names = set(attrs.keys()) - - # Remove forbidden attributes - for to_remove in (attr_names - self.allowed_attributes): - del token["data"][to_remove] - attr_names.remove(to_remove) - - # Remove attributes with disallowed URL values - for attr in (attr_names & self.attr_val_is_uri): - assert attr in attrs - # I don't have a clue where this regexp comes from or why it matches those - # characters, nor why we call unescape. I just know it's always been here. - # Should you be worried by this comment in a sanitizer? Yes. On the other hand, all - # this will do is remove *more* than it otherwise would. - val_unescaped = re.sub("[`\x00-\x20\x7f-\xa0\\s]+", '', - unescape(attrs[attr])).lower() - # remove replacement characters from unescaped characters - val_unescaped = val_unescaped.replace("\ufffd", "") - try: - uri = urlparse.urlparse(val_unescaped) - except ValueError: - uri = None - del attrs[attr] - if uri and uri.scheme: - if uri.scheme not in self.allowed_protocols: - del attrs[attr] - if uri.scheme == 'data': - m = data_content_type.match(uri.path) - if not m: - del attrs[attr] - elif m.group('content_type') not in self.allowed_content_types: - del attrs[attr] - - for attr in self.svg_attr_val_allows_ref: - if attr in attrs: - attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)', - ' ', - unescape(attrs[attr])) - if (token["name"] in self.svg_allow_local_href and - (namespaces['xlink'], 'href') in attrs and re.search(r'^\s*[^#\s].*', - attrs[(namespaces['xlink'], 'href')])): - del attrs[(namespaces['xlink'], 'href')] - if (None, 'style') in attrs: - attrs[(None, 'style')] = self.sanitize_css(attrs[(None, 'style')]) - token["data"] = attrs - return token - - def disallowed_token(self, token): - token_type = token["type"] - if token_type == "EndTag": - token["data"] = "" % token["name"] - elif token["data"]: - assert token_type in ("StartTag", "EmptyTag") - attrs = [] - for (ns, name), v in token["data"].items(): - attrs.append(' %s="%s"' % (name if ns is None else "%s:%s" % (prefixes[ns], name), escape(v))) - token["data"] = "<%s%s>" % (token["name"], ''.join(attrs)) - else: - token["data"] = "<%s>" % token["name"] - if token.get("selfClosing"): - token["data"] = token["data"][:-1] + "/>" - - token["type"] = "Characters" - - del token["name"] - return token - - def sanitize_css(self, style): - # disallow urls - style = re.compile(r'url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ', style) - - # gauntlet - if not re.match(r"""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style): - return '' - if not re.match(r"^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style): - return '' - - clean = [] - for prop, value in re.findall(r"([-\w]+)\s*:\s*([^:;]*)", style): - if not value: - continue - if prop.lower() in self.allowed_css_properties: - clean.append(prop + ': ' + value + ';') - elif prop.split('-')[0].lower() in ['background', 'border', 'margin', - 'padding']: - for keyword in value.split(): - if keyword not in self.allowed_css_keywords and \ - not re.match(r"^(#[0-9a-fA-F]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword): # noqa - break - else: - clean.append(prop + ': ' + value + ';') - elif prop.lower() in self.allowed_svg_properties: - clean.append(prop + ': ' + value + ';') - - return ' '.join(clean) diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/whitespace.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/whitespace.py deleted file mode 100644 index 24bb0de..0000000 --- a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/whitespace.py +++ /dev/null @@ -1,38 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -import re - -from . import base -from ..constants import rcdataElements, spaceCharacters -spaceCharacters = "".join(spaceCharacters) - -SPACES_REGEX = re.compile("[%s]+" % spaceCharacters) - - -class Filter(base.Filter): - """Collapses whitespace except in pre, textarea, and script elements""" - spacePreserveElements = frozenset(["pre", "textarea"] + list(rcdataElements)) - - def __iter__(self): - preserve = 0 - for token in base.Filter.__iter__(self): - type = token["type"] - if type == "StartTag" \ - and (preserve or token["name"] in self.spacePreserveElements): - preserve += 1 - - elif type == "EndTag" and preserve: - preserve -= 1 - - elif not preserve and type == "SpaceCharacters" and token["data"]: - # Test on token["data"] above to not introduce spaces where there were not - token["data"] = " " - - elif not preserve and type == "Characters": - token["data"] = collapse_spaces(token["data"]) - - yield token - - -def collapse_spaces(text): - return SPACES_REGEX.sub(' ', text) diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/html5parser.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/html5parser.py deleted file mode 100644 index b185971..0000000 --- a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/html5parser.py +++ /dev/null @@ -1,2791 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals -from pip._vendor.six import with_metaclass, viewkeys - -import types -from collections import OrderedDict - -from . import _inputstream -from . import _tokenizer - -from . import treebuilders -from .treebuilders.base import Marker - -from . import _utils -from .constants import ( - spaceCharacters, asciiUpper2Lower, - specialElements, headingElements, cdataElements, rcdataElements, - tokenTypes, tagTokenTypes, - namespaces, - htmlIntegrationPointElements, mathmlTextIntegrationPointElements, - adjustForeignAttributes as adjustForeignAttributesMap, - adjustMathMLAttributes, adjustSVGAttributes, - E, - _ReparseException -) - - -def parse(doc, treebuilder="etree", namespaceHTMLElements=True, **kwargs): - """Parse an HTML document as a string or file-like object into a tree - - :arg doc: the document to parse as a string or file-like object - - :arg treebuilder: the treebuilder to use when parsing - - :arg namespaceHTMLElements: whether or not to namespace HTML elements - - :returns: parsed tree - - Example: - - >>> from html5lib.html5parser import parse - >>> parse('

This is a doc

') - - - """ - tb = treebuilders.getTreeBuilder(treebuilder) - p = HTMLParser(tb, namespaceHTMLElements=namespaceHTMLElements) - return p.parse(doc, **kwargs) - - -def parseFragment(doc, container="div", treebuilder="etree", namespaceHTMLElements=True, **kwargs): - """Parse an HTML fragment as a string or file-like object into a tree - - :arg doc: the fragment to parse as a string or file-like object - - :arg container: the container context to parse the fragment in - - :arg treebuilder: the treebuilder to use when parsing - - :arg namespaceHTMLElements: whether or not to namespace HTML elements - - :returns: parsed tree - - Example: - - >>> from html5lib.html5libparser import parseFragment - >>> parseFragment('this is a fragment') - - - """ - tb = treebuilders.getTreeBuilder(treebuilder) - p = HTMLParser(tb, namespaceHTMLElements=namespaceHTMLElements) - return p.parseFragment(doc, container=container, **kwargs) - - -def method_decorator_metaclass(function): - class Decorated(type): - def __new__(meta, classname, bases, classDict): - for attributeName, attribute in classDict.items(): - if isinstance(attribute, types.FunctionType): - attribute = function(attribute) - - classDict[attributeName] = attribute - return type.__new__(meta, classname, bases, classDict) - return Decorated - - -class HTMLParser(object): - """HTML parser - - Generates a tree structure from a stream of (possibly malformed) HTML. - - """ - - def __init__(self, tree=None, strict=False, namespaceHTMLElements=True, debug=False): - """ - :arg tree: a treebuilder class controlling the type of tree that will be - returned. Built in treebuilders can be accessed through - html5lib.treebuilders.getTreeBuilder(treeType) - - :arg strict: raise an exception when a parse error is encountered - - :arg namespaceHTMLElements: whether or not to namespace HTML elements - - :arg debug: whether or not to enable debug mode which logs things - - Example: - - >>> from html5lib.html5parser import HTMLParser - >>> parser = HTMLParser() # generates parser with etree builder - >>> parser = HTMLParser('lxml', strict=True) # generates parser with lxml builder which is strict - - """ - - # Raise an exception on the first error encountered - self.strict = strict - - if tree is None: - tree = treebuilders.getTreeBuilder("etree") - self.tree = tree(namespaceHTMLElements) - self.errors = [] - - self.phases = dict([(name, cls(self, self.tree)) for name, cls in - getPhases(debug).items()]) - - def _parse(self, stream, innerHTML=False, container="div", scripting=False, **kwargs): - - self.innerHTMLMode = innerHTML - self.container = container - self.scripting = scripting - self.tokenizer = _tokenizer.HTMLTokenizer(stream, parser=self, **kwargs) - self.reset() - - try: - self.mainLoop() - except _ReparseException: - self.reset() - self.mainLoop() - - def reset(self): - self.tree.reset() - self.firstStartTag = False - self.errors = [] - self.log = [] # only used with debug mode - # "quirks" / "limited quirks" / "no quirks" - self.compatMode = "no quirks" - - if self.innerHTMLMode: - self.innerHTML = self.container.lower() - - if self.innerHTML in cdataElements: - self.tokenizer.state = self.tokenizer.rcdataState - elif self.innerHTML in rcdataElements: - self.tokenizer.state = self.tokenizer.rawtextState - elif self.innerHTML == 'plaintext': - self.tokenizer.state = self.tokenizer.plaintextState - else: - # state already is data state - # self.tokenizer.state = self.tokenizer.dataState - pass - self.phase = self.phases["beforeHtml"] - self.phase.insertHtmlElement() - self.resetInsertionMode() - else: - self.innerHTML = False # pylint:disable=redefined-variable-type - self.phase = self.phases["initial"] - - self.lastPhase = None - - self.beforeRCDataPhase = None - - self.framesetOK = True - - @property - def documentEncoding(self): - """Name of the character encoding that was used to decode the input stream, or - :obj:`None` if that is not determined yet - - """ - if not hasattr(self, 'tokenizer'): - return None - return self.tokenizer.stream.charEncoding[0].name - - def isHTMLIntegrationPoint(self, element): - if (element.name == "annotation-xml" and - element.namespace == namespaces["mathml"]): - return ("encoding" in element.attributes and - element.attributes["encoding"].translate( - asciiUpper2Lower) in - ("text/html", "application/xhtml+xml")) - else: - return (element.namespace, element.name) in htmlIntegrationPointElements - - def isMathMLTextIntegrationPoint(self, element): - return (element.namespace, element.name) in mathmlTextIntegrationPointElements - - def mainLoop(self): - CharactersToken = tokenTypes["Characters"] - SpaceCharactersToken = tokenTypes["SpaceCharacters"] - StartTagToken = tokenTypes["StartTag"] - EndTagToken = tokenTypes["EndTag"] - CommentToken = tokenTypes["Comment"] - DoctypeToken = tokenTypes["Doctype"] - ParseErrorToken = tokenTypes["ParseError"] - - for token in self.normalizedTokens(): - prev_token = None - new_token = token - while new_token is not None: - prev_token = new_token - currentNode = self.tree.openElements[-1] if self.tree.openElements else None - currentNodeNamespace = currentNode.namespace if currentNode else None - currentNodeName = currentNode.name if currentNode else None - - type = new_token["type"] - - if type == ParseErrorToken: - self.parseError(new_token["data"], new_token.get("datavars", {})) - new_token = None - else: - if (len(self.tree.openElements) == 0 or - currentNodeNamespace == self.tree.defaultNamespace or - (self.isMathMLTextIntegrationPoint(currentNode) and - ((type == StartTagToken and - token["name"] not in frozenset(["mglyph", "malignmark"])) or - type in (CharactersToken, SpaceCharactersToken))) or - (currentNodeNamespace == namespaces["mathml"] and - currentNodeName == "annotation-xml" and - type == StartTagToken and - token["name"] == "svg") or - (self.isHTMLIntegrationPoint(currentNode) and - type in (StartTagToken, CharactersToken, SpaceCharactersToken))): - phase = self.phase - else: - phase = self.phases["inForeignContent"] - - if type == CharactersToken: - new_token = phase.processCharacters(new_token) - elif type == SpaceCharactersToken: - new_token = phase.processSpaceCharacters(new_token) - elif type == StartTagToken: - new_token = phase.processStartTag(new_token) - elif type == EndTagToken: - new_token = phase.processEndTag(new_token) - elif type == CommentToken: - new_token = phase.processComment(new_token) - elif type == DoctypeToken: - new_token = phase.processDoctype(new_token) - - if (type == StartTagToken and prev_token["selfClosing"] and - not prev_token["selfClosingAcknowledged"]): - self.parseError("non-void-element-with-trailing-solidus", - {"name": prev_token["name"]}) - - # When the loop finishes it's EOF - reprocess = True - phases = [] - while reprocess: - phases.append(self.phase) - reprocess = self.phase.processEOF() - if reprocess: - assert self.phase not in phases - - def normalizedTokens(self): - for token in self.tokenizer: - yield self.normalizeToken(token) - - def parse(self, stream, *args, **kwargs): - """Parse a HTML document into a well-formed tree - - :arg stream: a file-like object or string containing the HTML to be parsed - - The optional encoding parameter must be a string that indicates - the encoding. If specified, that encoding will be used, - regardless of any BOM or later declaration (such as in a meta - element). - - :arg scripting: treat noscript elements as if JavaScript was turned on - - :returns: parsed tree - - Example: - - >>> from html5lib.html5parser import HTMLParser - >>> parser = HTMLParser() - >>> parser.parse('

This is a doc

') - - - """ - self._parse(stream, False, None, *args, **kwargs) - return self.tree.getDocument() - - def parseFragment(self, stream, *args, **kwargs): - """Parse a HTML fragment into a well-formed tree fragment - - :arg container: name of the element we're setting the innerHTML - property if set to None, default to 'div' - - :arg stream: a file-like object or string containing the HTML to be parsed - - The optional encoding parameter must be a string that indicates - the encoding. If specified, that encoding will be used, - regardless of any BOM or later declaration (such as in a meta - element) - - :arg scripting: treat noscript elements as if JavaScript was turned on - - :returns: parsed tree - - Example: - - >>> from html5lib.html5libparser import HTMLParser - >>> parser = HTMLParser() - >>> parser.parseFragment('this is a fragment') - - - """ - self._parse(stream, True, *args, **kwargs) - return self.tree.getFragment() - - def parseError(self, errorcode="XXX-undefined-error", datavars=None): - # XXX The idea is to make errorcode mandatory. - if datavars is None: - datavars = {} - self.errors.append((self.tokenizer.stream.position(), errorcode, datavars)) - if self.strict: - raise ParseError(E[errorcode] % datavars) - - def normalizeToken(self, token): - # HTML5 specific normalizations to the token stream - if token["type"] == tokenTypes["StartTag"]: - raw = token["data"] - token["data"] = OrderedDict(raw) - if len(raw) > len(token["data"]): - # we had some duplicated attribute, fix so first wins - token["data"].update(raw[::-1]) - - return token - - def adjustMathMLAttributes(self, token): - adjust_attributes(token, adjustMathMLAttributes) - - def adjustSVGAttributes(self, token): - adjust_attributes(token, adjustSVGAttributes) - - def adjustForeignAttributes(self, token): - adjust_attributes(token, adjustForeignAttributesMap) - - def reparseTokenNormal(self, token): - # pylint:disable=unused-argument - self.parser.phase() - - def resetInsertionMode(self): - # The name of this method is mostly historical. (It's also used in the - # specification.) - last = False - newModes = { - "select": "inSelect", - "td": "inCell", - "th": "inCell", - "tr": "inRow", - "tbody": "inTableBody", - "thead": "inTableBody", - "tfoot": "inTableBody", - "caption": "inCaption", - "colgroup": "inColumnGroup", - "table": "inTable", - "head": "inBody", - "body": "inBody", - "frameset": "inFrameset", - "html": "beforeHead" - } - for node in self.tree.openElements[::-1]: - nodeName = node.name - new_phase = None - if node == self.tree.openElements[0]: - assert self.innerHTML - last = True - nodeName = self.innerHTML - # Check for conditions that should only happen in the innerHTML - # case - if nodeName in ("select", "colgroup", "head", "html"): - assert self.innerHTML - - if not last and node.namespace != self.tree.defaultNamespace: - continue - - if nodeName in newModes: - new_phase = self.phases[newModes[nodeName]] - break - elif last: - new_phase = self.phases["inBody"] - break - - self.phase = new_phase - - def parseRCDataRawtext(self, token, contentType): - # Generic RCDATA/RAWTEXT Parsing algorithm - assert contentType in ("RAWTEXT", "RCDATA") - - self.tree.insertElement(token) - - if contentType == "RAWTEXT": - self.tokenizer.state = self.tokenizer.rawtextState - else: - self.tokenizer.state = self.tokenizer.rcdataState - - self.originalPhase = self.phase - - self.phase = self.phases["text"] - - -@_utils.memoize -def getPhases(debug): - def log(function): - """Logger that records which phase processes each token""" - type_names = dict((value, key) for key, value in - tokenTypes.items()) - - def wrapped(self, *args, **kwargs): - if function.__name__.startswith("process") and len(args) > 0: - token = args[0] - try: - info = {"type": type_names[token['type']]} - except: - raise - if token['type'] in tagTokenTypes: - info["name"] = token['name'] - - self.parser.log.append((self.parser.tokenizer.state.__name__, - self.parser.phase.__class__.__name__, - self.__class__.__name__, - function.__name__, - info)) - return function(self, *args, **kwargs) - else: - return function(self, *args, **kwargs) - return wrapped - - def getMetaclass(use_metaclass, metaclass_func): - if use_metaclass: - return method_decorator_metaclass(metaclass_func) - else: - return type - - # pylint:disable=unused-argument - class Phase(with_metaclass(getMetaclass(debug, log))): - """Base class for helper object that implements each phase of processing - """ - - def __init__(self, parser, tree): - self.parser = parser - self.tree = tree - - def processEOF(self): - raise NotImplementedError - - def processComment(self, token): - # For most phases the following is correct. Where it's not it will be - # overridden. - self.tree.insertComment(token, self.tree.openElements[-1]) - - def processDoctype(self, token): - self.parser.parseError("unexpected-doctype") - - def processCharacters(self, token): - self.tree.insertText(token["data"]) - - def processSpaceCharacters(self, token): - self.tree.insertText(token["data"]) - - def processStartTag(self, token): - return self.startTagHandler[token["name"]](token) - - def startTagHtml(self, token): - if not self.parser.firstStartTag and token["name"] == "html": - self.parser.parseError("non-html-root") - # XXX Need a check here to see if the first start tag token emitted is - # this token... If it's not, invoke self.parser.parseError(). - for attr, value in token["data"].items(): - if attr not in self.tree.openElements[0].attributes: - self.tree.openElements[0].attributes[attr] = value - self.parser.firstStartTag = False - - def processEndTag(self, token): - return self.endTagHandler[token["name"]](token) - - class InitialPhase(Phase): - def processSpaceCharacters(self, token): - pass - - def processComment(self, token): - self.tree.insertComment(token, self.tree.document) - - def processDoctype(self, token): - name = token["name"] - publicId = token["publicId"] - systemId = token["systemId"] - correct = token["correct"] - - if (name != "html" or publicId is not None or - systemId is not None and systemId != "about:legacy-compat"): - self.parser.parseError("unknown-doctype") - - if publicId is None: - publicId = "" - - self.tree.insertDoctype(token) - - if publicId != "": - publicId = publicId.translate(asciiUpper2Lower) - - if (not correct or token["name"] != "html" or - publicId.startswith( - ("+//silmaril//dtd html pro v0r11 19970101//", - "-//advasoft ltd//dtd html 3.0 aswedit + extensions//", - "-//as//dtd html 3.0 aswedit + extensions//", - "-//ietf//dtd html 2.0 level 1//", - "-//ietf//dtd html 2.0 level 2//", - "-//ietf//dtd html 2.0 strict level 1//", - "-//ietf//dtd html 2.0 strict level 2//", - "-//ietf//dtd html 2.0 strict//", - "-//ietf//dtd html 2.0//", - "-//ietf//dtd html 2.1e//", - "-//ietf//dtd html 3.0//", - "-//ietf//dtd html 3.2 final//", - "-//ietf//dtd html 3.2//", - "-//ietf//dtd html 3//", - "-//ietf//dtd html level 0//", - "-//ietf//dtd html level 1//", - "-//ietf//dtd html level 2//", - "-//ietf//dtd html level 3//", - "-//ietf//dtd html strict level 0//", - "-//ietf//dtd html strict level 1//", - "-//ietf//dtd html strict level 2//", - "-//ietf//dtd html strict level 3//", - "-//ietf//dtd html strict//", - "-//ietf//dtd html//", - "-//metrius//dtd metrius presentational//", - "-//microsoft//dtd internet explorer 2.0 html strict//", - "-//microsoft//dtd internet explorer 2.0 html//", - "-//microsoft//dtd internet explorer 2.0 tables//", - "-//microsoft//dtd internet explorer 3.0 html strict//", - "-//microsoft//dtd internet explorer 3.0 html//", - "-//microsoft//dtd internet explorer 3.0 tables//", - "-//netscape comm. corp.//dtd html//", - "-//netscape comm. corp.//dtd strict html//", - "-//o'reilly and associates//dtd html 2.0//", - "-//o'reilly and associates//dtd html extended 1.0//", - "-//o'reilly and associates//dtd html extended relaxed 1.0//", - "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//", - "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//", - "-//spyglass//dtd html 2.0 extended//", - "-//sq//dtd html 2.0 hotmetal + extensions//", - "-//sun microsystems corp.//dtd hotjava html//", - "-//sun microsystems corp.//dtd hotjava strict html//", - "-//w3c//dtd html 3 1995-03-24//", - "-//w3c//dtd html 3.2 draft//", - "-//w3c//dtd html 3.2 final//", - "-//w3c//dtd html 3.2//", - "-//w3c//dtd html 3.2s draft//", - "-//w3c//dtd html 4.0 frameset//", - "-//w3c//dtd html 4.0 transitional//", - "-//w3c//dtd html experimental 19960712//", - "-//w3c//dtd html experimental 970421//", - "-//w3c//dtd w3 html//", - "-//w3o//dtd w3 html 3.0//", - "-//webtechs//dtd mozilla html 2.0//", - "-//webtechs//dtd mozilla html//")) or - publicId in ("-//w3o//dtd w3 html strict 3.0//en//", - "-/w3c/dtd html 4.0 transitional/en", - "html") or - publicId.startswith( - ("-//w3c//dtd html 4.01 frameset//", - "-//w3c//dtd html 4.01 transitional//")) and - systemId is None or - systemId and systemId.lower() == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"): - self.parser.compatMode = "quirks" - elif (publicId.startswith( - ("-//w3c//dtd xhtml 1.0 frameset//", - "-//w3c//dtd xhtml 1.0 transitional//")) or - publicId.startswith( - ("-//w3c//dtd html 4.01 frameset//", - "-//w3c//dtd html 4.01 transitional//")) and - systemId is not None): - self.parser.compatMode = "limited quirks" - - self.parser.phase = self.parser.phases["beforeHtml"] - - def anythingElse(self): - self.parser.compatMode = "quirks" - self.parser.phase = self.parser.phases["beforeHtml"] - - def processCharacters(self, token): - self.parser.parseError("expected-doctype-but-got-chars") - self.anythingElse() - return token - - def processStartTag(self, token): - self.parser.parseError("expected-doctype-but-got-start-tag", - {"name": token["name"]}) - self.anythingElse() - return token - - def processEndTag(self, token): - self.parser.parseError("expected-doctype-but-got-end-tag", - {"name": token["name"]}) - self.anythingElse() - return token - - def processEOF(self): - self.parser.parseError("expected-doctype-but-got-eof") - self.anythingElse() - return True - - class BeforeHtmlPhase(Phase): - # helper methods - def insertHtmlElement(self): - self.tree.insertRoot(impliedTagToken("html", "StartTag")) - self.parser.phase = self.parser.phases["beforeHead"] - - # other - def processEOF(self): - self.insertHtmlElement() - return True - - def processComment(self, token): - self.tree.insertComment(token, self.tree.document) - - def processSpaceCharacters(self, token): - pass - - def processCharacters(self, token): - self.insertHtmlElement() - return token - - def processStartTag(self, token): - if token["name"] == "html": - self.parser.firstStartTag = True - self.insertHtmlElement() - return token - - def processEndTag(self, token): - if token["name"] not in ("head", "body", "html", "br"): - self.parser.parseError("unexpected-end-tag-before-html", - {"name": token["name"]}) - else: - self.insertHtmlElement() - return token - - class BeforeHeadPhase(Phase): - def __init__(self, parser, tree): - Phase.__init__(self, parser, tree) - - self.startTagHandler = _utils.MethodDispatcher([ - ("html", self.startTagHtml), - ("head", self.startTagHead) - ]) - self.startTagHandler.default = self.startTagOther - - self.endTagHandler = _utils.MethodDispatcher([ - (("head", "body", "html", "br"), self.endTagImplyHead) - ]) - self.endTagHandler.default = self.endTagOther - - def processEOF(self): - self.startTagHead(impliedTagToken("head", "StartTag")) - return True - - def processSpaceCharacters(self, token): - pass - - def processCharacters(self, token): - self.startTagHead(impliedTagToken("head", "StartTag")) - return token - - def startTagHtml(self, token): - return self.parser.phases["inBody"].processStartTag(token) - - def startTagHead(self, token): - self.tree.insertElement(token) - self.tree.headPointer = self.tree.openElements[-1] - self.parser.phase = self.parser.phases["inHead"] - - def startTagOther(self, token): - self.startTagHead(impliedTagToken("head", "StartTag")) - return token - - def endTagImplyHead(self, token): - self.startTagHead(impliedTagToken("head", "StartTag")) - return token - - def endTagOther(self, token): - self.parser.parseError("end-tag-after-implied-root", - {"name": token["name"]}) - - class InHeadPhase(Phase): - def __init__(self, parser, tree): - Phase.__init__(self, parser, tree) - - self.startTagHandler = _utils.MethodDispatcher([ - ("html", self.startTagHtml), - ("title", self.startTagTitle), - (("noframes", "style"), self.startTagNoFramesStyle), - ("noscript", self.startTagNoscript), - ("script", self.startTagScript), - (("base", "basefont", "bgsound", "command", "link"), - self.startTagBaseLinkCommand), - ("meta", self.startTagMeta), - ("head", self.startTagHead) - ]) - self.startTagHandler.default = self.startTagOther - - self.endTagHandler = _utils.MethodDispatcher([ - ("head", self.endTagHead), - (("br", "html", "body"), self.endTagHtmlBodyBr) - ]) - self.endTagHandler.default = self.endTagOther - - # the real thing - def processEOF(self): - self.anythingElse() - return True - - def processCharacters(self, token): - self.anythingElse() - return token - - def startTagHtml(self, token): - return self.parser.phases["inBody"].processStartTag(token) - - def startTagHead(self, token): - self.parser.parseError("two-heads-are-not-better-than-one") - - def startTagBaseLinkCommand(self, token): - self.tree.insertElement(token) - self.tree.openElements.pop() - token["selfClosingAcknowledged"] = True - - def startTagMeta(self, token): - self.tree.insertElement(token) - self.tree.openElements.pop() - token["selfClosingAcknowledged"] = True - - attributes = token["data"] - if self.parser.tokenizer.stream.charEncoding[1] == "tentative": - if "charset" in attributes: - self.parser.tokenizer.stream.changeEncoding(attributes["charset"]) - elif ("content" in attributes and - "http-equiv" in attributes and - attributes["http-equiv"].lower() == "content-type"): - # Encoding it as UTF-8 here is a hack, as really we should pass - # the abstract Unicode string, and just use the - # ContentAttrParser on that, but using UTF-8 allows all chars - # to be encoded and as a ASCII-superset works. - data = _inputstream.EncodingBytes(attributes["content"].encode("utf-8")) - parser = _inputstream.ContentAttrParser(data) - codec = parser.parse() - self.parser.tokenizer.stream.changeEncoding(codec) - - def startTagTitle(self, token): - self.parser.parseRCDataRawtext(token, "RCDATA") - - def startTagNoFramesStyle(self, token): - # Need to decide whether to implement the scripting-disabled case - self.parser.parseRCDataRawtext(token, "RAWTEXT") - - def startTagNoscript(self, token): - if self.parser.scripting: - self.parser.parseRCDataRawtext(token, "RAWTEXT") - else: - self.tree.insertElement(token) - self.parser.phase = self.parser.phases["inHeadNoscript"] - - def startTagScript(self, token): - self.tree.insertElement(token) - self.parser.tokenizer.state = self.parser.tokenizer.scriptDataState - self.parser.originalPhase = self.parser.phase - self.parser.phase = self.parser.phases["text"] - - def startTagOther(self, token): - self.anythingElse() - return token - - def endTagHead(self, token): - node = self.parser.tree.openElements.pop() - assert node.name == "head", "Expected head got %s" % node.name - self.parser.phase = self.parser.phases["afterHead"] - - def endTagHtmlBodyBr(self, token): - self.anythingElse() - return token - - def endTagOther(self, token): - self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) - - def anythingElse(self): - self.endTagHead(impliedTagToken("head")) - - class InHeadNoscriptPhase(Phase): - def __init__(self, parser, tree): - Phase.__init__(self, parser, tree) - - self.startTagHandler = _utils.MethodDispatcher([ - ("html", self.startTagHtml), - (("basefont", "bgsound", "link", "meta", "noframes", "style"), self.startTagBaseLinkCommand), - (("head", "noscript"), self.startTagHeadNoscript), - ]) - self.startTagHandler.default = self.startTagOther - - self.endTagHandler = _utils.MethodDispatcher([ - ("noscript", self.endTagNoscript), - ("br", self.endTagBr), - ]) - self.endTagHandler.default = self.endTagOther - - def processEOF(self): - self.parser.parseError("eof-in-head-noscript") - self.anythingElse() - return True - - def processComment(self, token): - return self.parser.phases["inHead"].processComment(token) - - def processCharacters(self, token): - self.parser.parseError("char-in-head-noscript") - self.anythingElse() - return token - - def processSpaceCharacters(self, token): - return self.parser.phases["inHead"].processSpaceCharacters(token) - - def startTagHtml(self, token): - return self.parser.phases["inBody"].processStartTag(token) - - def startTagBaseLinkCommand(self, token): - return self.parser.phases["inHead"].processStartTag(token) - - def startTagHeadNoscript(self, token): - self.parser.parseError("unexpected-start-tag", {"name": token["name"]}) - - def startTagOther(self, token): - self.parser.parseError("unexpected-inhead-noscript-tag", {"name": token["name"]}) - self.anythingElse() - return token - - def endTagNoscript(self, token): - node = self.parser.tree.openElements.pop() - assert node.name == "noscript", "Expected noscript got %s" % node.name - self.parser.phase = self.parser.phases["inHead"] - - def endTagBr(self, token): - self.parser.parseError("unexpected-inhead-noscript-tag", {"name": token["name"]}) - self.anythingElse() - return token - - def endTagOther(self, token): - self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) - - def anythingElse(self): - # Caller must raise parse error first! - self.endTagNoscript(impliedTagToken("noscript")) - - class AfterHeadPhase(Phase): - def __init__(self, parser, tree): - Phase.__init__(self, parser, tree) - - self.startTagHandler = _utils.MethodDispatcher([ - ("html", self.startTagHtml), - ("body", self.startTagBody), - ("frameset", self.startTagFrameset), - (("base", "basefont", "bgsound", "link", "meta", "noframes", "script", - "style", "title"), - self.startTagFromHead), - ("head", self.startTagHead) - ]) - self.startTagHandler.default = self.startTagOther - self.endTagHandler = _utils.MethodDispatcher([(("body", "html", "br"), - self.endTagHtmlBodyBr)]) - self.endTagHandler.default = self.endTagOther - - def processEOF(self): - self.anythingElse() - return True - - def processCharacters(self, token): - self.anythingElse() - return token - - def startTagHtml(self, token): - return self.parser.phases["inBody"].processStartTag(token) - - def startTagBody(self, token): - self.parser.framesetOK = False - self.tree.insertElement(token) - self.parser.phase = self.parser.phases["inBody"] - - def startTagFrameset(self, token): - self.tree.insertElement(token) - self.parser.phase = self.parser.phases["inFrameset"] - - def startTagFromHead(self, token): - self.parser.parseError("unexpected-start-tag-out-of-my-head", - {"name": token["name"]}) - self.tree.openElements.append(self.tree.headPointer) - self.parser.phases["inHead"].processStartTag(token) - for node in self.tree.openElements[::-1]: - if node.name == "head": - self.tree.openElements.remove(node) - break - - def startTagHead(self, token): - self.parser.parseError("unexpected-start-tag", {"name": token["name"]}) - - def startTagOther(self, token): - self.anythingElse() - return token - - def endTagHtmlBodyBr(self, token): - self.anythingElse() - return token - - def endTagOther(self, token): - self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) - - def anythingElse(self): - self.tree.insertElement(impliedTagToken("body", "StartTag")) - self.parser.phase = self.parser.phases["inBody"] - self.parser.framesetOK = True - - class InBodyPhase(Phase): - # http://www.whatwg.org/specs/web-apps/current-work/#parsing-main-inbody - # the really-really-really-very crazy mode - def __init__(self, parser, tree): - Phase.__init__(self, parser, tree) - - # Set this to the default handler - self.processSpaceCharacters = self.processSpaceCharactersNonPre - - self.startTagHandler = _utils.MethodDispatcher([ - ("html", self.startTagHtml), - (("base", "basefont", "bgsound", "command", "link", "meta", - "script", "style", "title"), - self.startTagProcessInHead), - ("body", self.startTagBody), - ("frameset", self.startTagFrameset), - (("address", "article", "aside", "blockquote", "center", "details", - "dir", "div", "dl", "fieldset", "figcaption", "figure", - "footer", "header", "hgroup", "main", "menu", "nav", "ol", "p", - "section", "summary", "ul"), - self.startTagCloseP), - (headingElements, self.startTagHeading), - (("pre", "listing"), self.startTagPreListing), - ("form", self.startTagForm), - (("li", "dd", "dt"), self.startTagListItem), - ("plaintext", self.startTagPlaintext), - ("a", self.startTagA), - (("b", "big", "code", "em", "font", "i", "s", "small", "strike", - "strong", "tt", "u"), self.startTagFormatting), - ("nobr", self.startTagNobr), - ("button", self.startTagButton), - (("applet", "marquee", "object"), self.startTagAppletMarqueeObject), - ("xmp", self.startTagXmp), - ("table", self.startTagTable), - (("area", "br", "embed", "img", "keygen", "wbr"), - self.startTagVoidFormatting), - (("param", "source", "track"), self.startTagParamSource), - ("input", self.startTagInput), - ("hr", self.startTagHr), - ("image", self.startTagImage), - ("isindex", self.startTagIsIndex), - ("textarea", self.startTagTextarea), - ("iframe", self.startTagIFrame), - ("noscript", self.startTagNoscript), - (("noembed", "noframes"), self.startTagRawtext), - ("select", self.startTagSelect), - (("rp", "rt"), self.startTagRpRt), - (("option", "optgroup"), self.startTagOpt), - (("math"), self.startTagMath), - (("svg"), self.startTagSvg), - (("caption", "col", "colgroup", "frame", "head", - "tbody", "td", "tfoot", "th", "thead", - "tr"), self.startTagMisplaced) - ]) - self.startTagHandler.default = self.startTagOther - - self.endTagHandler = _utils.MethodDispatcher([ - ("body", self.endTagBody), - ("html", self.endTagHtml), - (("address", "article", "aside", "blockquote", "button", "center", - "details", "dialog", "dir", "div", "dl", "fieldset", "figcaption", "figure", - "footer", "header", "hgroup", "listing", "main", "menu", "nav", "ol", "pre", - "section", "summary", "ul"), self.endTagBlock), - ("form", self.endTagForm), - ("p", self.endTagP), - (("dd", "dt", "li"), self.endTagListItem), - (headingElements, self.endTagHeading), - (("a", "b", "big", "code", "em", "font", "i", "nobr", "s", "small", - "strike", "strong", "tt", "u"), self.endTagFormatting), - (("applet", "marquee", "object"), self.endTagAppletMarqueeObject), - ("br", self.endTagBr), - ]) - self.endTagHandler.default = self.endTagOther - - def isMatchingFormattingElement(self, node1, node2): - return (node1.name == node2.name and - node1.namespace == node2.namespace and - node1.attributes == node2.attributes) - - # helper - def addFormattingElement(self, token): - self.tree.insertElement(token) - element = self.tree.openElements[-1] - - matchingElements = [] - for node in self.tree.activeFormattingElements[::-1]: - if node is Marker: - break - elif self.isMatchingFormattingElement(node, element): - matchingElements.append(node) - - assert len(matchingElements) <= 3 - if len(matchingElements) == 3: - self.tree.activeFormattingElements.remove(matchingElements[-1]) - self.tree.activeFormattingElements.append(element) - - # the real deal - def processEOF(self): - allowed_elements = frozenset(("dd", "dt", "li", "p", "tbody", "td", - "tfoot", "th", "thead", "tr", "body", - "html")) - for node in self.tree.openElements[::-1]: - if node.name not in allowed_elements: - self.parser.parseError("expected-closing-tag-but-got-eof") - break - # Stop parsing - - def processSpaceCharactersDropNewline(self, token): - # Sometimes (start of
, , and