diff options
Diffstat (limited to 'venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib')
34 files changed, 13190 insertions, 0 deletions
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/__init__.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/__init__.py new file mode 100644 index 0000000..0b54002 --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/__init__.py | |||
| @@ -0,0 +1,35 @@ | |||
"""
HTML parsing library based on the `WHATWG HTML specification
<https://whatwg.org/html>`_. The parser is designed to be compatible with
existing HTML found in the wild and implements well-defined error recovery that
is largely compatible with modern desktop web browsers.

Example usage::

    from pip._vendor import html5lib
    with open("my_document.html", "rb") as f:
        tree = html5lib.parse(f)

For convenience, this module re-exports the following names:

* :func:`~.html5parser.parse`
* :func:`~.html5parser.parseFragment`
* :class:`~.html5parser.HTMLParser`
* :func:`~.treebuilders.getTreeBuilder`
* :func:`~.treewalkers.getTreeWalker`
* :func:`~.serializer.serialize`
"""

from __future__ import absolute_import, division, unicode_literals

from .html5parser import HTMLParser, parse, parseFragment
from .treebuilders import getTreeBuilder
from .treewalkers import getTreeWalker
from .serializer import serialize

# Names re-exported as the package's public API.
__all__ = ["HTMLParser", "parse", "parseFragment", "getTreeBuilder",
           "getTreeWalker", "serialize"]

# this has to be at the top level, see how setup.py parses this
#: Distribution version number.
__version__ = "1.0.1"
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_ihatexml.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_ihatexml.py new file mode 100644 index 0000000..68f9b1e --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_ihatexml.py | |||
| @@ -0,0 +1,288 @@ | |||
| 1 | from __future__ import absolute_import, division, unicode_literals | ||
| 2 | |||
| 3 | import re | ||
| 4 | import warnings | ||
| 5 | |||
| 6 | from .constants import DataLossWarning | ||
| 7 | |||
| 8 | baseChar = """ | ||
| 9 | [#x0041-#x005A] | [#x0061-#x007A] | [#x00C0-#x00D6] | [#x00D8-#x00F6] | | ||
| 10 | [#x00F8-#x00FF] | [#x0100-#x0131] | [#x0134-#x013E] | [#x0141-#x0148] | | ||
| 11 | [#x014A-#x017E] | [#x0180-#x01C3] | [#x01CD-#x01F0] | [#x01F4-#x01F5] | | ||
| 12 | [#x01FA-#x0217] | [#x0250-#x02A8] | [#x02BB-#x02C1] | #x0386 | | ||
| 13 | [#x0388-#x038A] | #x038C | [#x038E-#x03A1] | [#x03A3-#x03CE] | | ||
| 14 | [#x03D0-#x03D6] | #x03DA | #x03DC | #x03DE | #x03E0 | [#x03E2-#x03F3] | | ||
| 15 | [#x0401-#x040C] | [#x040E-#x044F] | [#x0451-#x045C] | [#x045E-#x0481] | | ||
| 16 | [#x0490-#x04C4] | [#x04C7-#x04C8] | [#x04CB-#x04CC] | [#x04D0-#x04EB] | | ||
| 17 | [#x04EE-#x04F5] | [#x04F8-#x04F9] | [#x0531-#x0556] | #x0559 | | ||
| 18 | [#x0561-#x0586] | [#x05D0-#x05EA] | [#x05F0-#x05F2] | [#x0621-#x063A] | | ||
| 19 | [#x0641-#x064A] | [#x0671-#x06B7] | [#x06BA-#x06BE] | [#x06C0-#x06CE] | | ||
| 20 | [#x06D0-#x06D3] | #x06D5 | [#x06E5-#x06E6] | [#x0905-#x0939] | #x093D | | ||
| 21 | [#x0958-#x0961] | [#x0985-#x098C] | [#x098F-#x0990] | [#x0993-#x09A8] | | ||
| 22 | [#x09AA-#x09B0] | #x09B2 | [#x09B6-#x09B9] | [#x09DC-#x09DD] | | ||
| 23 | [#x09DF-#x09E1] | [#x09F0-#x09F1] | [#x0A05-#x0A0A] | [#x0A0F-#x0A10] | | ||
| 24 | [#x0A13-#x0A28] | [#x0A2A-#x0A30] | [#x0A32-#x0A33] | [#x0A35-#x0A36] | | ||
| 25 | [#x0A38-#x0A39] | [#x0A59-#x0A5C] | #x0A5E | [#x0A72-#x0A74] | | ||
| 26 | [#x0A85-#x0A8B] | #x0A8D | [#x0A8F-#x0A91] | [#x0A93-#x0AA8] | | ||
| 27 | [#x0AAA-#x0AB0] | [#x0AB2-#x0AB3] | [#x0AB5-#x0AB9] | #x0ABD | #x0AE0 | | ||
| 28 | [#x0B05-#x0B0C] | [#x0B0F-#x0B10] | [#x0B13-#x0B28] | [#x0B2A-#x0B30] | | ||
| 29 | [#x0B32-#x0B33] | [#x0B36-#x0B39] | #x0B3D | [#x0B5C-#x0B5D] | | ||
| 30 | [#x0B5F-#x0B61] | [#x0B85-#x0B8A] | [#x0B8E-#x0B90] | [#x0B92-#x0B95] | | ||
| 31 | [#x0B99-#x0B9A] | #x0B9C | [#x0B9E-#x0B9F] | [#x0BA3-#x0BA4] | | ||
| 32 | [#x0BA8-#x0BAA] | [#x0BAE-#x0BB5] | [#x0BB7-#x0BB9] | [#x0C05-#x0C0C] | | ||
| 33 | [#x0C0E-#x0C10] | [#x0C12-#x0C28] | [#x0C2A-#x0C33] | [#x0C35-#x0C39] | | ||
| 34 | [#x0C60-#x0C61] | [#x0C85-#x0C8C] | [#x0C8E-#x0C90] | [#x0C92-#x0CA8] | | ||
| 35 | [#x0CAA-#x0CB3] | [#x0CB5-#x0CB9] | #x0CDE | [#x0CE0-#x0CE1] | | ||
| 36 | [#x0D05-#x0D0C] | [#x0D0E-#x0D10] | [#x0D12-#x0D28] | [#x0D2A-#x0D39] | | ||
| 37 | [#x0D60-#x0D61] | [#x0E01-#x0E2E] | #x0E30 | [#x0E32-#x0E33] | | ||
| 38 | [#x0E40-#x0E45] | [#x0E81-#x0E82] | #x0E84 | [#x0E87-#x0E88] | #x0E8A | | ||
| 39 | #x0E8D | [#x0E94-#x0E97] | [#x0E99-#x0E9F] | [#x0EA1-#x0EA3] | #x0EA5 | | ||
| 40 | #x0EA7 | [#x0EAA-#x0EAB] | [#x0EAD-#x0EAE] | #x0EB0 | [#x0EB2-#x0EB3] | | ||
| 41 | #x0EBD | [#x0EC0-#x0EC4] | [#x0F40-#x0F47] | [#x0F49-#x0F69] | | ||
| 42 | [#x10A0-#x10C5] | [#x10D0-#x10F6] | #x1100 | [#x1102-#x1103] | | ||
| 43 | [#x1105-#x1107] | #x1109 | [#x110B-#x110C] | [#x110E-#x1112] | #x113C | | ||
| 44 | #x113E | #x1140 | #x114C | #x114E | #x1150 | [#x1154-#x1155] | #x1159 | | ||
| 45 | [#x115F-#x1161] | #x1163 | #x1165 | #x1167 | #x1169 | [#x116D-#x116E] | | ||
| 46 | [#x1172-#x1173] | #x1175 | #x119E | #x11A8 | #x11AB | [#x11AE-#x11AF] | | ||
| 47 | [#x11B7-#x11B8] | #x11BA | [#x11BC-#x11C2] | #x11EB | #x11F0 | #x11F9 | | ||
| 48 | [#x1E00-#x1E9B] | [#x1EA0-#x1EF9] | [#x1F00-#x1F15] | [#x1F18-#x1F1D] | | ||
| 49 | [#x1F20-#x1F45] | [#x1F48-#x1F4D] | [#x1F50-#x1F57] | #x1F59 | #x1F5B | | ||
| 50 | #x1F5D | [#x1F5F-#x1F7D] | [#x1F80-#x1FB4] | [#x1FB6-#x1FBC] | #x1FBE | | ||
| 51 | [#x1FC2-#x1FC4] | [#x1FC6-#x1FCC] | [#x1FD0-#x1FD3] | [#x1FD6-#x1FDB] | | ||
| 52 | [#x1FE0-#x1FEC] | [#x1FF2-#x1FF4] | [#x1FF6-#x1FFC] | #x2126 | | ||
| 53 | [#x212A-#x212B] | #x212E | [#x2180-#x2182] | [#x3041-#x3094] | | ||
| 54 | [#x30A1-#x30FA] | [#x3105-#x312C] | [#xAC00-#xD7A3]""" | ||
| 55 | |||
| 56 | ideographic = """[#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]""" | ||
| 57 | |||
| 58 | combiningCharacter = """ | ||
| 59 | [#x0300-#x0345] | [#x0360-#x0361] | [#x0483-#x0486] | [#x0591-#x05A1] | | ||
| 60 | [#x05A3-#x05B9] | [#x05BB-#x05BD] | #x05BF | [#x05C1-#x05C2] | #x05C4 | | ||
| 61 | [#x064B-#x0652] | #x0670 | [#x06D6-#x06DC] | [#x06DD-#x06DF] | | ||
| 62 | [#x06E0-#x06E4] | [#x06E7-#x06E8] | [#x06EA-#x06ED] | [#x0901-#x0903] | | ||
| 63 | #x093C | [#x093E-#x094C] | #x094D | [#x0951-#x0954] | [#x0962-#x0963] | | ||
| 64 | [#x0981-#x0983] | #x09BC | #x09BE | #x09BF | [#x09C0-#x09C4] | | ||
| 65 | [#x09C7-#x09C8] | [#x09CB-#x09CD] | #x09D7 | [#x09E2-#x09E3] | #x0A02 | | ||
| 66 | #x0A3C | #x0A3E | #x0A3F | [#x0A40-#x0A42] | [#x0A47-#x0A48] | | ||
| 67 | [#x0A4B-#x0A4D] | [#x0A70-#x0A71] | [#x0A81-#x0A83] | #x0ABC | | ||
| 68 | [#x0ABE-#x0AC5] | [#x0AC7-#x0AC9] | [#x0ACB-#x0ACD] | [#x0B01-#x0B03] | | ||
| 69 | #x0B3C | [#x0B3E-#x0B43] | [#x0B47-#x0B48] | [#x0B4B-#x0B4D] | | ||
| 70 | [#x0B56-#x0B57] | [#x0B82-#x0B83] | [#x0BBE-#x0BC2] | [#x0BC6-#x0BC8] | | ||
| 71 | [#x0BCA-#x0BCD] | #x0BD7 | [#x0C01-#x0C03] | [#x0C3E-#x0C44] | | ||
| 72 | [#x0C46-#x0C48] | [#x0C4A-#x0C4D] | [#x0C55-#x0C56] | [#x0C82-#x0C83] | | ||
| 73 | [#x0CBE-#x0CC4] | [#x0CC6-#x0CC8] | [#x0CCA-#x0CCD] | [#x0CD5-#x0CD6] | | ||
| 74 | [#x0D02-#x0D03] | [#x0D3E-#x0D43] | [#x0D46-#x0D48] | [#x0D4A-#x0D4D] | | ||
| 75 | #x0D57 | #x0E31 | [#x0E34-#x0E3A] | [#x0E47-#x0E4E] | #x0EB1 | | ||
| 76 | [#x0EB4-#x0EB9] | [#x0EBB-#x0EBC] | [#x0EC8-#x0ECD] | [#x0F18-#x0F19] | | ||
| 77 | #x0F35 | #x0F37 | #x0F39 | #x0F3E | #x0F3F | [#x0F71-#x0F84] | | ||
| 78 | [#x0F86-#x0F8B] | [#x0F90-#x0F95] | #x0F97 | [#x0F99-#x0FAD] | | ||
| 79 | [#x0FB1-#x0FB7] | #x0FB9 | [#x20D0-#x20DC] | #x20E1 | [#x302A-#x302F] | | ||
| 80 | #x3099 | #x309A""" | ||
| 81 | |||
| 82 | digit = """ | ||
| 83 | [#x0030-#x0039] | [#x0660-#x0669] | [#x06F0-#x06F9] | [#x0966-#x096F] | | ||
| 84 | [#x09E6-#x09EF] | [#x0A66-#x0A6F] | [#x0AE6-#x0AEF] | [#x0B66-#x0B6F] | | ||
| 85 | [#x0BE7-#x0BEF] | [#x0C66-#x0C6F] | [#x0CE6-#x0CEF] | [#x0D66-#x0D6F] | | ||
| 86 | [#x0E50-#x0E59] | [#x0ED0-#x0ED9] | [#x0F20-#x0F29]""" | ||
| 87 | |||
# XML 1.0 "Extender" production.
# Fix: the original had a stray "#" before [#x3031-#x3035]
# ("#[#x3031-#x3035]"), which matches neither reChar nor reCharRange and so
# silently dropped that range.  The precompiled regexps further below were
# generated from the corrected data (they already treat U+3031..U+3035 as
# valid name characters), so this fix makes the declarative data consistent
# with the generated output.
extender = """
#x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 | #x0E46 | #x0EC6 | #x3005 |
[#x3031-#x3035] | [#x309D-#x309E] | [#x30FC-#x30FE]"""

letter = " | ".join([baseChar, ideographic])

# NameChar from the XML 1.0 spec, without the ":" (colons are significant in
# namespace processing and handled separately).
name = " | ".join([letter, digit, ".", "-", "_", combiningCharacter,
                   extender])
nameFirst = " | ".join([letter, "_"])

# Parsers for "#xNNNN" single characters and "[#xNNNN-#xNNNN]" ranges in the
# productions above.  (The "|" inside the character class is redundant — it
# matches a literal "|" — but is kept as in the original.)
reChar = re.compile(r"#x([\d|A-F]{4,4})")
reCharRange = re.compile(r"\[#x([\d|A-F]{4,4})-#x([\d|A-F]{4,4})\]")
| 102 | |||
def charStringToList(chars):
    """Parse a spec-style production string ("#xNNNN | [#xNNNN-#xNNNN] | c")
    into a normalised list of [start, end] codepoint ranges."""
    ranges = []
    for token in (piece.strip() for piece in chars.split(" | ")):
        # Try the single-char form first, then the range form (same order as
        # the original tuple of regexps).
        match = reChar.match(token) or reCharRange.match(token)
        if match is None:
            # Anything unparsed must be a literal single character.
            assert len(token) == 1
            ranges.append([ord(token)] * 2)
            continue
        bounds = [hexToInt(group) for group in match.groups()]
        if len(bounds) == 1:
            # Single codepoint: represent as a degenerate [n, n] range.
            bounds = bounds * 2
        ranges.append(bounds)
    return normaliseCharList(ranges)
| 122 | |||
| 123 | |||
def normaliseCharList(charList):
    """Sort [start, end] ranges and coalesce adjacent or overlapping ones."""
    ordered = sorted(charList)
    for entry in ordered:
        assert entry[1] >= entry[0]
    merged = []
    index = 0
    while index < len(ordered):
        merged.append(ordered[index])
        span = 1
        # Absorb every following range that touches or overlaps the current
        # one (start <= current end + 1).
        while (index + span < len(ordered) and
               ordered[index + span][0] <= merged[-1][1] + 1):
            merged[-1][1] = ordered[index + span][1]
            span += 1
        index += span
    return merged
| 138 | |||
# We don't really support characters above the BMP :(
max_unicode = int("FFFF", 16)


def missingRanges(charList):
    """Return the complement of *charList* within [0, max_unicode].

    *charList* must be a normalised (sorted, disjoint) list of [start, end]
    codepoint ranges, as produced by normaliseCharList().

    Bug fix: the original tested ``charList[0] != 0`` — comparing the whole
    [start, end] pair against an int, which is always true — so an input
    whose first range starts at codepoint 0 produced a bogus leading
    ``[0, -1]`` entry.  We compare the start codepoint instead.
    """
    rv = []
    if charList[0][0] != 0:
        rv.append([0, charList[0][0] - 1])
    # Gaps between consecutive ranges.
    for i, item in enumerate(charList[:-1]):
        rv.append([item[1] + 1, charList[i + 1][0] - 1])
    if charList[-1][1] != max_unicode:
        rv.append([charList[-1][1] + 1, max_unicode])
    return rv
| 152 | |||
| 153 | |||
def listToRegexpStr(charList):
    """Render [start, end] codepoint ranges as a regexp character class."""
    parts = []
    for start, end in charList:
        if start == end:
            parts.append(escapeRegexp(chr(start)))
        else:
            parts.append("%s-%s" % (escapeRegexp(chr(start)),
                                    escapeRegexp(chr(end))))
    return "[%s]" % "".join(parts)
| 163 | |||
| 164 | |||
def hexToInt(hex_str):
    """Parse a base-16 string such as "03DA" into an int."""
    return int(hex_str, base=16)
| 167 | |||
| 168 | |||
def escapeRegexp(string):
    """Backslash-escape the regexp metacharacters used by listToRegexpStr.

    Implemented as a one-pass str.translate; this is equivalent to the
    chained str.replace() calls because no replacement introduces a
    character that is itself in the escape set (backslash is not escaped).
    """
    table = {ord(ch): "\\" + ch for ch in ".^$*+?{}[]|()-"}
    return string.translate(table)
| 176 | |||
| 177 | # output from the above | ||
| 178 | nonXmlNameBMPRegexp = re.compile('[\x00-,/:-@\\[-\\^`\\{-\xb6\xb8-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u02cf\u02d2-\u02ff\u0346-\u035f\u0362-\u0385\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482\u0487-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u0590\u05a2\u05ba\u05be\u05c0\u05c3\u05c5-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u063f\u0653-\u065f\u066a-\u066f\u06b8-\u06b9\u06bf\u06cf\u06d4\u06e9\u06ee-\u06ef\u06fa-\u0900\u0904\u093a-\u093b\u094e-\u0950\u0955-\u0957\u0964-\u0965\u0970-\u0980\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09bb\u09bd\u09c5-\u09c6\u09c9-\u09ca\u09ce-\u09d6\u09d8-\u09db\u09de\u09e4-\u09e5\u09f2-\u0a01\u0a03-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a3b\u0a3d\u0a43-\u0a46\u0a49-\u0a4a\u0a4e-\u0a58\u0a5d\u0a5f-\u0a65\u0a75-\u0a80\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abb\u0ac6\u0aca\u0ace-\u0adf\u0ae1-\u0ae5\u0af0-\u0b00\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3b\u0b44-\u0b46\u0b49-\u0b4a\u0b4e-\u0b55\u0b58-\u0b5b\u0b5e\u0b62-\u0b65\u0b70-\u0b81\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0bbd\u0bc3-\u0bc5\u0bc9\u0bce-\u0bd6\u0bd8-\u0be6\u0bf0-\u0c00\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c3d\u0c45\u0c49\u0c4e-\u0c54\u0c57-\u0c5f\u0c62-\u0c65\u0c70-\u0c81\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cbd\u0cc5\u0cc9\u0cce-\u0cd4\u0cd7-\u0cdd\u0cdf\u0ce2-\u0ce5\u0cf0-\u0d01\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d3d\u0d44-\u0d45\u0d49\u0d4e-\u0d56\u0d58-\u0d5f\u0d62-\u0d65\u0d70-\u0e00\u0e2f\u0e3b-\u0e3f\u0e4f\u0e5a-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eba\u0ebe-\u0ebf\u0ec5\u0ec7\u0ece-\u0ecf\u0eda-\u0f17\u0f1a-\u0f1f\u0f2a-\u0f34\
u0f36\u0f38\u0f3a-\u0f3d\u0f48\u0f6a-\u0f70\u0f85\u0f8c-\u0f8f\u0f96\u0f98\u0fae-\u0fb0\u0fb8\u0fba-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u20cf\u20dd-\u20e0\u20e2-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3004\u3006\u3008-\u3020\u3030\u3036-\u3040\u3095-\u3098\u309b-\u309c\u309f-\u30a0\u30fb\u30ff-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]') # noqa | ||
| 179 | |||
| 180 | nonXmlNameFirstBMPRegexp = re.compile('[\x00-@\\[-\\^`\\{-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u0385\u0387\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u0640\u064b-\u0670\u06b8-\u06b9\u06bf\u06cf\u06d4\u06d6-\u06e4\u06e7-\u0904\u093a-\u093c\u093e-\u0957\u0962-\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09db\u09de\u09e2-\u09ef\u09f2-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a58\u0a5d\u0a5f-\u0a71\u0a75-\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abc\u0abe-\u0adf\u0ae1-\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3c\u0b3e-\u0b5b\u0b5e\u0b62-\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c5f\u0c62-\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cdd\u0cdf\u0ce2-\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d5f\u0d62-\u0e00\u0e2f\u0e31\u0e34-\u0e3f\u0e46-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eb1\u0eb4-\u0ebc\u0ebe-\u0ebf\u0ec5-\u0f3f\u0f48\u0f6a-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3006\u3008-\u3020\u
302a-\u3040\u3095-\u30a0\u30fb-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]') # noqa | ||
| 181 | |||
| 182 | # Simpler things | ||
| 183 | nonPubidCharRegexp = re.compile("[^\x20\x0D\x0Aa-zA-Z0-9\\-'()+,./:=?;!*#@$_%]") | ||
| 184 | |||
| 185 | |||
class InfosetFilter(object):
    """Coerces an HTML infoset (which allows names and text XML forbids)
    into an XML-representable one, emitting a DataLossWarning for every
    lossy change.  Used by tree builders/serializers that target XML APIs.
    """

    # Matches the "U" + 5-hex-digit escape sequences produced by
    # escapeChar(), so fromXmlName() can reverse them.
    replacementRegexp = re.compile(r"U[\dA-F]{5,5}")

    def __init__(self,
                 dropXmlnsLocalName=False,
                 dropXmlnsAttrNs=False,
                 preventDoubleDashComments=False,
                 preventDashAtCommentEnd=False,
                 replaceFormFeedCharacters=True,
                 preventSingleQuotePubid=False):
        # Each flag enables one coercion below.
        # NOTE(review): preventDashAtCommentEnd is stored but not read
        # anywhere in this block — confirm against the rest of the package.

        self.dropXmlnsLocalName = dropXmlnsLocalName
        self.dropXmlnsAttrNs = dropXmlnsAttrNs

        self.preventDoubleDashComments = preventDoubleDashComments
        self.preventDashAtCommentEnd = preventDashAtCommentEnd

        self.replaceFormFeedCharacters = replaceFormFeedCharacters

        self.preventSingleQuotePubid = preventSingleQuotePubid

        # char -> "UXXXXX" replacement string, shared by all coercions.
        self.replaceCache = {}

    def coerceAttribute(self, name, namespace=None):
        """Return an XML-safe attribute name, or None if the attribute must
        be dropped entirely."""
        if self.dropXmlnsLocalName and name.startswith("xmlns:"):
            warnings.warn("Attributes cannot begin with xmlns", DataLossWarning)
            return None
        elif (self.dropXmlnsAttrNs and
              namespace == "http://www.w3.org/2000/xmlns/"):
            warnings.warn("Attributes cannot be in the xml namespace", DataLossWarning)
            return None
        else:
            return self.toXmlName(name)

    def coerceElement(self, name):
        """Return an XML-safe element name."""
        return self.toXmlName(name)

    def coerceComment(self, data):
        """Rewrite comment text so it cannot break XML comment syntax
        ("--" inside, or a trailing "-")."""
        if self.preventDoubleDashComments:
            # Loop because "- -" substitution can expose no new "--", but
            # re-checking keeps the invariant explicit.
            while "--" in data:
                warnings.warn("Comments cannot contain adjacent dashes", DataLossWarning)
                data = data.replace("--", "- -")
            if data.endswith("-"):
                warnings.warn("Comments cannot end in a dash", DataLossWarning)
                data += " "
        return data

    def coerceCharacters(self, data):
        """Replace form-feed characters (invalid in XML text) with spaces."""
        if self.replaceFormFeedCharacters:
            # One warning per occurrence, then a single bulk replacement.
            for _ in range(data.count("\x0C")):
                warnings.warn("Text cannot contain U+000C", DataLossWarning)
            data = data.replace("\x0C", " ")
        # Other non-xml characters
        return data

    def coercePubid(self, data):
        """Escape characters not allowed in a public identifier."""
        dataOutput = data
        for char in nonPubidCharRegexp.findall(data):
            warnings.warn("Coercing non-XML pubid", DataLossWarning)
            replacement = self.getReplacementCharacter(char)
            dataOutput = dataOutput.replace(char, replacement)
        if self.preventSingleQuotePubid and dataOutput.find("'") >= 0:
            warnings.warn("Pubid cannot contain single quote", DataLossWarning)
            dataOutput = dataOutput.replace("'", self.getReplacementCharacter("'"))
        return dataOutput

    def toXmlName(self, name):
        """Escape any characters in *name* that are not legal in an XML Name.

        The first character is checked against the stricter name-start set
        (nonXmlNameFirstBMPRegexp), the remainder against the general name
        character set (nonXmlNameBMPRegexp).
        """
        nameFirst = name[0]
        nameRest = name[1:]
        m = nonXmlNameFirstBMPRegexp.match(nameFirst)
        if m:
            warnings.warn("Coercing non-XML name", DataLossWarning)
            nameFirstOutput = self.getReplacementCharacter(nameFirst)
        else:
            nameFirstOutput = nameFirst

        nameRestOutput = nameRest
        # Deduplicate so each offending character is warned about and
        # replaced once (replace() handles all its occurrences).
        replaceChars = set(nonXmlNameBMPRegexp.findall(nameRest))
        for char in replaceChars:
            warnings.warn("Coercing non-XML name", DataLossWarning)
            replacement = self.getReplacementCharacter(char)
            nameRestOutput = nameRestOutput.replace(char, replacement)
        return nameFirstOutput + nameRestOutput

    def getReplacementCharacter(self, char):
        """Return (and memoise) the escape sequence for *char*."""
        if char in self.replaceCache:
            replacement = self.replaceCache[char]
        else:
            # escapeChar() stores its result in replaceCache as a side effect.
            replacement = self.escapeChar(char)
        return replacement

    def fromXmlName(self, name):
        """Invert toXmlName(): decode any UXXXXX escapes back to characters."""
        for item in set(self.replacementRegexp.findall(name)):
            name = name.replace(item, self.unescapeChar(item))
        return name

    def escapeChar(self, char):
        """Encode *char* as "U" + 5 hex digits and cache the mapping."""
        replacement = "U%05X" % ord(char)
        self.replaceCache[char] = replacement
        return replacement

    def unescapeChar(self, charcode):
        """Decode a "UXXXXX" escape back to its character."""
        return chr(int(charcode[1:], 16))
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_inputstream.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_inputstream.py new file mode 100644 index 0000000..21c6bbc --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_inputstream.py | |||
| @@ -0,0 +1,923 @@ | |||
| 1 | from __future__ import absolute_import, division, unicode_literals | ||
| 2 | |||
| 3 | from pip._vendor.six import text_type, binary_type | ||
| 4 | from pip._vendor.six.moves import http_client, urllib | ||
| 5 | |||
| 6 | import codecs | ||
| 7 | import re | ||
| 8 | |||
| 9 | from pip._vendor import webencodings | ||
| 10 | |||
| 11 | from .constants import EOF, spaceCharacters, asciiLetters, asciiUppercase | ||
| 12 | from .constants import _ReparseException | ||
| 13 | from . import _utils | ||
| 14 | |||
| 15 | from io import StringIO | ||
| 16 | |||
| 17 | try: | ||
| 18 | from io import BytesIO | ||
| 19 | except ImportError: | ||
| 20 | BytesIO = StringIO | ||
| 21 | |||
| 22 | # Non-unicode versions of constants for use in the pre-parser | ||
| 23 | spaceCharactersBytes = frozenset([item.encode("ascii") for item in spaceCharacters]) | ||
| 24 | asciiLettersBytes = frozenset([item.encode("ascii") for item in asciiLetters]) | ||
| 25 | asciiUppercaseBytes = frozenset([item.encode("ascii") for item in asciiUppercase]) | ||
| 26 | spacesAngleBrackets = spaceCharactersBytes | frozenset([b">", b"<"]) | ||
| 27 | |||
| 28 | |||
| 29 | invalid_unicode_no_surrogate = "[\u0001-\u0008\u000B\u000E-\u001F\u007F-\u009F\uFDD0-\uFDEF\uFFFE\uFFFF\U0001FFFE\U0001FFFF\U0002FFFE\U0002FFFF\U0003FFFE\U0003FFFF\U0004FFFE\U0004FFFF\U0005FFFE\U0005FFFF\U0006FFFE\U0006FFFF\U0007FFFE\U0007FFFF\U0008FFFE\U0008FFFF\U0009FFFE\U0009FFFF\U000AFFFE\U000AFFFF\U000BFFFE\U000BFFFF\U000CFFFE\U000CFFFF\U000DFFFE\U000DFFFF\U000EFFFE\U000EFFFF\U000FFFFE\U000FFFFF\U0010FFFE\U0010FFFF]" # noqa | ||
| 30 | |||
| 31 | if _utils.supports_lone_surrogates: | ||
| 32 | # Use one extra step of indirection and create surrogates with | ||
| 33 | # eval. Not using this indirection would introduce an illegal | ||
| 34 | # unicode literal on platforms not supporting such lone | ||
| 35 | # surrogates. | ||
| 36 | assert invalid_unicode_no_surrogate[-1] == "]" and invalid_unicode_no_surrogate.count("]") == 1 | ||
| 37 | invalid_unicode_re = re.compile(invalid_unicode_no_surrogate[:-1] + | ||
| 38 | eval('"\\uD800-\\uDFFF"') + # pylint:disable=eval-used | ||
| 39 | "]") | ||
| 40 | else: | ||
| 41 | invalid_unicode_re = re.compile(invalid_unicode_no_surrogate) | ||
| 42 | |||
| 43 | non_bmp_invalid_codepoints = set([0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE, | ||
| 44 | 0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF, | ||
| 45 | 0x6FFFE, 0x6FFFF, 0x7FFFE, 0x7FFFF, 0x8FFFE, | ||
| 46 | 0x8FFFF, 0x9FFFE, 0x9FFFF, 0xAFFFE, 0xAFFFF, | ||
| 47 | 0xBFFFE, 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE, | ||
| 48 | 0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF, | ||
| 49 | 0x10FFFE, 0x10FFFF]) | ||
| 50 | |||
| 51 | ascii_punctuation_re = re.compile("[\u0009-\u000D\u0020-\u002F\u003A-\u0040\u005C\u005B-\u0060\u007B-\u007E]") | ||
| 52 | |||
| 53 | # Cache for charsUntil() | ||
| 54 | charsUntilRegEx = {} | ||
| 55 | |||
| 56 | |||
class BufferedStream(object):
    """Wrap an unbuffered byte stream with a re-readable chunk buffer.

    Data already read is retained as a list of byte chunks (repeatedly
    concatenating one big bytestring would be O(n**2)), so callers can
    seek() back into the buffered region and re-read it.
    """

    def __init__(self, stream):
        self.stream = stream
        self.buffer = []
        # Read cursor within self.buffer: [chunk index, offset inside chunk].
        self.position = [-1, 0]

    def tell(self):
        """Absolute cursor position: bytes in all fully consumed chunks plus
        the offset within the current one."""
        chunkIndex, chunkOffset = self.position
        consumed = sum(len(piece) for piece in self.buffer[:chunkIndex])
        return consumed + chunkOffset

    def seek(self, pos):
        """Move the cursor to *pos*; only already-buffered bytes are seekable."""
        assert pos <= self._bufferedBytes()
        remaining = pos
        index = 0
        while len(self.buffer[index]) < remaining:
            remaining -= len(self.buffer[index])
            index += 1
        self.position = [index, remaining]

    def read(self, bytes):
        """Read up to *bytes* bytes, serving from the buffer first and the
        underlying stream once the buffer is exhausted."""
        if not self.buffer:
            return self._readStream(bytes)
        atBufferEnd = (self.position[0] == len(self.buffer) and
                       self.position[1] == len(self.buffer[-1]))
        if atBufferEnd:
            return self._readStream(bytes)
        return self._readFromBuffer(bytes)

    def _bufferedBytes(self):
        # Total number of bytes currently held in the buffer.
        return sum(len(piece) for piece in self.buffer)

    def _readStream(self, bytes):
        # Pull fresh data, store it as a new chunk, leave the cursor at its
        # end so a subsequent read continues from the stream.
        data = self.stream.read(bytes)
        self.buffer.append(data)
        self.position[0] += 1
        self.position[1] = len(data)
        return data

    def _readFromBuffer(self, bytes):
        remaining = bytes
        pieces = []
        chunkIndex, chunkOffset = self.position
        while chunkIndex < len(self.buffer) and remaining != 0:
            assert remaining > 0
            chunk = self.buffer[chunkIndex]
            available = len(chunk) - chunkOffset
            if remaining <= available:
                # Request satisfied inside this chunk.
                take = remaining
                self.position = [chunkIndex, chunkOffset + take]
            else:
                # Drain this chunk and continue with the next one.
                take = available
                self.position = [chunkIndex, len(chunk)]
                chunkIndex += 1
            pieces.append(chunk[chunkOffset:chunkOffset + take])
            remaining -= take
            chunkOffset = 0

        if remaining:
            # Buffer exhausted; satisfy the rest directly from the stream.
            pieces.append(self._readStream(remaining))

        return b"".join(pieces)
| 129 | |||
| 130 | |||
def HTMLInputStream(source, **kwargs):
    """Construct the appropriate input stream for *source*: a
    HTMLUnicodeInputStream for text input, HTMLBinaryInputStream otherwise."""
    # Work around Python bug #20007: read(0) closes the connection.
    # http://bugs.python.org/issue20007
    isHttpResponse = isinstance(source, http_client.HTTPResponse)
    # addinfourl objects may wrap an HTTPResponse as their .fp
    isWrappedHttpResponse = (isinstance(source, urllib.response.addbase) and
                             isinstance(source.fp, http_client.HTTPResponse))
    if isHttpResponse or isWrappedHttpResponse:
        isUnicode = False
    elif hasattr(source, "read"):
        isUnicode = isinstance(source.read(0), text_type)
    else:
        isUnicode = isinstance(source, text_type)

    if not isUnicode:
        return HTMLBinaryInputStream(source, **kwargs)

    # Text input is already decoded; an explicit encoding makes no sense.
    encodings = [x for x in kwargs if x.endswith("_encoding")]
    if encodings:
        raise TypeError("Cannot set an encoding with a unicode input, set %r" % encodings)
    return HTMLUnicodeInputStream(source, **kwargs)
| 152 | |||
| 153 | |||
| 154 | class HTMLUnicodeInputStream(object): | ||
| 155 | """Provides a unicode stream of characters to the HTMLTokenizer. | ||
| 156 | |||
| 157 | This class takes care of character encoding and removing or replacing | ||
| 158 | incorrect byte-sequences and also provides column and line tracking. | ||
| 159 | |||
| 160 | """ | ||
| 161 | |||
| 162 | _defaultChunkSize = 10240 | ||
| 163 | |||
    def __init__(self, source):
        """Initialises the HTMLInputStream.

        HTMLInputStream(source, [encoding]) -> Normalized stream from source
        for use by html5lib.

        source can be either a file-object, local filename or a string.

        The optional encoding parameter must be a string that indicates
        the encoding. If specified, that encoding will be used,
        regardless of any BOM or later declaration (such as in a meta
        element)

        """

        if not _utils.supports_lone_surrogates:
            # Such platforms will have already checked for such
            # surrogate errors, so no need to do this checking.
            self.reportCharacterErrors = None
        elif len("\U0010FFFF") == 1:
            # One code unit per astral character: wide (UCS-4) build.
            self.reportCharacterErrors = self.characterErrorsUCS4
        else:
            # Astral characters appear as surrogate pairs: narrow (UCS-2) build.
            self.reportCharacterErrors = self.characterErrorsUCS2

        # List of where new lines occur
        self.newLines = [0]

        # Input at this layer is always already-decoded text; treat it as
        # UTF-8 with certainty.
        self.charEncoding = (lookupEncoding("utf-8"), "certain")
        self.dataStream = self.openStream(source)

        self.reset()
| 195 | |||
| 196 | def reset(self): | ||
| 197 | self.chunk = "" | ||
| 198 | self.chunkSize = 0 | ||
| 199 | self.chunkOffset = 0 | ||
| 200 | self.errors = [] | ||
| 201 | |||
| 202 | # number of (complete) lines in previous chunks | ||
| 203 | self.prevNumLines = 0 | ||
| 204 | # number of columns in the last line of the previous chunk | ||
| 205 | self.prevNumCols = 0 | ||
| 206 | |||
| 207 | # Deal with CR LF and surrogates split over chunk boundaries | ||
| 208 | self._bufferedCharacter = None | ||
| 209 | |||
| 210 | def openStream(self, source): | ||
| 211 | """Produces a file object from source. | ||
| 212 | |||
| 213 | source can be either a file object, local filename or a string. | ||
| 214 | |||
| 215 | """ | ||
| 216 | # Already a file object | ||
| 217 | if hasattr(source, 'read'): | ||
| 218 | stream = source | ||
| 219 | else: | ||
| 220 | stream = StringIO(source) | ||
| 221 | |||
| 222 | return stream | ||
| 223 | |||
| 224 | def _position(self, offset): | ||
| 225 | chunk = self.chunk | ||
| 226 | nLines = chunk.count('\n', 0, offset) | ||
| 227 | positionLine = self.prevNumLines + nLines | ||
| 228 | lastLinePos = chunk.rfind('\n', 0, offset) | ||
| 229 | if lastLinePos == -1: | ||
| 230 | positionColumn = self.prevNumCols + offset | ||
| 231 | else: | ||
| 232 | positionColumn = offset - (lastLinePos + 1) | ||
| 233 | return (positionLine, positionColumn) | ||
| 234 | |||
| 235 | def position(self): | ||
| 236 | """Returns (line, col) of the current position in the stream.""" | ||
| 237 | line, col = self._position(self.chunkOffset) | ||
| 238 | return (line + 1, col) | ||
| 239 | |||
| 240 | def char(self): | ||
| 241 | """ Read one character from the stream or queue if available. Return | ||
| 242 | EOF when EOF is reached. | ||
| 243 | """ | ||
| 244 | # Read a new chunk from the input stream if necessary | ||
| 245 | if self.chunkOffset >= self.chunkSize: | ||
| 246 | if not self.readChunk(): | ||
| 247 | return EOF | ||
| 248 | |||
| 249 | chunkOffset = self.chunkOffset | ||
| 250 | char = self.chunk[chunkOffset] | ||
| 251 | self.chunkOffset = chunkOffset + 1 | ||
| 252 | |||
| 253 | return char | ||
| 254 | |||
    def readChunk(self, chunkSize=None):
        """Read the next chunk of the stream into ``self.chunk``.

        Returns True if any data was buffered, False once the stream is
        exhausted.  Also folds the consumed chunk into the line/column
        totals and normalises CR LF / bare CR line endings to LF.
        """
        if chunkSize is None:
            chunkSize = self._defaultChunkSize

        # Fold the chunk being discarded into the running totals so that
        # _position() keeps reporting absolute positions.
        self.prevNumLines, self.prevNumCols = self._position(self.chunkSize)

        self.chunk = ""
        self.chunkSize = 0
        self.chunkOffset = 0

        data = self.dataStream.read(chunkSize)

        # Deal with CR LF and surrogates broken across chunks
        if self._bufferedCharacter:
            data = self._bufferedCharacter + data
            self._bufferedCharacter = None
        elif not data:
            # We have no more data, bye-bye stream
            return False

        # If the chunk ends with a CR (0x0D) or a high/lead surrogate
        # (D800-DBFF), hold that character back so it can be joined with
        # the start of the next chunk.
        # NOTE(review): a chunk of exactly one such character is not
        # buffered (the len(data) > 1 guard) -- presumably to guarantee
        # forward progress at EOF; confirm before changing.
        if len(data) > 1:
            lastv = ord(data[-1])
            if lastv == 0x0D or 0xD800 <= lastv <= 0xDBFF:
                self._bufferedCharacter = data[-1]
                data = data[:-1]

        # reportCharacterErrors is assumed to be set elsewhere (e.g. to
        # characterErrorsUCS4/UCS2, or a falsy value to disable the check).
        if self.reportCharacterErrors:
            self.reportCharacterErrors(data)

        # Replace invalid characters
        data = data.replace("\r\n", "\n")
        data = data.replace("\r", "\n")

        self.chunk = data
        self.chunkSize = len(data)

        return True
| 292 | |||
| 293 | def characterErrorsUCS4(self, data): | ||
| 294 | for _ in range(len(invalid_unicode_re.findall(data))): | ||
| 295 | self.errors.append("invalid-codepoint") | ||
| 296 | |||
| 297 | def characterErrorsUCS2(self, data): | ||
| 298 | # Someone picked the wrong compile option | ||
| 299 | # You lose | ||
| 300 | skip = False | ||
| 301 | for match in invalid_unicode_re.finditer(data): | ||
| 302 | if skip: | ||
| 303 | continue | ||
| 304 | codepoint = ord(match.group()) | ||
| 305 | pos = match.start() | ||
| 306 | # Pretty sure there should be endianness issues here | ||
| 307 | if _utils.isSurrogatePair(data[pos:pos + 2]): | ||
| 308 | # We have a surrogate pair! | ||
| 309 | char_val = _utils.surrogatePairToCodepoint(data[pos:pos + 2]) | ||
| 310 | if char_val in non_bmp_invalid_codepoints: | ||
| 311 | self.errors.append("invalid-codepoint") | ||
| 312 | skip = True | ||
| 313 | elif (codepoint >= 0xD800 and codepoint <= 0xDFFF and | ||
| 314 | pos == len(data) - 1): | ||
| 315 | self.errors.append("invalid-codepoint") | ||
| 316 | else: | ||
| 317 | skip = False | ||
| 318 | self.errors.append("invalid-codepoint") | ||
| 319 | |||
    def charsUntil(self, characters, opposite=False):
        """ Returns a string of characters from the stream up to but not
        including any character in 'characters' or EOF. 'characters' must be
        a container that supports the 'in' method and iteration over its
        characters.

        If 'opposite' is True the sense is inverted: characters are
        consumed only while they ARE in 'characters'.
        """

        # Use a cache of regexps to find the required characters
        try:
            chars = charsUntilRegEx[(characters, opposite)]
        except KeyError:
            # Build and cache a regexp matching a run of wanted characters.
            # Only ASCII stop-characters are supported (asserted below).
            if __debug__:
                for c in characters:
                    assert(ord(c) < 128)
            regex = "".join(["\\x%02x" % ord(c) for c in characters])
            if not opposite:
                # "until" mode: match runs of anything NOT in the set.
                regex = "^%s" % regex
            chars = charsUntilRegEx[(characters, opposite)] = re.compile("[%s]+" % regex)

        rv = []

        while True:
            # Find the longest matching prefix
            m = chars.match(self.chunk, self.chunkOffset)
            if m is None:
                # If nothing matched, and it wasn't because we ran out of chunk,
                # then stop
                if self.chunkOffset != self.chunkSize:
                    break
            else:
                end = m.end()
                # If not the whole chunk matched, return everything
                # up to the part that didn't match
                if end != self.chunkSize:
                    rv.append(self.chunk[self.chunkOffset:end])
                    self.chunkOffset = end
                    break
                # If the whole remainder of the chunk matched,
                # use it all and read the next chunk
                rv.append(self.chunk[self.chunkOffset:])
            if not self.readChunk():
                # Reached EOF
                break

        r = "".join(rv)
        return r
| 366 | |||
| 367 | def unget(self, char): | ||
| 368 | # Only one character is allowed to be ungotten at once - it must | ||
| 369 | # be consumed again before any further call to unget | ||
| 370 | if char is not None: | ||
| 371 | if self.chunkOffset == 0: | ||
| 372 | # unget is called quite rarely, so it's a good idea to do | ||
| 373 | # more work here if it saves a bit of work in the frequently | ||
| 374 | # called char and charsUntil. | ||
| 375 | # So, just prepend the ungotten character onto the current | ||
| 376 | # chunk: | ||
| 377 | self.chunk = char + self.chunk | ||
| 378 | self.chunkSize += 1 | ||
| 379 | else: | ||
| 380 | self.chunkOffset -= 1 | ||
| 381 | assert self.chunk[self.chunkOffset] == char | ||
| 382 | |||
| 383 | |||
class HTMLBinaryInputStream(HTMLUnicodeInputStream):
    """Provides a unicode stream of characters to the HTMLTokenizer.

    This class takes care of character encoding and removing or replacing
    incorrect byte-sequences and also provides column and line tracking.

    """

    def __init__(self, source, override_encoding=None, transport_encoding=None,
                 same_origin_parent_encoding=None, likely_encoding=None,
                 default_encoding="windows-1252", useChardet=True):
        """Initialises the HTMLInputStream.

        HTMLInputStream(source, [encoding]) -> Normalized stream from source
        for use by html5lib.

        source can be either a file-object, local filename or a string.

        The encoding arguments form the precedence chain used by
        determineEncoding(): BOM, then override_encoding, then
        transport_encoding, then a meta element, then
        same_origin_parent_encoding, then likely_encoding, then chardet
        (when available and useChardet is true), then default_encoding.

        """
        # Raw Stream - for unicode objects this will encode to utf-8 and set
        # self.charEncoding as appropriate
        self.rawStream = self.openStream(source)

        HTMLUnicodeInputStream.__init__(self, self.rawStream)

        # Encoding Information
        # Number of bytes to use when looking for a meta element with
        # encoding information
        self.numBytesMeta = 1024
        # Number of bytes to use when using detecting encoding using chardet
        self.numBytesChardet = 100
        # Things from args
        self.override_encoding = override_encoding
        self.transport_encoding = transport_encoding
        self.same_origin_parent_encoding = same_origin_parent_encoding
        self.likely_encoding = likely_encoding
        self.default_encoding = default_encoding

        # Determine encoding
        self.charEncoding = self.determineEncoding(useChardet)
        assert self.charEncoding[0] is not None

        # Call superclass
        self.reset()

    def reset(self):
        # Re-wrap the raw byte stream in a decoder for the currently
        # detected encoding; undecodable bytes become U+FFFD ('replace').
        self.dataStream = self.charEncoding[0].codec_info.streamreader(self.rawStream, 'replace')
        HTMLUnicodeInputStream.reset(self)

    def openStream(self, source):
        """Produces a file object from source.

        source can be either a file object, local filename or a string.

        """
        # Already a file object
        if hasattr(source, 'read'):
            stream = source
        else:
            stream = BytesIO(source)

        # Encoding sniffing requires a seekable stream; probe for
        # seekability and fall back to a buffering wrapper if absent.
        try:
            stream.seek(stream.tell())
        except Exception:
            stream = BufferedStream(stream)

        return stream

    def determineEncoding(self, chardet=True):
        """Work through the detection precedence chain and return an
        (encoding, confidence) pair, where confidence is "certain" or
        "tentative"."""
        # BOMs take precedence over everything
        # This will also read past the BOM if present
        charEncoding = self.detectBOM(), "certain"
        if charEncoding[0] is not None:
            return charEncoding

        # If we've been overridden, we've been overridden
        charEncoding = lookupEncoding(self.override_encoding), "certain"
        if charEncoding[0] is not None:
            return charEncoding

        # Now check the transport layer
        charEncoding = lookupEncoding(self.transport_encoding), "certain"
        if charEncoding[0] is not None:
            return charEncoding

        # Look for meta elements with encoding information
        charEncoding = self.detectEncodingMeta(), "tentative"
        if charEncoding[0] is not None:
            return charEncoding

        # Parent document encoding.  utf-16 is rejected: this is a byte
        # stream, so it cannot inherit a utf-16 decoder from its parent.
        charEncoding = lookupEncoding(self.same_origin_parent_encoding), "tentative"
        if charEncoding[0] is not None and not charEncoding[0].name.startswith("utf-16"):
            return charEncoding

        # "likely" encoding
        charEncoding = lookupEncoding(self.likely_encoding), "tentative"
        if charEncoding[0] is not None:
            return charEncoding

        # Guess with chardet, if available
        if chardet:
            try:
                from pip._vendor.chardet.universaldetector import UniversalDetector
            except ImportError:
                pass
            else:
                detector = UniversalDetector()
                while not detector.done:
                    buffer = self.rawStream.read(self.numBytesChardet)
                    assert isinstance(buffer, bytes)
                    if not buffer:
                        break
                    detector.feed(buffer)
                detector.close()
                encoding = lookupEncoding(detector.result['encoding'])
                # Rewind: the bytes fed to chardet must be re-read as text.
                self.rawStream.seek(0)
                if encoding is not None:
                    return encoding, "tentative"

        # Try the default encoding
        charEncoding = lookupEncoding(self.default_encoding), "tentative"
        if charEncoding[0] is not None:
            return charEncoding

        # Fallback to html5lib's default if even that hasn't worked
        return lookupEncoding("windows-1252"), "tentative"

    def changeEncoding(self, newEncoding):
        """Switch to *newEncoding* mid-parse (e.g. due to a late meta
        element).

        If the new encoding matches the current one it is merely promoted
        to "certain"; otherwise the raw stream is rewound and a
        _ReparseException is raised so the parser starts over.
        """
        assert self.charEncoding[1] != "certain"
        newEncoding = lookupEncoding(newEncoding)
        if newEncoding is None:
            return
        if newEncoding.name in ("utf-16be", "utf-16le"):
            # A byte stream must not be switched to utf-16; the spec says
            # to use utf-8 instead.
            newEncoding = lookupEncoding("utf-8")
            assert newEncoding is not None
        # BUGFIX: this was an `elif`, so when the branch above remapped a
        # utf-16 declaration to utf-8 the rest of the chain was skipped
        # entirely and the encoding change silently became a no-op.
        if newEncoding == self.charEncoding[0]:
            self.charEncoding = (self.charEncoding[0], "certain")
        else:
            # BUGFIX: capture the old encoding before reassignment;
            # previously the message reported the new encoding twice.
            oldEncoding = self.charEncoding[0]
            self.rawStream.seek(0)
            self.charEncoding = (newEncoding, "certain")
            self.reset()
            raise _ReparseException("Encoding changed from %s to %s" % (oldEncoding, newEncoding))

    def detectBOM(self):
        """Attempts to detect at BOM at the start of the stream. If
        an encoding can be determined from the BOM return the name of the
        encoding otherwise return None"""
        bomDict = {
            codecs.BOM_UTF8: 'utf-8',
            codecs.BOM_UTF16_LE: 'utf-16le', codecs.BOM_UTF16_BE: 'utf-16be',
            codecs.BOM_UTF32_LE: 'utf-32le', codecs.BOM_UTF32_BE: 'utf-32be'
        }

        # Go to beginning of file and read in 4 bytes
        string = self.rawStream.read(4)
        assert isinstance(string, bytes)

        # Try detecting the BOM using bytes from the string
        encoding = bomDict.get(string[:3])   # UTF-8 (3-byte BOM)
        seek = 3
        if not encoding:
            # Need to detect UTF-32 before UTF-16
            encoding = bomDict.get(string)   # UTF-32 (4-byte BOM)
            seek = 4
            if not encoding:
                encoding = bomDict.get(string[:2])  # UTF-16 (2-byte BOM)
                seek = 2

        # Set the read position past the BOM if one was found, otherwise
        # set it to the start of the stream
        if encoding:
            self.rawStream.seek(seek)
            return lookupEncoding(encoding)
        else:
            self.rawStream.seek(0)
            return None

    def detectEncodingMeta(self):
        """Report the encoding declared by the meta element
        """
        buffer = self.rawStream.read(self.numBytesMeta)
        assert isinstance(buffer, bytes)
        parser = EncodingParser(buffer)
        self.rawStream.seek(0)
        encoding = parser.getEncoding()

        # A meta-declared utf-16 is treated as utf-8 (the stream is bytes).
        if encoding is not None and encoding.name in ("utf-16be", "utf-16le"):
            encoding = lookupEncoding("utf-8")

        return encoding
| 582 | |||
| 583 | |||
class EncodingBytes(bytes):
    """String-like object with an associated position and various extra methods
    If the position is ever greater than the string length then an exception is
    raised"""
    def __new__(self, value):
        # Stored lower-cased so matching against ASCII tag/attribute names
        # is effectively case-insensitive.
        assert isinstance(value, bytes)
        return bytes.__new__(self, value.lower())

    def __init__(self, value):
        # pylint:disable=unused-argument
        # Position starts before the first byte; the first next() moves to 0.
        self._position = -1

    def __iter__(self):
        return self

    def __next__(self):
        # Advance one byte and return it as a length-1 bytes object
        # (never an int, even on Python 3).
        p = self._position = self._position + 1
        if p >= len(self):
            raise StopIteration
        elif p < 0:
            raise TypeError
        return self[p:p + 1]

    def next(self):
        # Py2 compat
        return self.__next__()

    def previous(self):
        # Step the position back one byte and return the byte at the new
        # (decremented) position.
        p = self._position
        if p >= len(self):
            raise StopIteration
        elif p < 0:
            raise TypeError
        self._position = p = p - 1
        return self[p:p + 1]

    def setPosition(self, position):
        # Setting a position after having run past the end still raises.
        if self._position >= len(self):
            raise StopIteration
        self._position = position

    def getPosition(self):
        if self._position >= len(self):
            raise StopIteration
        if self._position >= 0:
            return self._position
        else:
            # Before the first byte: no position yet.
            return None

    position = property(getPosition, setPosition)

    def getCurrentByte(self):
        # Length-1 bytes object at the current position (b"" past the end).
        return self[self.position:self.position + 1]

    currentByte = property(getCurrentByte)

    def skip(self, chars=spaceCharactersBytes):
        """Skip past a list of characters"""
        # Leaves the position on the first byte NOT in *chars* and returns
        # it, or returns None with the position at the end of the data.
        p = self.position  # use property for the error-checking
        while p < len(self):
            c = self[p:p + 1]
            if c not in chars:
                self._position = p
                return c
            p += 1
        self._position = p
        return None

    def skipUntil(self, chars):
        # Mirror image of skip(): advance to the first byte that IS in
        # *chars* and return it, or None at the end of the data.
        p = self.position
        while p < len(self):
            c = self[p:p + 1]
            if c in chars:
                self._position = p
                return c
            p += 1
        self._position = p
        return None

    def matchBytes(self, bytes):
        """Look for a sequence of bytes at the start of a string. If the bytes
        are found return True and advance the position to the byte after the
        match. Otherwise return False and leave the position alone"""
        p = self.position
        data = self[p:p + len(bytes)]
        rv = data.startswith(bytes)
        if rv:
            self.position += len(bytes)
        return rv

    def jumpTo(self, bytes):
        """Look for the next sequence of bytes matching a given sequence. If
        a match is found advance the position to the last byte of the match"""
        newPosition = self[self.position:].find(bytes)
        if newPosition > -1:
            # XXX: This is ugly, but I can't see a nicer way to fix this.
            if self._position == -1:
                self._position = 0
            self._position += (newPosition + len(bytes) - 1)
            return True
        else:
            # Not found: signal callers (EncodingParser) to stop scanning.
            raise StopIteration
| 686 | |||
| 687 | |||
class EncodingParser(object):
    """Mini parser for detecting character encoding from meta elements"""

    def __init__(self, data):
        """string - the data to work on for encoding detection"""
        self.data = EncodingBytes(data)
        self.encoding = None

    def getEncoding(self):
        """Scan the buffered bytes and return the detected encoding,
        or None if no usable declaration was found."""
        # Prefix dispatch table; first match wins, longer prefixes listed
        # before their shorter counterparts (b"<!--" before b"<!" etc.).
        methodDispatch = (
            (b"<!--", self.handleComment),
            (b"<meta", self.handleMeta),
            (b"</", self.handlePossibleEndTag),
            (b"<!", self.handleOther),
            (b"<?", self.handleOther),
            (b"<", self.handlePossibleStartTag))
        for _ in self.data:
            keepParsing = True
            for key, method in methodDispatch:
                if self.data.matchBytes(key):
                    try:
                        # Handlers return False once an encoding is found.
                        keepParsing = method()
                        break
                    except StopIteration:
                        # Ran off the end of the buffered bytes.
                        keepParsing = False
                        break
            if not keepParsing:
                break

        return self.encoding

    def handleComment(self):
        """Skip over comments"""
        return self.data.jumpTo(b"-->")

    def handleMeta(self):
        # Looks for charset information in a meta element, either as a
        # bare charset attribute or inside a content attribute (which only
        # counts if an http-equiv="content-type" pragma is also present).
        if self.data.currentByte not in spaceCharactersBytes:
            # if we have <meta not followed by a space so just keep going
            return True
        # We have a valid meta element we want to search for attributes
        hasPragma = False
        pendingEncoding = None
        while True:
            # Try to find the next attribute after the current position
            attr = self.getAttribute()
            if attr is None:
                return True
            else:
                if attr[0] == b"http-equiv":
                    hasPragma = attr[1] == b"content-type"
                    if hasPragma and pendingEncoding is not None:
                        # content= was seen earlier; the pragma validates it.
                        self.encoding = pendingEncoding
                        return False
                elif attr[0] == b"charset":
                    tentativeEncoding = attr[1]
                    codec = lookupEncoding(tentativeEncoding)
                    if codec is not None:
                        self.encoding = codec
                        return False
                elif attr[0] == b"content":
                    contentParser = ContentAttrParser(EncodingBytes(attr[1]))
                    tentativeEncoding = contentParser.parse()
                    if tentativeEncoding is not None:
                        codec = lookupEncoding(tentativeEncoding)
                        if codec is not None:
                            if hasPragma:
                                self.encoding = codec
                                return False
                            else:
                                # Hold until/unless the pragma shows up.
                                pendingEncoding = codec

    def handlePossibleStartTag(self):
        return self.handlePossibleTag(False)

    def handlePossibleEndTag(self):
        # Consume the byte after "</" before checking for a tag name.
        next(self.data)
        return self.handlePossibleTag(True)

    def handlePossibleTag(self, endTag):
        data = self.data
        if data.currentByte not in asciiLettersBytes:
            # If the next byte is not an ascii letter either ignore this
            # fragment (possible start tag case) or treat it according to
            # handleOther
            if endTag:
                data.previous()
                self.handleOther()
            return True

        c = data.skipUntil(spacesAngleBrackets)
        if c == b"<":
            # return to the first step in the overall "two step" algorithm
            # reprocessing the < byte
            data.previous()
        else:
            # Read all attributes
            attr = self.getAttribute()
            while attr is not None:
                attr = self.getAttribute()
        return True

    def handleOther(self):
        # Skip bogus comments / doctypes / processing instructions.
        return self.data.jumpTo(b">")

    def getAttribute(self):
        """Return a name,value pair for the next attribute in the stream,
        if one is found, or None

        Step numbers refer to the WHATWG "get an attribute" algorithm
        used during encoding sniffing."""
        data = self.data
        # Step 1 (skip chars)
        c = data.skip(spaceCharactersBytes | frozenset([b"/"]))
        assert c is None or len(c) == 1
        # Step 2
        if c in (b">", None):
            return None
        # Step 3
        attrName = []
        attrValue = []
        # Step 4 attribute name
        while True:
            if c == b"=" and attrName:
                break
            elif c in spaceCharactersBytes:
                # Step 6!
                c = data.skip()
                break
            elif c in (b"/", b">"):
                return b"".join(attrName), b""
            elif c in asciiUppercaseBytes:
                attrName.append(c.lower())
            elif c is None:
                return None
            else:
                attrName.append(c)
            # Step 5
            c = next(data)
        # Step 7
        if c != b"=":
            # Attribute has no value; unread the byte so it can start the
            # next attribute.
            data.previous()
            return b"".join(attrName), b""
        # Step 8
        next(data)
        # Step 9
        c = data.skip()
        # Step 10
        if c in (b"'", b'"'):
            # 10.1
            quoteChar = c
            while True:
                # 10.2
                c = next(data)
                # 10.3
                if c == quoteChar:
                    next(data)
                    return b"".join(attrName), b"".join(attrValue)
                # 10.4
                elif c in asciiUppercaseBytes:
                    attrValue.append(c.lower())
                # 10.5
                else:
                    attrValue.append(c)
        elif c == b">":
            return b"".join(attrName), b""
        elif c in asciiUppercaseBytes:
            attrValue.append(c.lower())
        elif c is None:
            return None
        else:
            attrValue.append(c)
        # Step 11
        while True:
            c = next(data)
            if c in spacesAngleBrackets:
                return b"".join(attrName), b"".join(attrValue)
            elif c in asciiUppercaseBytes:
                attrValue.append(c.lower())
            elif c is None:
                return None
            else:
                attrValue.append(c)
| 867 | |||
| 868 | |||
class ContentAttrParser(object):
    """Parser for the value of a meta "content" attribute, extracting the
    charset parameter from e.g. b"text/html; charset=utf-8"."""
    def __init__(self, data):
        assert isinstance(data, bytes)
        self.data = data

    def parse(self):
        """Return the charset value as bytes, or None if no well-formed
        charset parameter is present.

        Relies on EncodingBytes raising StopIteration when scanning runs
        off the end of the data.
        """
        try:
            # Check if the attr name is charset
            # otherwise return
            self.data.jumpTo(b"charset")
            self.data.position += 1
            self.data.skip()
            if not self.data.currentByte == b"=":
                # If there is no = sign keep looking for attrs
                return None
            self.data.position += 1
            self.data.skip()
            # Look for an encoding between matching quote marks
            if self.data.currentByte in (b'"', b"'"):
                quoteMark = self.data.currentByte
                self.data.position += 1
                oldPosition = self.data.position
                if self.data.jumpTo(quoteMark):
                    return self.data[oldPosition:self.data.position]
                else:
                    # Unterminated quote: treat as malformed.
                    return None
            else:
                # Unquoted value
                oldPosition = self.data.position
                try:
                    self.data.skipUntil(spaceCharactersBytes)
                    return self.data[oldPosition:self.data.position]
                except StopIteration:
                    # Return the whole remaining value
                    return self.data[oldPosition:]
        except StopIteration:
            # b"charset" never found (or scanning ran off the end).
            return None
| 906 | |||
| 907 | |||
def lookupEncoding(encoding):
    """Return the python codec name corresponding to an encoding or None if the
    string doesn't correspond to a valid encoding."""
    # Byte labels must be pure ASCII to be meaningful encoding names.
    if isinstance(encoding, binary_type):
        try:
            encoding = encoding.decode("ascii")
        except UnicodeDecodeError:
            return None

    if encoding is None:
        return None
    try:
        return webencodings.lookup(encoding)
    except AttributeError:
        # Non-string label (webencodings calls str methods on it).
        return None
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_tokenizer.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_tokenizer.py new file mode 100644 index 0000000..ef1ccf8 --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_tokenizer.py | |||
| @@ -0,0 +1,1721 @@ | |||
| 1 | from __future__ import absolute_import, division, unicode_literals | ||
| 2 | |||
| 3 | from pip._vendor.six import unichr as chr | ||
| 4 | |||
| 5 | from collections import deque | ||
| 6 | |||
| 7 | from .constants import spaceCharacters | ||
| 8 | from .constants import entities | ||
| 9 | from .constants import asciiLetters, asciiUpper2Lower | ||
| 10 | from .constants import digits, hexDigits, EOF | ||
| 11 | from .constants import tokenTypes, tagTokenTypes | ||
| 12 | from .constants import replacementCharacters | ||
| 13 | |||
| 14 | from ._inputstream import HTMLInputStream | ||
| 15 | |||
| 16 | from ._trie import Trie | ||
| 17 | |||
| 18 | entitiesTrie = Trie(entities) | ||
| 19 | |||
| 20 | |||
| 21 | class HTMLTokenizer(object): | ||
| 22 | """ This class takes care of tokenizing HTML. | ||
| 23 | |||
| 24 | * self.currentToken | ||
| 25 | Holds the token that is currently being processed. | ||
| 26 | |||
| 27 | * self.state | ||
| 28 | Holds a reference to the method to be invoked... XXX | ||
| 29 | |||
| 30 | * self.stream | ||
| 31 | Points to HTMLInputStream object. | ||
| 32 | """ | ||
| 33 | |||
| 34 | def __init__(self, stream, parser=None, **kwargs): | ||
| 35 | |||
| 36 | self.stream = HTMLInputStream(stream, **kwargs) | ||
| 37 | self.parser = parser | ||
| 38 | |||
| 39 | # Setup the initial tokenizer state | ||
| 40 | self.escapeFlag = False | ||
| 41 | self.lastFourChars = [] | ||
| 42 | self.state = self.dataState | ||
| 43 | self.escape = False | ||
| 44 | |||
| 45 | # The current token being created | ||
| 46 | self.currentToken = None | ||
| 47 | super(HTMLTokenizer, self).__init__() | ||
| 48 | |||
    def __iter__(self):
        """ This is where the magic happens.

        We do our usual processing through the states and when we have a token
        to return we yield the token which pauses processing until the next token
        is requested.
        """
        self.tokenQueue = deque([])
        # Start processing. When EOF is reached self.state will return False
        # instead of True and the loop will terminate.
        while self.state():
            # Surface stream-level (decoding) errors before parse tokens.
            while self.stream.errors:
                yield {"type": tokenTypes["ParseError"], "data": self.stream.errors.pop(0)}
            # Drain whatever tokens the state handler queued up.
            while self.tokenQueue:
                yield self.tokenQueue.popleft()
| 64 | |||
    def consumeNumberEntity(self, isHex):
        """This function returns either U+FFFD or the character based on the
        decimal or hexadecimal representation. It also discards ";" if present.
        If not present self.tokenQueue.append({"type": tokenTypes["ParseError"]}) is invoked.
        """

        allowed = digits
        radix = 10
        if isHex:
            allowed = hexDigits
            radix = 16

        charStack = []

        # Consume all the characters that are in range while making sure we
        # don't hit an EOF.
        c = self.stream.char()
        while c in allowed and c is not EOF:
            charStack.append(c)
            c = self.stream.char()

        # Convert the set of characters consumed to an int.
        charAsInt = int("".join(charStack), radix)

        # Certain characters get replaced with others
        if charAsInt in replacementCharacters:
            char = replacementCharacters[charAsInt]
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "illegal-codepoint-for-numeric-entity",
                                    "datavars": {"charAsInt": charAsInt}})
        elif ((0xD800 <= charAsInt <= 0xDFFF) or
              (charAsInt > 0x10FFFF)):
            # Surrogates and out-of-range values become U+FFFD.
            char = "\uFFFD"
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "illegal-codepoint-for-numeric-entity",
                                    "datavars": {"charAsInt": charAsInt}})
        else:
            # Control characters and non-characters are reported as errors
            # but the character itself is still emitted below.
            # Should speed up this check somehow (e.g. move the set to a constant)
            if ((0x0001 <= charAsInt <= 0x0008) or
                (0x000E <= charAsInt <= 0x001F) or
                (0x007F <= charAsInt <= 0x009F) or
                (0xFDD0 <= charAsInt <= 0xFDEF) or
                charAsInt in frozenset([0x000B, 0xFFFE, 0xFFFF, 0x1FFFE,
                                        0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE,
                                        0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE,
                                        0x5FFFF, 0x6FFFE, 0x6FFFF, 0x7FFFE,
                                        0x7FFFF, 0x8FFFE, 0x8FFFF, 0x9FFFE,
                                        0x9FFFF, 0xAFFFE, 0xAFFFF, 0xBFFFE,
                                        0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE,
                                        0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE,
                                        0xFFFFF, 0x10FFFE, 0x10FFFF])):
                self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                        "data":
                                        "illegal-codepoint-for-numeric-entity",
                                        "datavars": {"charAsInt": charAsInt}})
            try:
                # Try/except needed as UCS-2 Python builds' unichar only works
                # within the BMP.
                char = chr(charAsInt)
            except ValueError:
                # Narrow build: encode astral characters as a surrogate pair.
                v = charAsInt - 0x10000
                char = chr(0xD800 | (v >> 10)) + chr(0xDC00 | (v & 0x3FF))

        # Discard the ; if present. Otherwise, put it back on the queue and
        # invoke parseError on parser.
        if c != ";":
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "numeric-entity-without-semicolon"})
            self.stream.unget(c)

        return char
| 136 | |||
    def consumeEntity(self, allowedChar=None, fromAttribute=False):
        """Consume a character reference ("entity") starting after "&".

        Handles numeric references ("&#..." / "&#x...") by delegating to
        consumeNumberEntity, and named references by longest-prefix match
        against the entity trie.  The resulting text is either appended to
        the current attribute value (``fromAttribute``) or emitted as a
        character/space token.

        :arg allowedChar: a character that, if seen first, aborts the
            reference (used for the attribute-value quote/allowed char)
        :arg fromAttribute: True when called from inside an attribute value
        """
        # Initialise to the default output for when no entity is matched
        output = "&"

        charStack = [self.stream.char()]
        # Whitespace, EOF, "<", "&" or the allowed character mean this "&"
        # does not start a reference; push the character back untouched.
        if (charStack[0] in spaceCharacters or charStack[0] in (EOF, "<", "&") or
                (allowedChar is not None and allowedChar == charStack[0])):
            self.stream.unget(charStack[0])

        elif charStack[0] == "#":
            # Read the next character to see if it's hex or decimal
            hex = False
            charStack.append(self.stream.char())
            if charStack[-1] in ("x", "X"):
                hex = True
                charStack.append(self.stream.char())

            # charStack[-1] should be the first digit
            if (hex and charStack[-1] in hexDigits) \
                    or (not hex and charStack[-1] in digits):
                # At least one digit found, so consume the whole number
                self.stream.unget(charStack[-1])
                output = self.consumeNumberEntity(hex)
            else:
                # No digits found: emit the "&#" (and any "x") literally.
                self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                        "data": "expected-numeric-entity"})
                self.stream.unget(charStack.pop())
                output = "&" + "".join(charStack)

        else:
            # At this point in the process might have named entity. Entities
            # are stored in the global variable "entities".
            #
            # Consume characters and compare to these to a substring of the
            # entity names in the list until the substring no longer matches.
            while (charStack[-1] is not EOF):
                if not entitiesTrie.has_keys_with_prefix("".join(charStack)):
                    break
                charStack.append(self.stream.char())

            # At this point we have a string that starts with some characters
            # that may match an entity
            # Try to find the longest entity the string will match to take care
            # of &noti for instance.
            try:
                entityName = entitiesTrie.longest_prefix("".join(charStack[:-1]))
                entityLength = len(entityName)
            except KeyError:
                entityName = None

            if entityName is not None:
                if entityName[-1] != ";":
                    self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                            "named-entity-without-semicolon"})
                # In attributes, a semicolon-less match followed by an
                # alphanumeric or "=" is NOT treated as a reference.
                if (entityName[-1] != ";" and fromAttribute and
                    (charStack[entityLength] in asciiLetters or
                     charStack[entityLength] in digits or
                     charStack[entityLength] == "=")):
                    self.stream.unget(charStack.pop())
                    output = "&" + "".join(charStack)
                else:
                    # Replacement text plus any overconsumed characters.
                    output = entities[entityName]
                    self.stream.unget(charStack.pop())
                    output += "".join(charStack[entityLength:])
            else:
                # No entity matched at all: emit "&" plus the consumed text.
                self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                        "expected-named-entity"})
                self.stream.unget(charStack.pop())
                output = "&" + "".join(charStack)

        if fromAttribute:
            self.currentToken["data"][-1][1] += output
        else:
            if output in spaceCharacters:
                tokenType = "SpaceCharacters"
            else:
                tokenType = "Characters"
            self.tokenQueue.append({"type": tokenTypes[tokenType], "data": output})
| 216 | |||
| 217 | def processEntityInAttribute(self, allowedChar): | ||
| 218 | """This method replaces the need for "entityInAttributeValueState". | ||
| 219 | """ | ||
| 220 | self.consumeEntity(allowedChar=allowedChar, fromAttribute=True) | ||
| 221 | |||
| 222 | def emitCurrentToken(self): | ||
| 223 | """This method is a generic handler for emitting the tags. It also sets | ||
| 224 | the state to "data" because that's what's needed after a token has been | ||
| 225 | emitted. | ||
| 226 | """ | ||
| 227 | token = self.currentToken | ||
| 228 | # Add token to the queue to be yielded | ||
| 229 | if (token["type"] in tagTokenTypes): | ||
| 230 | token["name"] = token["name"].translate(asciiUpper2Lower) | ||
| 231 | if token["type"] == tokenTypes["EndTag"]: | ||
| 232 | if token["data"]: | ||
| 233 | self.tokenQueue.append({"type": tokenTypes["ParseError"], | ||
| 234 | "data": "attributes-in-end-tag"}) | ||
| 235 | if token["selfClosing"]: | ||
| 236 | self.tokenQueue.append({"type": tokenTypes["ParseError"], | ||
| 237 | "data": "self-closing-flag-on-end-tag"}) | ||
| 238 | self.tokenQueue.append(token) | ||
| 239 | self.state = self.dataState | ||
| 240 | |||
    # Below are the individual tokenizer state methods, one per state of the
    # WHATWG tokenization state machine.
| 242 | def dataState(self): | ||
| 243 | data = self.stream.char() | ||
| 244 | if data == "&": | ||
| 245 | self.state = self.entityDataState | ||
| 246 | elif data == "<": | ||
| 247 | self.state = self.tagOpenState | ||
| 248 | elif data == "\u0000": | ||
| 249 | self.tokenQueue.append({"type": tokenTypes["ParseError"], | ||
| 250 | "data": "invalid-codepoint"}) | ||
| 251 | self.tokenQueue.append({"type": tokenTypes["Characters"], | ||
| 252 | "data": "\u0000"}) | ||
| 253 | elif data is EOF: | ||
| 254 | # Tokenization ends. | ||
| 255 | return False | ||
| 256 | elif data in spaceCharacters: | ||
| 257 | # Directly after emitting a token you switch back to the "data | ||
| 258 | # state". At that point spaceCharacters are important so they are | ||
| 259 | # emitted separately. | ||
| 260 | self.tokenQueue.append({"type": tokenTypes["SpaceCharacters"], "data": | ||
| 261 | data + self.stream.charsUntil(spaceCharacters, True)}) | ||
| 262 | # No need to update lastFourChars here, since the first space will | ||
| 263 | # have already been appended to lastFourChars and will have broken | ||
| 264 | # any <!-- or --> sequences | ||
| 265 | else: | ||
| 266 | chars = self.stream.charsUntil(("&", "<", "\u0000")) | ||
| 267 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": | ||
| 268 | data + chars}) | ||
| 269 | return True | ||
| 270 | |||
| 271 | def entityDataState(self): | ||
| 272 | self.consumeEntity() | ||
| 273 | self.state = self.dataState | ||
| 274 | return True | ||
| 275 | |||
| 276 | def rcdataState(self): | ||
| 277 | data = self.stream.char() | ||
| 278 | if data == "&": | ||
| 279 | self.state = self.characterReferenceInRcdata | ||
| 280 | elif data == "<": | ||
| 281 | self.state = self.rcdataLessThanSignState | ||
| 282 | elif data == EOF: | ||
| 283 | # Tokenization ends. | ||
| 284 | return False | ||
| 285 | elif data == "\u0000": | ||
| 286 | self.tokenQueue.append({"type": tokenTypes["ParseError"], | ||
| 287 | "data": "invalid-codepoint"}) | ||
| 288 | self.tokenQueue.append({"type": tokenTypes["Characters"], | ||
| 289 | "data": "\uFFFD"}) | ||
| 290 | elif data in spaceCharacters: | ||
| 291 | # Directly after emitting a token you switch back to the "data | ||
| 292 | # state". At that point spaceCharacters are important so they are | ||
| 293 | # emitted separately. | ||
| 294 | self.tokenQueue.append({"type": tokenTypes["SpaceCharacters"], "data": | ||
| 295 | data + self.stream.charsUntil(spaceCharacters, True)}) | ||
| 296 | # No need to update lastFourChars here, since the first space will | ||
| 297 | # have already been appended to lastFourChars and will have broken | ||
| 298 | # any <!-- or --> sequences | ||
| 299 | else: | ||
| 300 | chars = self.stream.charsUntil(("&", "<", "\u0000")) | ||
| 301 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": | ||
| 302 | data + chars}) | ||
| 303 | return True | ||
| 304 | |||
| 305 | def characterReferenceInRcdata(self): | ||
| 306 | self.consumeEntity() | ||
| 307 | self.state = self.rcdataState | ||
| 308 | return True | ||
| 309 | |||
| 310 | def rawtextState(self): | ||
| 311 | data = self.stream.char() | ||
| 312 | if data == "<": | ||
| 313 | self.state = self.rawtextLessThanSignState | ||
| 314 | elif data == "\u0000": | ||
| 315 | self.tokenQueue.append({"type": tokenTypes["ParseError"], | ||
| 316 | "data": "invalid-codepoint"}) | ||
| 317 | self.tokenQueue.append({"type": tokenTypes["Characters"], | ||
| 318 | "data": "\uFFFD"}) | ||
| 319 | elif data == EOF: | ||
| 320 | # Tokenization ends. | ||
| 321 | return False | ||
| 322 | else: | ||
| 323 | chars = self.stream.charsUntil(("<", "\u0000")) | ||
| 324 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": | ||
| 325 | data + chars}) | ||
| 326 | return True | ||
| 327 | |||
| 328 | def scriptDataState(self): | ||
| 329 | data = self.stream.char() | ||
| 330 | if data == "<": | ||
| 331 | self.state = self.scriptDataLessThanSignState | ||
| 332 | elif data == "\u0000": | ||
| 333 | self.tokenQueue.append({"type": tokenTypes["ParseError"], | ||
| 334 | "data": "invalid-codepoint"}) | ||
| 335 | self.tokenQueue.append({"type": tokenTypes["Characters"], | ||
| 336 | "data": "\uFFFD"}) | ||
| 337 | elif data == EOF: | ||
| 338 | # Tokenization ends. | ||
| 339 | return False | ||
| 340 | else: | ||
| 341 | chars = self.stream.charsUntil(("<", "\u0000")) | ||
| 342 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": | ||
| 343 | data + chars}) | ||
| 344 | return True | ||
| 345 | |||
| 346 | def plaintextState(self): | ||
| 347 | data = self.stream.char() | ||
| 348 | if data == EOF: | ||
| 349 | # Tokenization ends. | ||
| 350 | return False | ||
| 351 | elif data == "\u0000": | ||
| 352 | self.tokenQueue.append({"type": tokenTypes["ParseError"], | ||
| 353 | "data": "invalid-codepoint"}) | ||
| 354 | self.tokenQueue.append({"type": tokenTypes["Characters"], | ||
| 355 | "data": "\uFFFD"}) | ||
| 356 | else: | ||
| 357 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": | ||
| 358 | data + self.stream.charsUntil("\u0000")}) | ||
| 359 | return True | ||
| 360 | |||
| 361 | def tagOpenState(self): | ||
| 362 | data = self.stream.char() | ||
| 363 | if data == "!": | ||
| 364 | self.state = self.markupDeclarationOpenState | ||
| 365 | elif data == "/": | ||
| 366 | self.state = self.closeTagOpenState | ||
| 367 | elif data in asciiLetters: | ||
| 368 | self.currentToken = {"type": tokenTypes["StartTag"], | ||
| 369 | "name": data, "data": [], | ||
| 370 | "selfClosing": False, | ||
| 371 | "selfClosingAcknowledged": False} | ||
| 372 | self.state = self.tagNameState | ||
| 373 | elif data == ">": | ||
| 374 | # XXX In theory it could be something besides a tag name. But | ||
| 375 | # do we really care? | ||
| 376 | self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | ||
| 377 | "expected-tag-name-but-got-right-bracket"}) | ||
| 378 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<>"}) | ||
| 379 | self.state = self.dataState | ||
| 380 | elif data == "?": | ||
| 381 | # XXX In theory it could be something besides a tag name. But | ||
| 382 | # do we really care? | ||
| 383 | self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | ||
| 384 | "expected-tag-name-but-got-question-mark"}) | ||
| 385 | self.stream.unget(data) | ||
| 386 | self.state = self.bogusCommentState | ||
| 387 | else: | ||
| 388 | # XXX | ||
| 389 | self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | ||
| 390 | "expected-tag-name"}) | ||
| 391 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) | ||
| 392 | self.stream.unget(data) | ||
| 393 | self.state = self.dataState | ||
| 394 | return True | ||
| 395 | |||
| 396 | def closeTagOpenState(self): | ||
| 397 | data = self.stream.char() | ||
| 398 | if data in asciiLetters: | ||
| 399 | self.currentToken = {"type": tokenTypes["EndTag"], "name": data, | ||
| 400 | "data": [], "selfClosing": False} | ||
| 401 | self.state = self.tagNameState | ||
| 402 | elif data == ">": | ||
| 403 | self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | ||
| 404 | "expected-closing-tag-but-got-right-bracket"}) | ||
| 405 | self.state = self.dataState | ||
| 406 | elif data is EOF: | ||
| 407 | self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | ||
| 408 | "expected-closing-tag-but-got-eof"}) | ||
| 409 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "</"}) | ||
| 410 | self.state = self.dataState | ||
| 411 | else: | ||
| 412 | # XXX data can be _'_... | ||
| 413 | self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | ||
| 414 | "expected-closing-tag-but-got-char", | ||
| 415 | "datavars": {"data": data}}) | ||
| 416 | self.stream.unget(data) | ||
| 417 | self.state = self.bogusCommentState | ||
| 418 | return True | ||
| 419 | |||
| 420 | def tagNameState(self): | ||
| 421 | data = self.stream.char() | ||
| 422 | if data in spaceCharacters: | ||
| 423 | self.state = self.beforeAttributeNameState | ||
| 424 | elif data == ">": | ||
| 425 | self.emitCurrentToken() | ||
| 426 | elif data is EOF: | ||
| 427 | self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | ||
| 428 | "eof-in-tag-name"}) | ||
| 429 | self.state = self.dataState | ||
| 430 | elif data == "/": | ||
| 431 | self.state = self.selfClosingStartTagState | ||
| 432 | elif data == "\u0000": | ||
| 433 | self.tokenQueue.append({"type": tokenTypes["ParseError"], | ||
| 434 | "data": "invalid-codepoint"}) | ||
| 435 | self.currentToken["name"] += "\uFFFD" | ||
| 436 | else: | ||
| 437 | self.currentToken["name"] += data | ||
| 438 | # (Don't use charsUntil here, because tag names are | ||
| 439 | # very short and it's faster to not do anything fancy) | ||
| 440 | return True | ||
| 441 | |||
| 442 | def rcdataLessThanSignState(self): | ||
| 443 | data = self.stream.char() | ||
| 444 | if data == "/": | ||
| 445 | self.temporaryBuffer = "" | ||
| 446 | self.state = self.rcdataEndTagOpenState | ||
| 447 | else: | ||
| 448 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) | ||
| 449 | self.stream.unget(data) | ||
| 450 | self.state = self.rcdataState | ||
| 451 | return True | ||
| 452 | |||
| 453 | def rcdataEndTagOpenState(self): | ||
| 454 | data = self.stream.char() | ||
| 455 | if data in asciiLetters: | ||
| 456 | self.temporaryBuffer += data | ||
| 457 | self.state = self.rcdataEndTagNameState | ||
| 458 | else: | ||
| 459 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "</"}) | ||
| 460 | self.stream.unget(data) | ||
| 461 | self.state = self.rcdataState | ||
| 462 | return True | ||
| 463 | |||
| 464 | def rcdataEndTagNameState(self): | ||
| 465 | appropriate = self.currentToken and self.currentToken["name"].lower() == self.temporaryBuffer.lower() | ||
| 466 | data = self.stream.char() | ||
| 467 | if data in spaceCharacters and appropriate: | ||
| 468 | self.currentToken = {"type": tokenTypes["EndTag"], | ||
| 469 | "name": self.temporaryBuffer, | ||
| 470 | "data": [], "selfClosing": False} | ||
| 471 | self.state = self.beforeAttributeNameState | ||
| 472 | elif data == "/" and appropriate: | ||
| 473 | self.currentToken = {"type": tokenTypes["EndTag"], | ||
| 474 | "name": self.temporaryBuffer, | ||
| 475 | "data": [], "selfClosing": False} | ||
| 476 | self.state = self.selfClosingStartTagState | ||
| 477 | elif data == ">" and appropriate: | ||
| 478 | self.currentToken = {"type": tokenTypes["EndTag"], | ||
| 479 | "name": self.temporaryBuffer, | ||
| 480 | "data": [], "selfClosing": False} | ||
| 481 | self.emitCurrentToken() | ||
| 482 | self.state = self.dataState | ||
| 483 | elif data in asciiLetters: | ||
| 484 | self.temporaryBuffer += data | ||
| 485 | else: | ||
| 486 | self.tokenQueue.append({"type": tokenTypes["Characters"], | ||
| 487 | "data": "</" + self.temporaryBuffer}) | ||
| 488 | self.stream.unget(data) | ||
| 489 | self.state = self.rcdataState | ||
| 490 | return True | ||
| 491 | |||
| 492 | def rawtextLessThanSignState(self): | ||
| 493 | data = self.stream.char() | ||
| 494 | if data == "/": | ||
| 495 | self.temporaryBuffer = "" | ||
| 496 | self.state = self.rawtextEndTagOpenState | ||
| 497 | else: | ||
| 498 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) | ||
| 499 | self.stream.unget(data) | ||
| 500 | self.state = self.rawtextState | ||
| 501 | return True | ||
| 502 | |||
| 503 | def rawtextEndTagOpenState(self): | ||
| 504 | data = self.stream.char() | ||
| 505 | if data in asciiLetters: | ||
| 506 | self.temporaryBuffer += data | ||
| 507 | self.state = self.rawtextEndTagNameState | ||
| 508 | else: | ||
| 509 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "</"}) | ||
| 510 | self.stream.unget(data) | ||
| 511 | self.state = self.rawtextState | ||
| 512 | return True | ||
| 513 | |||
| 514 | def rawtextEndTagNameState(self): | ||
| 515 | appropriate = self.currentToken and self.currentToken["name"].lower() == self.temporaryBuffer.lower() | ||
| 516 | data = self.stream.char() | ||
| 517 | if data in spaceCharacters and appropriate: | ||
| 518 | self.currentToken = {"type": tokenTypes["EndTag"], | ||
| 519 | "name": self.temporaryBuffer, | ||
| 520 | "data": [], "selfClosing": False} | ||
| 521 | self.state = self.beforeAttributeNameState | ||
| 522 | elif data == "/" and appropriate: | ||
| 523 | self.currentToken = {"type": tokenTypes["EndTag"], | ||
| 524 | "name": self.temporaryBuffer, | ||
| 525 | "data": [], "selfClosing": False} | ||
| 526 | self.state = self.selfClosingStartTagState | ||
| 527 | elif data == ">" and appropriate: | ||
| 528 | self.currentToken = {"type": tokenTypes["EndTag"], | ||
| 529 | "name": self.temporaryBuffer, | ||
| 530 | "data": [], "selfClosing": False} | ||
| 531 | self.emitCurrentToken() | ||
| 532 | self.state = self.dataState | ||
| 533 | elif data in asciiLetters: | ||
| 534 | self.temporaryBuffer += data | ||
| 535 | else: | ||
| 536 | self.tokenQueue.append({"type": tokenTypes["Characters"], | ||
| 537 | "data": "</" + self.temporaryBuffer}) | ||
| 538 | self.stream.unget(data) | ||
| 539 | self.state = self.rawtextState | ||
| 540 | return True | ||
| 541 | |||
| 542 | def scriptDataLessThanSignState(self): | ||
| 543 | data = self.stream.char() | ||
| 544 | if data == "/": | ||
| 545 | self.temporaryBuffer = "" | ||
| 546 | self.state = self.scriptDataEndTagOpenState | ||
| 547 | elif data == "!": | ||
| 548 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<!"}) | ||
| 549 | self.state = self.scriptDataEscapeStartState | ||
| 550 | else: | ||
| 551 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) | ||
| 552 | self.stream.unget(data) | ||
| 553 | self.state = self.scriptDataState | ||
| 554 | return True | ||
| 555 | |||
| 556 | def scriptDataEndTagOpenState(self): | ||
| 557 | data = self.stream.char() | ||
| 558 | if data in asciiLetters: | ||
| 559 | self.temporaryBuffer += data | ||
| 560 | self.state = self.scriptDataEndTagNameState | ||
| 561 | else: | ||
| 562 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "</"}) | ||
| 563 | self.stream.unget(data) | ||
| 564 | self.state = self.scriptDataState | ||
| 565 | return True | ||
| 566 | |||
| 567 | def scriptDataEndTagNameState(self): | ||
| 568 | appropriate = self.currentToken and self.currentToken["name"].lower() == self.temporaryBuffer.lower() | ||
| 569 | data = self.stream.char() | ||
| 570 | if data in spaceCharacters and appropriate: | ||
| 571 | self.currentToken = {"type": tokenTypes["EndTag"], | ||
| 572 | "name": self.temporaryBuffer, | ||
| 573 | "data": [], "selfClosing": False} | ||
| 574 | self.state = self.beforeAttributeNameState | ||
| 575 | elif data == "/" and appropriate: | ||
| 576 | self.currentToken = {"type": tokenTypes["EndTag"], | ||
| 577 | "name": self.temporaryBuffer, | ||
| 578 | "data": [], "selfClosing": False} | ||
| 579 | self.state = self.selfClosingStartTagState | ||
| 580 | elif data == ">" and appropriate: | ||
| 581 | self.currentToken = {"type": tokenTypes["EndTag"], | ||
| 582 | "name": self.temporaryBuffer, | ||
| 583 | "data": [], "selfClosing": False} | ||
| 584 | self.emitCurrentToken() | ||
| 585 | self.state = self.dataState | ||
| 586 | elif data in asciiLetters: | ||
| 587 | self.temporaryBuffer += data | ||
| 588 | else: | ||
| 589 | self.tokenQueue.append({"type": tokenTypes["Characters"], | ||
| 590 | "data": "</" + self.temporaryBuffer}) | ||
| 591 | self.stream.unget(data) | ||
| 592 | self.state = self.scriptDataState | ||
| 593 | return True | ||
| 594 | |||
| 595 | def scriptDataEscapeStartState(self): | ||
| 596 | data = self.stream.char() | ||
| 597 | if data == "-": | ||
| 598 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"}) | ||
| 599 | self.state = self.scriptDataEscapeStartDashState | ||
| 600 | else: | ||
| 601 | self.stream.unget(data) | ||
| 602 | self.state = self.scriptDataState | ||
| 603 | return True | ||
| 604 | |||
| 605 | def scriptDataEscapeStartDashState(self): | ||
| 606 | data = self.stream.char() | ||
| 607 | if data == "-": | ||
| 608 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"}) | ||
| 609 | self.state = self.scriptDataEscapedDashDashState | ||
| 610 | else: | ||
| 611 | self.stream.unget(data) | ||
| 612 | self.state = self.scriptDataState | ||
| 613 | return True | ||
| 614 | |||
| 615 | def scriptDataEscapedState(self): | ||
| 616 | data = self.stream.char() | ||
| 617 | if data == "-": | ||
| 618 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"}) | ||
| 619 | self.state = self.scriptDataEscapedDashState | ||
| 620 | elif data == "<": | ||
| 621 | self.state = self.scriptDataEscapedLessThanSignState | ||
| 622 | elif data == "\u0000": | ||
| 623 | self.tokenQueue.append({"type": tokenTypes["ParseError"], | ||
| 624 | "data": "invalid-codepoint"}) | ||
| 625 | self.tokenQueue.append({"type": tokenTypes["Characters"], | ||
| 626 | "data": "\uFFFD"}) | ||
| 627 | elif data == EOF: | ||
| 628 | self.state = self.dataState | ||
| 629 | else: | ||
| 630 | chars = self.stream.charsUntil(("<", "-", "\u0000")) | ||
| 631 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": | ||
| 632 | data + chars}) | ||
| 633 | return True | ||
| 634 | |||
| 635 | def scriptDataEscapedDashState(self): | ||
| 636 | data = self.stream.char() | ||
| 637 | if data == "-": | ||
| 638 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"}) | ||
| 639 | self.state = self.scriptDataEscapedDashDashState | ||
| 640 | elif data == "<": | ||
| 641 | self.state = self.scriptDataEscapedLessThanSignState | ||
| 642 | elif data == "\u0000": | ||
| 643 | self.tokenQueue.append({"type": tokenTypes["ParseError"], | ||
| 644 | "data": "invalid-codepoint"}) | ||
| 645 | self.tokenQueue.append({"type": tokenTypes["Characters"], | ||
| 646 | "data": "\uFFFD"}) | ||
| 647 | self.state = self.scriptDataEscapedState | ||
| 648 | elif data == EOF: | ||
| 649 | self.state = self.dataState | ||
| 650 | else: | ||
| 651 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) | ||
| 652 | self.state = self.scriptDataEscapedState | ||
| 653 | return True | ||
| 654 | |||
| 655 | def scriptDataEscapedDashDashState(self): | ||
| 656 | data = self.stream.char() | ||
| 657 | if data == "-": | ||
| 658 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"}) | ||
| 659 | elif data == "<": | ||
| 660 | self.state = self.scriptDataEscapedLessThanSignState | ||
| 661 | elif data == ">": | ||
| 662 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": ">"}) | ||
| 663 | self.state = self.scriptDataState | ||
| 664 | elif data == "\u0000": | ||
| 665 | self.tokenQueue.append({"type": tokenTypes["ParseError"], | ||
| 666 | "data": "invalid-codepoint"}) | ||
| 667 | self.tokenQueue.append({"type": tokenTypes["Characters"], | ||
| 668 | "data": "\uFFFD"}) | ||
| 669 | self.state = self.scriptDataEscapedState | ||
| 670 | elif data == EOF: | ||
| 671 | self.state = self.dataState | ||
| 672 | else: | ||
| 673 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) | ||
| 674 | self.state = self.scriptDataEscapedState | ||
| 675 | return True | ||
| 676 | |||
| 677 | def scriptDataEscapedLessThanSignState(self): | ||
| 678 | data = self.stream.char() | ||
| 679 | if data == "/": | ||
| 680 | self.temporaryBuffer = "" | ||
| 681 | self.state = self.scriptDataEscapedEndTagOpenState | ||
| 682 | elif data in asciiLetters: | ||
| 683 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<" + data}) | ||
| 684 | self.temporaryBuffer = data | ||
| 685 | self.state = self.scriptDataDoubleEscapeStartState | ||
| 686 | else: | ||
| 687 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) | ||
| 688 | self.stream.unget(data) | ||
| 689 | self.state = self.scriptDataEscapedState | ||
| 690 | return True | ||
| 691 | |||
| 692 | def scriptDataEscapedEndTagOpenState(self): | ||
| 693 | data = self.stream.char() | ||
| 694 | if data in asciiLetters: | ||
| 695 | self.temporaryBuffer = data | ||
| 696 | self.state = self.scriptDataEscapedEndTagNameState | ||
| 697 | else: | ||
| 698 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "</"}) | ||
| 699 | self.stream.unget(data) | ||
| 700 | self.state = self.scriptDataEscapedState | ||
| 701 | return True | ||
| 702 | |||
| 703 | def scriptDataEscapedEndTagNameState(self): | ||
| 704 | appropriate = self.currentToken and self.currentToken["name"].lower() == self.temporaryBuffer.lower() | ||
| 705 | data = self.stream.char() | ||
| 706 | if data in spaceCharacters and appropriate: | ||
| 707 | self.currentToken = {"type": tokenTypes["EndTag"], | ||
| 708 | "name": self.temporaryBuffer, | ||
| 709 | "data": [], "selfClosing": False} | ||
| 710 | self.state = self.beforeAttributeNameState | ||
| 711 | elif data == "/" and appropriate: | ||
| 712 | self.currentToken = {"type": tokenTypes["EndTag"], | ||
| 713 | "name": self.temporaryBuffer, | ||
| 714 | "data": [], "selfClosing": False} | ||
| 715 | self.state = self.selfClosingStartTagState | ||
| 716 | elif data == ">" and appropriate: | ||
| 717 | self.currentToken = {"type": tokenTypes["EndTag"], | ||
| 718 | "name": self.temporaryBuffer, | ||
| 719 | "data": [], "selfClosing": False} | ||
| 720 | self.emitCurrentToken() | ||
| 721 | self.state = self.dataState | ||
| 722 | elif data in asciiLetters: | ||
| 723 | self.temporaryBuffer += data | ||
| 724 | else: | ||
| 725 | self.tokenQueue.append({"type": tokenTypes["Characters"], | ||
| 726 | "data": "</" + self.temporaryBuffer}) | ||
| 727 | self.stream.unget(data) | ||
| 728 | self.state = self.scriptDataEscapedState | ||
| 729 | return True | ||
| 730 | |||
| 731 | def scriptDataDoubleEscapeStartState(self): | ||
| 732 | data = self.stream.char() | ||
| 733 | if data in (spaceCharacters | frozenset(("/", ">"))): | ||
| 734 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) | ||
| 735 | if self.temporaryBuffer.lower() == "script": | ||
| 736 | self.state = self.scriptDataDoubleEscapedState | ||
| 737 | else: | ||
| 738 | self.state = self.scriptDataEscapedState | ||
| 739 | elif data in asciiLetters: | ||
| 740 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) | ||
| 741 | self.temporaryBuffer += data | ||
| 742 | else: | ||
| 743 | self.stream.unget(data) | ||
| 744 | self.state = self.scriptDataEscapedState | ||
| 745 | return True | ||
| 746 | |||
| 747 | def scriptDataDoubleEscapedState(self): | ||
| 748 | data = self.stream.char() | ||
| 749 | if data == "-": | ||
| 750 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"}) | ||
| 751 | self.state = self.scriptDataDoubleEscapedDashState | ||
| 752 | elif data == "<": | ||
| 753 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) | ||
| 754 | self.state = self.scriptDataDoubleEscapedLessThanSignState | ||
| 755 | elif data == "\u0000": | ||
| 756 | self.tokenQueue.append({"type": tokenTypes["ParseError"], | ||
| 757 | "data": "invalid-codepoint"}) | ||
| 758 | self.tokenQueue.append({"type": tokenTypes["Characters"], | ||
| 759 | "data": "\uFFFD"}) | ||
| 760 | elif data == EOF: | ||
| 761 | self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | ||
| 762 | "eof-in-script-in-script"}) | ||
| 763 | self.state = self.dataState | ||
| 764 | else: | ||
| 765 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) | ||
| 766 | return True | ||
| 767 | |||
| 768 | def scriptDataDoubleEscapedDashState(self): | ||
| 769 | data = self.stream.char() | ||
| 770 | if data == "-": | ||
| 771 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"}) | ||
| 772 | self.state = self.scriptDataDoubleEscapedDashDashState | ||
| 773 | elif data == "<": | ||
| 774 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) | ||
| 775 | self.state = self.scriptDataDoubleEscapedLessThanSignState | ||
| 776 | elif data == "\u0000": | ||
| 777 | self.tokenQueue.append({"type": tokenTypes["ParseError"], | ||
| 778 | "data": "invalid-codepoint"}) | ||
| 779 | self.tokenQueue.append({"type": tokenTypes["Characters"], | ||
| 780 | "data": "\uFFFD"}) | ||
| 781 | self.state = self.scriptDataDoubleEscapedState | ||
| 782 | elif data == EOF: | ||
| 783 | self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | ||
| 784 | "eof-in-script-in-script"}) | ||
| 785 | self.state = self.dataState | ||
| 786 | else: | ||
| 787 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) | ||
| 788 | self.state = self.scriptDataDoubleEscapedState | ||
| 789 | return True | ||
| 790 | |||
| 791 | def scriptDataDoubleEscapedDashDashState(self): | ||
| 792 | data = self.stream.char() | ||
| 793 | if data == "-": | ||
| 794 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"}) | ||
| 795 | elif data == "<": | ||
| 796 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) | ||
| 797 | self.state = self.scriptDataDoubleEscapedLessThanSignState | ||
| 798 | elif data == ">": | ||
| 799 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": ">"}) | ||
| 800 | self.state = self.scriptDataState | ||
| 801 | elif data == "\u0000": | ||
| 802 | self.tokenQueue.append({"type": tokenTypes["ParseError"], | ||
| 803 | "data": "invalid-codepoint"}) | ||
| 804 | self.tokenQueue.append({"type": tokenTypes["Characters"], | ||
| 805 | "data": "\uFFFD"}) | ||
| 806 | self.state = self.scriptDataDoubleEscapedState | ||
| 807 | elif data == EOF: | ||
| 808 | self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | ||
| 809 | "eof-in-script-in-script"}) | ||
| 810 | self.state = self.dataState | ||
| 811 | else: | ||
| 812 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) | ||
| 813 | self.state = self.scriptDataDoubleEscapedState | ||
| 814 | return True | ||
| 815 | |||
| 816 | def scriptDataDoubleEscapedLessThanSignState(self): | ||
| 817 | data = self.stream.char() | ||
| 818 | if data == "/": | ||
| 819 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "/"}) | ||
| 820 | self.temporaryBuffer = "" | ||
| 821 | self.state = self.scriptDataDoubleEscapeEndState | ||
| 822 | else: | ||
| 823 | self.stream.unget(data) | ||
| 824 | self.state = self.scriptDataDoubleEscapedState | ||
| 825 | return True | ||
| 826 | |||
    def scriptDataDoubleEscapeEndState(self):
        """Collect the tag-name-like run after "</" in double-escaped script
        data; if it spells "script" (case-insensitive), drop back to the
        single-escaped state. All consumed characters are still emitted.
        """
        data = self.stream.char()
        if data in (spaceCharacters | frozenset(("/", ">"))):
            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data})
            # A complete "script" name ends the double-escaped region.
            if self.temporaryBuffer.lower() == "script":
                self.state = self.scriptDataEscapedState
            else:
                self.state = self.scriptDataDoubleEscapedState
        elif data in asciiLetters:
            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data})
            self.temporaryBuffer += data
        else:
            # Not part of a tag name; reprocess in the double-escaped state.
            self.stream.unget(data)
            self.state = self.scriptDataDoubleEscapedState
        return True
| 842 | |||
    def beforeAttributeNameState(self):
        """Skip whitespace and start a new attribute when a name character
        arrives. New attributes are appended to currentToken["data"] as
        mutable [name, value] pairs.
        """
        data = self.stream.char()
        if data in spaceCharacters:
            # Consume the whole whitespace run in one call.
            self.stream.charsUntil(spaceCharacters, True)
        elif data in asciiLetters:
            self.currentToken["data"].append([data, ""])
            self.state = self.attributeNameState
        elif data == ">":
            self.emitCurrentToken()
        elif data == "/":
            self.state = self.selfClosingStartTagState
        elif data in ("'", '"', "=", "<"):
            # Parse error, but the character still begins an attribute name.
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "invalid-character-in-attribute-name"})
            self.currentToken["data"].append([data, ""])
            self.state = self.attributeNameState
        elif data == "\u0000":
            # NUL is replaced with U+FFFD per the error-recovery rules.
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "invalid-codepoint"})
            self.currentToken["data"].append(["\uFFFD", ""])
            self.state = self.attributeNameState
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "expected-attribute-name-but-got-eof"})
            self.state = self.dataState
        else:
            self.currentToken["data"].append([data, ""])
            self.state = self.attributeNameState
        return True
| 872 | |||
    def attributeNameState(self):
        """Accumulate the current attribute's name.

        When control leaves this state the name is lowercased and compared
        against the previously collected attributes so duplicates can be
        reported; ``emitToken`` defers emitting on ">" until after that check.
        """
        data = self.stream.char()
        leavingThisState = True
        emitToken = False
        if data == "=":
            self.state = self.beforeAttributeValueState
        elif data in asciiLetters:
            # Fast path: append the whole run of letters at once.
            self.currentToken["data"][-1][0] += data +\
                self.stream.charsUntil(asciiLetters, True)
            leavingThisState = False
        elif data == ">":
            # XXX If we emit here the attributes are converted to a dict
            # without being checked and when the code below runs we error
            # because data is a dict not a list
            emitToken = True
        elif data in spaceCharacters:
            self.state = self.afterAttributeNameState
        elif data == "/":
            self.state = self.selfClosingStartTagState
        elif data == "\u0000":
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "invalid-codepoint"})
            self.currentToken["data"][-1][0] += "\uFFFD"
            leavingThisState = False
        elif data in ("'", '"', "<"):
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data":
                                    "invalid-character-in-attribute-name"})
            self.currentToken["data"][-1][0] += data
            leavingThisState = False
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "eof-in-attribute-name"})
            self.state = self.dataState
        else:
            self.currentToken["data"][-1][0] += data
            leavingThisState = False

        if leavingThisState:
            # Attributes are not dropped at this stage. That happens when the
            # start tag token is emitted so values can still be safely appended
            # to attributes, but we do want to report the parse error in time.
            self.currentToken["data"][-1][0] = (
                self.currentToken["data"][-1][0].translate(asciiUpper2Lower))
            for name, _ in self.currentToken["data"][:-1]:
                if self.currentToken["data"][-1][0] == name:
                    self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                            "duplicate-attribute"})
                    break
            # XXX Fix for above XXX
            if emitToken:
                self.emitCurrentToken()
        return True
| 926 | |||
    def afterAttributeNameState(self):
        """Handle the character after an attribute name (and any whitespace):
        an "=" introduces a value, otherwise a new attribute, tag end, or
        error-recovery transition follows.
        """
        data = self.stream.char()
        if data in spaceCharacters:
            # Consume the remaining whitespace run in one call.
            self.stream.charsUntil(spaceCharacters, True)
        elif data == "=":
            self.state = self.beforeAttributeValueState
        elif data == ">":
            self.emitCurrentToken()
        elif data in asciiLetters:
            # Previous attribute had no value; a new attribute starts here.
            self.currentToken["data"].append([data, ""])
            self.state = self.attributeNameState
        elif data == "/":
            self.state = self.selfClosingStartTagState
        elif data == "\u0000":
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "invalid-codepoint"})
            self.currentToken["data"].append(["\uFFFD", ""])
            self.state = self.attributeNameState
        elif data in ("'", '"', "<"):
            # Parse error, but the character still starts a new attribute name.
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "invalid-character-after-attribute-name"})
            self.currentToken["data"].append([data, ""])
            self.state = self.attributeNameState
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "expected-end-of-tag-but-got-eof"})
            self.state = self.dataState
        else:
            self.currentToken["data"].append([data, ""])
            self.state = self.attributeNameState
        return True
| 958 | |||
    def beforeAttributeValueState(self):
        """Determine how the attribute value is delimited: double-quoted,
        single-quoted, or unquoted. Appends to the last [name, value] pair
        in currentToken["data"].
        """
        data = self.stream.char()
        if data in spaceCharacters:
            # Consume the whitespace run before the value.
            self.stream.charsUntil(spaceCharacters, True)
        elif data == "\"":
            self.state = self.attributeValueDoubleQuotedState
        elif data == "&":
            # "&" belongs to the unquoted value; reprocess it there so the
            # entity handling in that state sees it.
            self.state = self.attributeValueUnQuotedState
            self.stream.unget(data)
        elif data == "'":
            self.state = self.attributeValueSingleQuotedState
        elif data == ">":
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "expected-attribute-value-but-got-right-bracket"})
            self.emitCurrentToken()
        elif data == "\u0000":
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "invalid-codepoint"})
            self.currentToken["data"][-1][1] += "\uFFFD"
            self.state = self.attributeValueUnQuotedState
        elif data in ("=", "<", "`"):
            # Parse error, but the character still starts an unquoted value.
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "equals-in-unquoted-attribute-value"})
            self.currentToken["data"][-1][1] += data
            self.state = self.attributeValueUnQuotedState
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "expected-attribute-value-but-got-eof"})
            self.state = self.dataState
        else:
            self.currentToken["data"][-1][1] += data
            self.state = self.attributeValueUnQuotedState
        return True
| 992 | |||
    def attributeValueDoubleQuotedState(self):
        """Accumulate a double-quoted attribute value until the closing
        double quote, expanding character references as they appear.
        """
        data = self.stream.char()
        if data == "\"":
            self.state = self.afterAttributeValueState
        elif data == "&":
            # '"' is the "additional allowed character" for entities here.
            self.processEntityInAttribute('"')
        elif data == "\u0000":
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "invalid-codepoint"})
            self.currentToken["data"][-1][1] += "\uFFFD"
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "eof-in-attribute-value-double-quote"})
            self.state = self.dataState
        else:
            # Fast path: append the run up to the next special character.
            self.currentToken["data"][-1][1] += data +\
                self.stream.charsUntil(("\"", "&", "\u0000"))
        return True
| 1011 | |||
    def attributeValueSingleQuotedState(self):
        """Accumulate a single-quoted attribute value until the closing
        single quote, expanding character references as they appear.
        """
        data = self.stream.char()
        if data == "'":
            self.state = self.afterAttributeValueState
        elif data == "&":
            # "'" is the "additional allowed character" for entities here.
            self.processEntityInAttribute("'")
        elif data == "\u0000":
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "invalid-codepoint"})
            self.currentToken["data"][-1][1] += "\uFFFD"
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "eof-in-attribute-value-single-quote"})
            self.state = self.dataState
        else:
            # Fast path: append the run up to the next special character.
            self.currentToken["data"][-1][1] += data +\
                self.stream.charsUntil(("'", "&", "\u0000"))
        return True
| 1030 | |||
    def attributeValueUnQuotedState(self):
        """Accumulate an unquoted attribute value, which is terminated by
        whitespace or ">" (quotes, "=", "<", "`" are parse errors but are
        kept in the value).
        """
        data = self.stream.char()
        if data in spaceCharacters:
            self.state = self.beforeAttributeNameState
        elif data == "&":
            # ">" is the "additional allowed character" for entities here.
            self.processEntityInAttribute(">")
        elif data == ">":
            self.emitCurrentToken()
        elif data in ('"', "'", "=", "<", "`"):
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "unexpected-character-in-unquoted-attribute-value"})
            self.currentToken["data"][-1][1] += data
        elif data == "\u0000":
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "invalid-codepoint"})
            self.currentToken["data"][-1][1] += "\uFFFD"
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "eof-in-attribute-value-no-quotes"})
            self.state = self.dataState
        else:
            # Fast path: append the run up to the next special character.
            self.currentToken["data"][-1][1] += data + self.stream.charsUntil(
                frozenset(("&", ">", '"', "'", "=", "<", "`", "\u0000")) | spaceCharacters)
        return True
| 1055 | |||
| 1056 | def afterAttributeValueState(self): | ||
| 1057 | data = self.stream.char() | ||
| 1058 | if data in spaceCharacters: | ||
| 1059 | self.state = self.beforeAttributeNameState | ||
| 1060 | elif data == ">": | ||
| 1061 | self.emitCurrentToken() | ||
| 1062 | elif data == "/": | ||
| 1063 | self.state = self.selfClosingStartTagState | ||
| 1064 | elif data is EOF: | ||
| 1065 | self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | ||
| 1066 | "unexpected-EOF-after-attribute-value"}) | ||
| 1067 | self.stream.unget(data) | ||
| 1068 | self.state = self.dataState | ||
| 1069 | else: | ||
| 1070 | self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | ||
| 1071 | "unexpected-character-after-attribute-value"}) | ||
| 1072 | self.stream.unget(data) | ||
| 1073 | self.state = self.beforeAttributeNameState | ||
| 1074 | return True | ||
| 1075 | |||
| 1076 | def selfClosingStartTagState(self): | ||
| 1077 | data = self.stream.char() | ||
| 1078 | if data == ">": | ||
| 1079 | self.currentToken["selfClosing"] = True | ||
| 1080 | self.emitCurrentToken() | ||
| 1081 | elif data is EOF: | ||
| 1082 | self.tokenQueue.append({"type": tokenTypes["ParseError"], | ||
| 1083 | "data": | ||
| 1084 | "unexpected-EOF-after-solidus-in-tag"}) | ||
| 1085 | self.stream.unget(data) | ||
| 1086 | self.state = self.dataState | ||
| 1087 | else: | ||
| 1088 | self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | ||
| 1089 | "unexpected-character-after-solidus-in-tag"}) | ||
| 1090 | self.stream.unget(data) | ||
| 1091 | self.state = self.beforeAttributeNameState | ||
| 1092 | return True | ||
| 1093 | |||
| 1094 | def bogusCommentState(self): | ||
| 1095 | # Make a new comment token and give it as value all the characters | ||
| 1096 | # until the first > or EOF (charsUntil checks for EOF automatically) | ||
| 1097 | # and emit it. | ||
| 1098 | data = self.stream.charsUntil(">") | ||
| 1099 | data = data.replace("\u0000", "\uFFFD") | ||
| 1100 | self.tokenQueue.append( | ||
| 1101 | {"type": tokenTypes["Comment"], "data": data}) | ||
| 1102 | |||
| 1103 | # Eat the character directly after the bogus comment which is either a | ||
| 1104 | # ">" or an EOF. | ||
| 1105 | self.stream.char() | ||
| 1106 | self.state = self.dataState | ||
| 1107 | return True | ||
| 1108 | |||
    def markupDeclarationOpenState(self):
        """Decide what follows "<!": a comment ("--"), a DOCTYPE, a CDATA
        section (foreign content only), or otherwise a bogus comment.

        Characters are collected on ``charStack`` so they can be pushed back
        in order if no construct matches.
        """
        charStack = [self.stream.char()]
        if charStack[-1] == "-":
            charStack.append(self.stream.char())
            if charStack[-1] == "-":
                self.currentToken = {"type": tokenTypes["Comment"], "data": ""}
                self.state = self.commentStartState
                return True
        elif charStack[-1] in ('d', 'D'):
            # Case-insensitive match of the remaining "OCTYPE" letters.
            matched = True
            for expected in (('o', 'O'), ('c', 'C'), ('t', 'T'),
                             ('y', 'Y'), ('p', 'P'), ('e', 'E')):
                charStack.append(self.stream.char())
                if charStack[-1] not in expected:
                    matched = False
                    break
            if matched:
                self.currentToken = {"type": tokenTypes["Doctype"],
                                     "name": "",
                                     "publicId": None, "systemId": None,
                                     "correct": True}
                self.state = self.doctypeState
                return True
        elif (charStack[-1] == "[" and
              self.parser is not None and
              self.parser.tree.openElements and
              self.parser.tree.openElements[-1].namespace != self.parser.tree.defaultNamespace):
            # "[CDATA[" (case-sensitive) is only recognised when the current
            # node is in a non-HTML namespace.
            matched = True
            for expected in ["C", "D", "A", "T", "A", "["]:
                charStack.append(self.stream.char())
                if charStack[-1] != expected:
                    matched = False
                    break
            if matched:
                self.state = self.cdataSectionState
                return True

        self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                "expected-dashes-or-doctype"})

        # No construct matched: unget everything (last-read first) and let
        # the bogus comment state consume it.
        while charStack:
            self.stream.unget(charStack.pop())
        self.state = self.bogusCommentState
        return True
| 1153 | |||
    def commentStartState(self):
        """Handle the first character after "<!--": a further "-" may begin
        the comment-end sequence; ">" here is the premature-close error
        case ("<!-->").
        """
        data = self.stream.char()
        if data == "-":
            self.state = self.commentStartDashState
        elif data == "\u0000":
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "invalid-codepoint"})
            self.currentToken["data"] += "\uFFFD"
        elif data == ">":
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "incorrect-comment"})
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "eof-in-comment"})
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            self.currentToken["data"] += data
            self.state = self.commentState
        return True
| 1176 | |||
    def commentStartDashState(self):
        """Handle the character after "<!---": a second "-" completes the
        potential end sequence; otherwise the pending "-" becomes comment
        text ("<!--->" is the premature-close error case).
        """
        data = self.stream.char()
        if data == "-":
            self.state = self.commentEndState
        elif data == "\u0000":
            # The pending "-" is flushed into the data along with U+FFFD.
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "invalid-codepoint"})
            self.currentToken["data"] += "-\uFFFD"
        elif data == ">":
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "incorrect-comment"})
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "eof-in-comment"})
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            self.currentToken["data"] += "-" + data
            self.state = self.commentState
        return True
| 1199 | |||
    def commentState(self):
        """Accumulate comment text until a "-" (possible end sequence),
        NUL, or EOF is seen.
        """
        data = self.stream.char()
        if data == "-":
            self.state = self.commentEndDashState
        elif data == "\u0000":
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "invalid-codepoint"})
            self.currentToken["data"] += "\uFFFD"
        elif data is EOF:
            # The partial comment is still emitted on EOF.
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "eof-in-comment"})
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            # Fast path: append the run up to the next special character.
            self.currentToken["data"] += data + \
                self.stream.charsUntil(("-", "\u0000"))
        return True
| 1217 | |||
    def commentEndDashState(self):
        """Handle the character after a single "-" inside a comment: a
        second "-" moves to the comment-end state, otherwise the pending
        "-" is flushed back into the comment text.
        """
        data = self.stream.char()
        if data == "-":
            self.state = self.commentEndState
        elif data == "\u0000":
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "invalid-codepoint"})
            self.currentToken["data"] += "-\uFFFD"
            self.state = self.commentState
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "eof-in-comment-end-dash"})
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            self.currentToken["data"] += "-" + data
            self.state = self.commentState
        return True
| 1236 | |||
    def commentEndState(self):
        """Handle the character after "--" inside a comment: ">" closes it;
        "!", extra "-", or anything else are error-recovery cases where the
        pending dashes are flushed back into the comment text.
        """
        data = self.stream.char()
        if data == ">":
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        elif data == "\u0000":
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "invalid-codepoint"})
            self.currentToken["data"] += "--\uFFFD"
            self.state = self.commentState
        elif data == "!":
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "unexpected-bang-after-double-dash-in-comment"})
            self.state = self.commentEndBangState
        elif data == "-":
            # Three or more dashes: keep consuming, one dash per pass.
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "unexpected-dash-after-double-dash-in-comment"})
            self.currentToken["data"] += data
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "eof-in-comment-double-dash"})
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            # XXX
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "unexpected-char-in-comment"})
            self.currentToken["data"] += "--" + data
            self.state = self.commentState
        return True
| 1267 | |||
    def commentEndBangState(self):
        """Handle the character after "--!" inside a comment: ">" closes
        the comment; otherwise the pending "--!" is flushed back into the
        comment text.
        """
        data = self.stream.char()
        if data == ">":
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        elif data == "-":
            # "--!-": keep "--!" as text, reuse the end-dash machinery.
            self.currentToken["data"] += "--!"
            self.state = self.commentEndDashState
        elif data == "\u0000":
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "invalid-codepoint"})
            self.currentToken["data"] += "--!\uFFFD"
            self.state = self.commentState
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "eof-in-comment-end-bang-state"})
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            self.currentToken["data"] += "--!" + data
            self.state = self.commentState
        return True
| 1290 | |||
| 1291 | def doctypeState(self): | ||
| 1292 | data = self.stream.char() | ||
| 1293 | if data in spaceCharacters: | ||
| 1294 | self.state = self.beforeDoctypeNameState | ||
| 1295 | elif data is EOF: | ||
| 1296 | self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | ||
| 1297 | "expected-doctype-name-but-got-eof"}) | ||
| 1298 | self.currentToken["correct"] = False | ||
| 1299 | self.tokenQueue.append(self.currentToken) | ||
| 1300 | self.state = self.dataState | ||
| 1301 | else: | ||
| 1302 | self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | ||
| 1303 | "need-space-after-doctype"}) | ||
| 1304 | self.stream.unget(data) | ||
| 1305 | self.state = self.beforeDoctypeNameState | ||
| 1306 | return True | ||
| 1307 | |||
    def beforeDoctypeNameState(self):
        """Skip whitespace before the doctype name and start collecting it;
        ">" or EOF here produce an incorrect doctype token.
        """
        data = self.stream.char()
        if data in spaceCharacters:
            pass
        elif data == ">":
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "expected-doctype-name-but-got-right-bracket"})
            self.currentToken["correct"] = False
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        elif data == "\u0000":
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "invalid-codepoint"})
            # First name character: assignment, not append.
            self.currentToken["name"] = "\uFFFD"
            self.state = self.doctypeNameState
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "expected-doctype-name-but-got-eof"})
            self.currentToken["correct"] = False
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            self.currentToken["name"] = data
            self.state = self.doctypeNameState
        return True
| 1333 | |||
    def doctypeNameState(self):
        """Accumulate the doctype name, lowercasing it whenever the name is
        complete (whitespace, ">", or EOF).
        """
        data = self.stream.char()
        if data in spaceCharacters:
            self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower)
            self.state = self.afterDoctypeNameState
        elif data == ">":
            self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower)
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        elif data == "\u0000":
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "invalid-codepoint"})
            self.currentToken["name"] += "\uFFFD"
            self.state = self.doctypeNameState
        elif data is EOF:
            # Lowercase the partial name and emit the incorrect token.
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "eof-in-doctype-name"})
            self.currentToken["correct"] = False
            self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower)
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            self.currentToken["name"] += data
        return True
| 1358 | |||
    def afterDoctypeNameState(self):
        """Handle characters after the doctype name: skip whitespace, close
        on ">", or look for the PUBLIC/SYSTEM keywords (case-insensitive);
        anything else makes the doctype bogus.
        """
        data = self.stream.char()
        if data in spaceCharacters:
            pass
        elif data == ">":
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        elif data is EOF:
            self.currentToken["correct"] = False
            self.stream.unget(data)
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "eof-in-doctype"})
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            if data in ("p", "P"):
                # Try to match the rest of "PUBLIC" case-insensitively.
                matched = True
                for expected in (("u", "U"), ("b", "B"), ("l", "L"),
                                 ("i", "I"), ("c", "C")):
                    data = self.stream.char()
                    if data not in expected:
                        matched = False
                        break
                if matched:
                    self.state = self.afterDoctypePublicKeywordState
                    return True
            elif data in ("s", "S"):
                # Try to match the rest of "SYSTEM" case-insensitively.
                matched = True
                for expected in (("y", "Y"), ("s", "S"), ("t", "T"),
                                 ("e", "E"), ("m", "M")):
                    data = self.stream.char()
                    if data not in expected:
                        matched = False
                        break
                if matched:
                    self.state = self.afterDoctypeSystemKeywordState
                    return True

            # All the characters read before the current 'data' will be
            # [a-zA-Z], so they're garbage in the bogus doctype and can be
            # discarded; only the latest character might be '>' or EOF
            # and needs to be ungetted
            self.stream.unget(data)
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "expected-space-or-right-bracket-in-doctype", "datavars":
                                    {"data": data}})
            self.currentToken["correct"] = False
            self.state = self.bogusDoctypeState

        return True
| 1409 | |||
    def afterDoctypePublicKeywordState(self):
        """Handle the character after the PUBLIC keyword; a quote arriving
        without intervening whitespace is a parse error but is still
        reprocessed as the identifier's opening quote.
        """
        data = self.stream.char()
        if data in spaceCharacters:
            self.state = self.beforeDoctypePublicIdentifierState
        elif data in ("'", '"'):
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "unexpected-char-in-doctype"})
            self.stream.unget(data)
            self.state = self.beforeDoctypePublicIdentifierState
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "eof-in-doctype"})
            self.currentToken["correct"] = False
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            # Other characters are handled by the next state after unget.
            self.stream.unget(data)
            self.state = self.beforeDoctypePublicIdentifierState
        return True
| 1429 | |||
    def beforeDoctypePublicIdentifierState(self):
        """Skip whitespace before the public identifier and dispatch on its
        opening quote; anything else makes the doctype bogus/incorrect.
        """
        data = self.stream.char()
        if data in spaceCharacters:
            pass
        elif data == "\"":
            # Initialise publicId so the quoted state can append to it.
            self.currentToken["publicId"] = ""
            self.state = self.doctypePublicIdentifierDoubleQuotedState
        elif data == "'":
            self.currentToken["publicId"] = ""
            self.state = self.doctypePublicIdentifierSingleQuotedState
        elif data == ">":
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "unexpected-end-of-doctype"})
            self.currentToken["correct"] = False
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "eof-in-doctype"})
            self.currentToken["correct"] = False
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "unexpected-char-in-doctype"})
            self.currentToken["correct"] = False
            self.state = self.bogusDoctypeState
        return True
| 1458 | |||
    def doctypePublicIdentifierDoubleQuotedState(self):
        """Accumulate a double-quoted doctype public identifier; ">" or EOF
        inside the quotes terminates the doctype as incorrect.
        """
        data = self.stream.char()
        if data == "\"":
            self.state = self.afterDoctypePublicIdentifierState
        elif data == "\u0000":
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "invalid-codepoint"})
            self.currentToken["publicId"] += "\uFFFD"
        elif data == ">":
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "unexpected-end-of-doctype"})
            self.currentToken["correct"] = False
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "eof-in-doctype"})
            self.currentToken["correct"] = False
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            self.currentToken["publicId"] += data
        return True
| 1482 | |||
    def doctypePublicIdentifierSingleQuotedState(self):
        """Accumulate a single-quoted doctype public identifier; ">" or EOF
        inside the quotes terminates the doctype as incorrect.
        """
        data = self.stream.char()
        if data == "'":
            self.state = self.afterDoctypePublicIdentifierState
        elif data == "\u0000":
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "invalid-codepoint"})
            self.currentToken["publicId"] += "\uFFFD"
        elif data == ">":
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "unexpected-end-of-doctype"})
            self.currentToken["correct"] = False
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "eof-in-doctype"})
            self.currentToken["correct"] = False
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            self.currentToken["publicId"] += data
        return True
| 1506 | |||
    def afterDoctypePublicIdentifierState(self):
        """Handle the character after the public identifier's closing quote;
        a system identifier may follow, with a parse error if no whitespace
        separates them.
        """
        data = self.stream.char()
        if data in spaceCharacters:
            self.state = self.betweenDoctypePublicAndSystemIdentifiersState
        elif data == ">":
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        elif data == '"':
            # Quote without separating whitespace: error, but the system
            # identifier is still collected.
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "unexpected-char-in-doctype"})
            self.currentToken["systemId"] = ""
            self.state = self.doctypeSystemIdentifierDoubleQuotedState
        elif data == "'":
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "unexpected-char-in-doctype"})
            self.currentToken["systemId"] = ""
            self.state = self.doctypeSystemIdentifierSingleQuotedState
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "eof-in-doctype"})
            self.currentToken["correct"] = False
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "unexpected-char-in-doctype"})
            self.currentToken["correct"] = False
            self.state = self.bogusDoctypeState
        return True
| 1536 | |||
| 1537 | def betweenDoctypePublicAndSystemIdentifiersState(self): | ||
| 1538 | data = self.stream.char() | ||
| 1539 | if data in spaceCharacters: | ||
| 1540 | pass | ||
| 1541 | elif data == ">": | ||
| 1542 | self.tokenQueue.append(self.currentToken) | ||
| 1543 | self.state = self.dataState | ||
| 1544 | elif data == '"': | ||
| 1545 | self.currentToken["systemId"] = "" | ||
| 1546 | self.state = self.doctypeSystemIdentifierDoubleQuotedState | ||
| 1547 | elif data == "'": | ||
| 1548 | self.currentToken["systemId"] = "" | ||
| 1549 | self.state = self.doctypeSystemIdentifierSingleQuotedState | ||
| 1550 | elif data == EOF: | ||
| 1551 | self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | ||
| 1552 | "eof-in-doctype"}) | ||
| 1553 | self.currentToken["correct"] = False | ||
| 1554 | self.tokenQueue.append(self.currentToken) | ||
| 1555 | self.state = self.dataState | ||
| 1556 | else: | ||
| 1557 | self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | ||
| 1558 | "unexpected-char-in-doctype"}) | ||
| 1559 | self.currentToken["correct"] = False | ||
| 1560 | self.state = self.bogusDoctypeState | ||
| 1561 | return True | ||
| 1562 | |||
| 1563 | def afterDoctypeSystemKeywordState(self): | ||
| 1564 | data = self.stream.char() | ||
| 1565 | if data in spaceCharacters: | ||
| 1566 | self.state = self.beforeDoctypeSystemIdentifierState | ||
| 1567 | elif data in ("'", '"'): | ||
| 1568 | self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | ||
| 1569 | "unexpected-char-in-doctype"}) | ||
| 1570 | self.stream.unget(data) | ||
| 1571 | self.state = self.beforeDoctypeSystemIdentifierState | ||
| 1572 | elif data is EOF: | ||
| 1573 | self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | ||
| 1574 | "eof-in-doctype"}) | ||
| 1575 | self.currentToken["correct"] = False | ||
| 1576 | self.tokenQueue.append(self.currentToken) | ||
| 1577 | self.state = self.dataState | ||
| 1578 | else: | ||
| 1579 | self.stream.unget(data) | ||
| 1580 | self.state = self.beforeDoctypeSystemIdentifierState | ||
| 1581 | return True | ||
| 1582 | |||
| 1583 | def beforeDoctypeSystemIdentifierState(self): | ||
| 1584 | data = self.stream.char() | ||
| 1585 | if data in spaceCharacters: | ||
| 1586 | pass | ||
| 1587 | elif data == "\"": | ||
| 1588 | self.currentToken["systemId"] = "" | ||
| 1589 | self.state = self.doctypeSystemIdentifierDoubleQuotedState | ||
| 1590 | elif data == "'": | ||
| 1591 | self.currentToken["systemId"] = "" | ||
| 1592 | self.state = self.doctypeSystemIdentifierSingleQuotedState | ||
| 1593 | elif data == ">": | ||
| 1594 | self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | ||
| 1595 | "unexpected-char-in-doctype"}) | ||
| 1596 | self.currentToken["correct"] = False | ||
| 1597 | self.tokenQueue.append(self.currentToken) | ||
| 1598 | self.state = self.dataState | ||
| 1599 | elif data is EOF: | ||
| 1600 | self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | ||
| 1601 | "eof-in-doctype"}) | ||
| 1602 | self.currentToken["correct"] = False | ||
| 1603 | self.tokenQueue.append(self.currentToken) | ||
| 1604 | self.state = self.dataState | ||
| 1605 | else: | ||
| 1606 | self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | ||
| 1607 | "unexpected-char-in-doctype"}) | ||
| 1608 | self.currentToken["correct"] = False | ||
| 1609 | self.state = self.bogusDoctypeState | ||
| 1610 | return True | ||
| 1611 | |||
| 1612 | def doctypeSystemIdentifierDoubleQuotedState(self): | ||
| 1613 | data = self.stream.char() | ||
| 1614 | if data == "\"": | ||
| 1615 | self.state = self.afterDoctypeSystemIdentifierState | ||
| 1616 | elif data == "\u0000": | ||
| 1617 | self.tokenQueue.append({"type": tokenTypes["ParseError"], | ||
| 1618 | "data": "invalid-codepoint"}) | ||
| 1619 | self.currentToken["systemId"] += "\uFFFD" | ||
| 1620 | elif data == ">": | ||
| 1621 | self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | ||
| 1622 | "unexpected-end-of-doctype"}) | ||
| 1623 | self.currentToken["correct"] = False | ||
| 1624 | self.tokenQueue.append(self.currentToken) | ||
| 1625 | self.state = self.dataState | ||
| 1626 | elif data is EOF: | ||
| 1627 | self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | ||
| 1628 | "eof-in-doctype"}) | ||
| 1629 | self.currentToken["correct"] = False | ||
| 1630 | self.tokenQueue.append(self.currentToken) | ||
| 1631 | self.state = self.dataState | ||
| 1632 | else: | ||
| 1633 | self.currentToken["systemId"] += data | ||
| 1634 | return True | ||
| 1635 | |||
| 1636 | def doctypeSystemIdentifierSingleQuotedState(self): | ||
| 1637 | data = self.stream.char() | ||
| 1638 | if data == "'": | ||
| 1639 | self.state = self.afterDoctypeSystemIdentifierState | ||
| 1640 | elif data == "\u0000": | ||
| 1641 | self.tokenQueue.append({"type": tokenTypes["ParseError"], | ||
| 1642 | "data": "invalid-codepoint"}) | ||
| 1643 | self.currentToken["systemId"] += "\uFFFD" | ||
| 1644 | elif data == ">": | ||
| 1645 | self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | ||
| 1646 | "unexpected-end-of-doctype"}) | ||
| 1647 | self.currentToken["correct"] = False | ||
| 1648 | self.tokenQueue.append(self.currentToken) | ||
| 1649 | self.state = self.dataState | ||
| 1650 | elif data is EOF: | ||
| 1651 | self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | ||
| 1652 | "eof-in-doctype"}) | ||
| 1653 | self.currentToken["correct"] = False | ||
| 1654 | self.tokenQueue.append(self.currentToken) | ||
| 1655 | self.state = self.dataState | ||
| 1656 | else: | ||
| 1657 | self.currentToken["systemId"] += data | ||
| 1658 | return True | ||
| 1659 | |||
| 1660 | def afterDoctypeSystemIdentifierState(self): | ||
| 1661 | data = self.stream.char() | ||
| 1662 | if data in spaceCharacters: | ||
| 1663 | pass | ||
| 1664 | elif data == ">": | ||
| 1665 | self.tokenQueue.append(self.currentToken) | ||
| 1666 | self.state = self.dataState | ||
| 1667 | elif data is EOF: | ||
| 1668 | self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | ||
| 1669 | "eof-in-doctype"}) | ||
| 1670 | self.currentToken["correct"] = False | ||
| 1671 | self.tokenQueue.append(self.currentToken) | ||
| 1672 | self.state = self.dataState | ||
| 1673 | else: | ||
| 1674 | self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | ||
| 1675 | "unexpected-char-in-doctype"}) | ||
| 1676 | self.state = self.bogusDoctypeState | ||
| 1677 | return True | ||
| 1678 | |||
| 1679 | def bogusDoctypeState(self): | ||
| 1680 | data = self.stream.char() | ||
| 1681 | if data == ">": | ||
| 1682 | self.tokenQueue.append(self.currentToken) | ||
| 1683 | self.state = self.dataState | ||
| 1684 | elif data is EOF: | ||
| 1685 | # XXX EMIT | ||
| 1686 | self.stream.unget(data) | ||
| 1687 | self.tokenQueue.append(self.currentToken) | ||
| 1688 | self.state = self.dataState | ||
| 1689 | else: | ||
| 1690 | pass | ||
| 1691 | return True | ||
| 1692 | |||
| 1693 | def cdataSectionState(self): | ||
| 1694 | data = [] | ||
| 1695 | while True: | ||
| 1696 | data.append(self.stream.charsUntil("]")) | ||
| 1697 | data.append(self.stream.charsUntil(">")) | ||
| 1698 | char = self.stream.char() | ||
| 1699 | if char == EOF: | ||
| 1700 | break | ||
| 1701 | else: | ||
| 1702 | assert char == ">" | ||
| 1703 | if data[-1][-2:] == "]]": | ||
| 1704 | data[-1] = data[-1][:-2] | ||
| 1705 | break | ||
| 1706 | else: | ||
| 1707 | data.append(char) | ||
| 1708 | |||
| 1709 | data = "".join(data) # pylint:disable=redefined-variable-type | ||
| 1710 | # Deal with null here rather than in the parser | ||
| 1711 | nullCount = data.count("\u0000") | ||
| 1712 | if nullCount > 0: | ||
| 1713 | for _ in range(nullCount): | ||
| 1714 | self.tokenQueue.append({"type": tokenTypes["ParseError"], | ||
| 1715 | "data": "invalid-codepoint"}) | ||
| 1716 | data = data.replace("\u0000", "\uFFFD") | ||
| 1717 | if data: | ||
| 1718 | self.tokenQueue.append({"type": tokenTypes["Characters"], | ||
| 1719 | "data": data}) | ||
| 1720 | self.state = self.dataState | ||
| 1721 | return True | ||
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_trie/__init__.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_trie/__init__.py new file mode 100644 index 0000000..ccc70bd --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_trie/__init__.py | |||
| @@ -0,0 +1,14 @@ | |||
from __future__ import absolute_import, division, unicode_literals

from .py import Trie as PyTrie

# Default to the pure-Python implementation, which is always importable.
Trie = PyTrie

# pylint:disable=wrong-import-position
try:
    from .datrie import Trie as DATrie
except ImportError:
    # Optional `datrie` C extension is not installed; keep the pure-Python Trie.
    pass
else:
    # Prefer the datrie-backed implementation when available.
    Trie = DATrie
# pylint:enable=wrong-import-position
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_trie/_base.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_trie/_base.py new file mode 100644 index 0000000..ecfff32 --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_trie/_base.py | |||
| @@ -0,0 +1,37 @@ | |||
from __future__ import absolute_import, division, unicode_literals

try:
    # Mapping moved to collections.abc in Python 3.3 and the old
    # `collections` alias was removed in Python 3.10.
    from collections.abc import Mapping
except ImportError:  # Python 2.7
    from collections import Mapping


class Trie(Mapping):
    """Abstract base class for tries.

    Concrete subclasses supply the Mapping protocol (__getitem__,
    __len__, __iter__); this base adds prefix-oriented helpers.
    """

    def keys(self, prefix=None):
        """Return the set of keys; with *prefix*, only keys starting with it."""
        # pylint:disable=arguments-differ
        keys = super(Trie, self).keys()

        if prefix is None:
            return set(keys)

        return {x for x in keys if x.startswith(prefix)}

    def has_keys_with_prefix(self, prefix):
        """Return True if any key starts with *prefix*."""
        for key in self.keys():
            if key.startswith(prefix):
                return True

        return False

    def longest_prefix(self, prefix):
        """Return the longest stored key that is a prefix of *prefix*.

        Raises KeyError if no stored key is a prefix of *prefix*.
        """
        if prefix in self:
            return prefix

        # Try progressively shorter leading slices of *prefix*.
        for i in range(1, len(prefix) + 1):
            if prefix[:-i] in self:
                return prefix[:-i]

        raise KeyError(prefix)

    def longest_prefix_item(self, prefix):
        """Return ``(key, value)`` for the longest stored key that is a
        prefix of *prefix* (KeyError if there is none)."""
        lprefix = self.longest_prefix(prefix)
        return (lprefix, self[lprefix])
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_trie/datrie.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_trie/datrie.py new file mode 100644 index 0000000..cb1af60 --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_trie/datrie.py | |||
| @@ -0,0 +1,44 @@ | |||
| 1 | from __future__ import absolute_import, division, unicode_literals | ||
| 2 | |||
| 3 | from datrie import Trie as DATrie | ||
| 4 | from pip._vendor.six import text_type | ||
| 5 | |||
| 6 | from ._base import Trie as ABCTrie | ||
| 7 | |||
| 8 | |||
class Trie(ABCTrie):
    """Trie backed by the optional `datrie` C extension.

    A thin wrapper: each operation is forwarded to an underlying
    ``datrie.Trie`` built over the alphabet of the supplied mapping.
    """

    def __init__(self, data):
        # datrie needs the full alphabet up front, so collect every
        # character used by any key before building the trie.
        alphabet = set()
        for key in data.keys():
            if not isinstance(key, text_type):
                raise TypeError("All keys must be strings")
            alphabet.update(key)

        self._data = DATrie("".join(alphabet))
        for key, value in data.items():
            self._data[key] = value

    def __contains__(self, key):
        return key in self._data

    def __len__(self):
        return len(self._data)

    def __iter__(self):
        # Iteration is not implemented for the datrie backend.
        raise NotImplementedError()

    def __getitem__(self, key):
        return self._data[key]

    def keys(self, prefix=None):
        return self._data.keys(prefix)

    def has_keys_with_prefix(self, prefix):
        return self._data.has_keys_with_prefix(prefix)

    def longest_prefix(self, prefix):
        return self._data.longest_prefix(prefix)

    def longest_prefix_item(self, prefix):
        return self._data.longest_prefix_item(prefix)
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_trie/py.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_trie/py.py new file mode 100644 index 0000000..5531263 --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_trie/py.py | |||
| @@ -0,0 +1,67 @@ | |||
| 1 | from __future__ import absolute_import, division, unicode_literals | ||
| 2 | from pip._vendor.six import text_type | ||
| 3 | |||
| 4 | from bisect import bisect_left | ||
| 5 | |||
| 6 | from ._base import Trie as ABCTrie | ||
| 7 | |||
| 8 | |||
class Trie(ABCTrie):
    """Pure-Python trie backed by a sorted key list and bisection."""

    def __init__(self, data):
        if not all(isinstance(x, text_type) for x in data.keys()):
            raise TypeError("All keys must be strings")

        self._data = data
        self._keys = sorted(data.keys())
        # Cache of the last prefix searched and the (lo, hi) slice of
        # self._keys known to contain every key starting with it.
        self._cachestr = ""
        self._cachepoints = (0, len(data))

    def __contains__(self, key):
        return key in self._data

    def __len__(self):
        return len(self._data)

    def __iter__(self):
        return iter(self._data)

    def __getitem__(self, key):
        return self._data[key]

    def keys(self, prefix=None):
        """Return the set of keys; with *prefix*, only keys starting with it."""
        if prefix is None or prefix == "" or not self._keys:
            return set(self._keys)

        if prefix.startswith(self._cachestr):
            # The cached slice already brackets all candidates.
            lo, hi = self._cachepoints
            start = i = bisect_left(self._keys, prefix, lo, hi)
        else:
            start = i = bisect_left(self._keys, prefix)

        keys = set()
        if start == len(self._keys):
            return keys

        # Bounds check: without `i < len(self._keys)` this raised
        # IndexError whenever every key from `start` onwards matched.
        while i < len(self._keys) and self._keys[i].startswith(prefix):
            keys.add(self._keys[i])
            i += 1

        self._cachestr = prefix
        self._cachepoints = (start, i)

        return keys

    def has_keys_with_prefix(self, prefix):
        """Return True if any key starts with *prefix*."""
        if prefix in self._data:
            return True

        if prefix.startswith(self._cachestr):
            lo, hi = self._cachepoints
            i = bisect_left(self._keys, prefix, lo, hi)
        else:
            i = bisect_left(self._keys, prefix)

        if i == len(self._keys):
            return False

        return self._keys[i].startswith(prefix)
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_utils.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_utils.py new file mode 100644 index 0000000..a559fa0 --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_utils.py | |||
| @@ -0,0 +1,124 @@ | |||
| 1 | from __future__ import absolute_import, division, unicode_literals | ||
| 2 | |||
| 3 | from types import ModuleType | ||
| 4 | |||
| 5 | from pip._vendor.six import text_type | ||
| 6 | |||
| 7 | try: | ||
| 8 | import xml.etree.cElementTree as default_etree | ||
| 9 | except ImportError: | ||
| 10 | import xml.etree.ElementTree as default_etree | ||
| 11 | |||
| 12 | |||
__all__ = ["default_etree", "MethodDispatcher", "isSurrogatePair",
           "surrogatePairToCodepoint", "moduleFactoryFactory",
           "supports_lone_surrogates"]


# Platforms not supporting lone surrogates (\uD800-\uDFFF) should be
# caught by the below test. In general this would be any platform
# using UTF-16 as its encoding of unicode strings, such as
# Jython. This is because UTF-16 itself is based on the use of such
# surrogates, and there is no mechanism to further escape such
# escapes.
try:
    # eval is used so this source file itself contains no literal lone
    # surrogate, which some platforms could not even parse.
    _x = eval('"\\uD800"')  # pylint:disable=eval-used
    if not isinstance(_x, text_type):
        # We need this with u"" because of http://bugs.jython.org/issue2039
        _x = eval('u"\\uD800"')  # pylint:disable=eval-used
        assert isinstance(_x, text_type)
except:  # pylint:disable=bare-except
    supports_lone_surrogates = False
else:
    supports_lone_surrogates = True
| 34 | |||
| 35 | |||
class MethodDispatcher(dict):
    """Dict with 2 special properties:

    On initiation, keys that are lists, sets or tuples are converted to
    multiple keys so accessing any one of the items in the original
    list-like object returns the matching value

    md = MethodDispatcher({("foo", "bar"):"baz"})
    md["foo"] == "baz"

    A default value which can be set through the default attribute.
    """

    def __init__(self, items=()):
        # Flatten list-like keys into one entry per item first, then build
        # the dict with a single dict.__init__ call (the fast path).
        entries = []
        for key, value in items:
            if isinstance(key, (list, tuple, frozenset, set)):
                entries.extend((item, value) for item in key)
            else:
                entries.append((key, value))
        dict.__init__(self, entries)
        # No duplicate keys may result from the flattening.
        assert len(self) == len(entries)
        self.default = None

    def __getitem__(self, key):
        # Missing keys fall back to self.default instead of raising.
        return dict.get(self, key, self.default)
| 66 | |||
| 67 | |||
| 68 | # Some utility functions to deal with weirdness around UCS2 vs UCS4 | ||
| 69 | # python builds | ||
| 70 | |||
def isSurrogatePair(data):
    """Return True if *data* is exactly a high+low UTF-16 surrogate pair."""
    if len(data) != 2:
        return False
    high, low = ord(data[0]), ord(data[1])
    return 0xD800 <= high <= 0xDBFF and 0xDC00 <= low <= 0xDFFF
| 75 | |||
| 76 | |||
def surrogatePairToCodepoint(data):
    """Return the astral codepoint encoded by the surrogate pair *data*."""
    high = ord(data[0]) - 0xD800
    low = ord(data[1]) - 0xDC00
    # high carries the top 10 bits, low the bottom 10, offset by 0x10000.
    return 0x10000 + (high << 10) + low
| 81 | |||
| 82 | # Module Factory Factory (no, this isn't Java, I know) | ||
| 83 | # Here to stop this being duplicated all over the place. | ||
| 84 | |||
| 85 | |||
def moduleFactoryFactory(factory):
    """Return a memoizing wrapper around *factory*.

    The wrapped factory builds a module object (named after the base
    module) populated with the objects *factory* returns, caching one
    module per (base module name, args, kwargs) combination.
    """
    moduleCache = {}

    def moduleFactory(baseModule, *args, **kwargs):
        # On Python 2 ModuleType.__name__ is a byte string; build a
        # factory-module name of the matching string type.
        if isinstance(ModuleType.__name__, type("")):
            name = "_%s_factory" % baseModule.__name__
        else:
            name = b"_%s_factory" % baseModule.__name__

        kwargs_tuple = tuple(kwargs.items())

        try:
            return moduleCache[name][args][kwargs_tuple]
        except KeyError:
            mod = ModuleType(name)
            objs = factory(baseModule, *args, **kwargs)
            mod.__dict__.update(objs)
            # setdefault keeps previously cached modules intact; the old
            # code tested the string literals "name"/"args"/"kwargs"
            # against the cache, so every miss clobbered the per-name
            # sub-dicts and discarded sibling cache entries.
            moduleCache.setdefault(name, {}).setdefault(args, {})[kwargs_tuple] = mod
            return mod

    return moduleFactory
| 113 | |||
| 114 | |||
def memoize(func):
    """Cache *func*'s results keyed on its positional and keyword arguments."""
    cache = {}

    def wrapped(*args, **kwargs):
        key = (tuple(args), tuple(kwargs.items()))
        try:
            return cache[key]
        except KeyError:
            result = func(*args, **kwargs)
            cache[key] = result
            return result

    return wrapped
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/constants.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/constants.py new file mode 100644 index 0000000..bca155e --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/constants.py | |||
| @@ -0,0 +1,2947 @@ | |||
| 1 | from __future__ import absolute_import, division, unicode_literals | ||
| 2 | |||
| 3 | import string | ||
| 4 | |||
| 5 | EOF = None | ||
| 6 | |||
| 7 | E = { | ||
| 8 | "null-character": | ||
| 9 | "Null character in input stream, replaced with U+FFFD.", | ||
| 10 | "invalid-codepoint": | ||
| 11 | "Invalid codepoint in stream.", | ||
| 12 | "incorrectly-placed-solidus": | ||
| 13 | "Solidus (/) incorrectly placed in tag.", | ||
| 14 | "incorrect-cr-newline-entity": | ||
| 15 | "Incorrect CR newline entity, replaced with LF.", | ||
| 16 | "illegal-windows-1252-entity": | ||
| 17 | "Entity used with illegal number (windows-1252 reference).", | ||
| 18 | "cant-convert-numeric-entity": | ||
| 19 | "Numeric entity couldn't be converted to character " | ||
| 20 | "(codepoint U+%(charAsInt)08x).", | ||
| 21 | "illegal-codepoint-for-numeric-entity": | ||
| 22 | "Numeric entity represents an illegal codepoint: " | ||
| 23 | "U+%(charAsInt)08x.", | ||
| 24 | "numeric-entity-without-semicolon": | ||
| 25 | "Numeric entity didn't end with ';'.", | ||
| 26 | "expected-numeric-entity-but-got-eof": | ||
| 27 | "Numeric entity expected. Got end of file instead.", | ||
| 28 | "expected-numeric-entity": | ||
| 29 | "Numeric entity expected but none found.", | ||
| 30 | "named-entity-without-semicolon": | ||
| 31 | "Named entity didn't end with ';'.", | ||
| 32 | "expected-named-entity": | ||
| 33 | "Named entity expected. Got none.", | ||
| 34 | "attributes-in-end-tag": | ||
| 35 | "End tag contains unexpected attributes.", | ||
| 36 | 'self-closing-flag-on-end-tag': | ||
| 37 | "End tag contains unexpected self-closing flag.", | ||
| 38 | "expected-tag-name-but-got-right-bracket": | ||
| 39 | "Expected tag name. Got '>' instead.", | ||
| 40 | "expected-tag-name-but-got-question-mark": | ||
| 41 | "Expected tag name. Got '?' instead. (HTML doesn't " | ||
| 42 | "support processing instructions.)", | ||
| 43 | "expected-tag-name": | ||
| 44 | "Expected tag name. Got something else instead", | ||
| 45 | "expected-closing-tag-but-got-right-bracket": | ||
| 46 | "Expected closing tag. Got '>' instead. Ignoring '</>'.", | ||
| 47 | "expected-closing-tag-but-got-eof": | ||
| 48 | "Expected closing tag. Unexpected end of file.", | ||
| 49 | "expected-closing-tag-but-got-char": | ||
| 50 | "Expected closing tag. Unexpected character '%(data)s' found.", | ||
| 51 | "eof-in-tag-name": | ||
| 52 | "Unexpected end of file in the tag name.", | ||
| 53 | "expected-attribute-name-but-got-eof": | ||
| 54 | "Unexpected end of file. Expected attribute name instead.", | ||
| 55 | "eof-in-attribute-name": | ||
| 56 | "Unexpected end of file in attribute name.", | ||
| 57 | "invalid-character-in-attribute-name": | ||
| 58 | "Invalid character in attribute name", | ||
| 59 | "duplicate-attribute": | ||
| 60 | "Dropped duplicate attribute on tag.", | ||
| 61 | "expected-end-of-tag-name-but-got-eof": | ||
| 62 | "Unexpected end of file. Expected = or end of tag.", | ||
| 63 | "expected-attribute-value-but-got-eof": | ||
| 64 | "Unexpected end of file. Expected attribute value.", | ||
| 65 | "expected-attribute-value-but-got-right-bracket": | ||
| 66 | "Expected attribute value. Got '>' instead.", | ||
| 67 | 'equals-in-unquoted-attribute-value': | ||
| 68 | "Unexpected = in unquoted attribute", | ||
| 69 | 'unexpected-character-in-unquoted-attribute-value': | ||
| 70 | "Unexpected character in unquoted attribute", | ||
| 71 | "invalid-character-after-attribute-name": | ||
| 72 | "Unexpected character after attribute name.", | ||
| 73 | "unexpected-character-after-attribute-value": | ||
| 74 | "Unexpected character after attribute value.", | ||
| 75 | "eof-in-attribute-value-double-quote": | ||
| 76 | "Unexpected end of file in attribute value (\").", | ||
| 77 | "eof-in-attribute-value-single-quote": | ||
| 78 | "Unexpected end of file in attribute value (').", | ||
| 79 | "eof-in-attribute-value-no-quotes": | ||
| 80 | "Unexpected end of file in attribute value.", | ||
| 81 | "unexpected-EOF-after-solidus-in-tag": | ||
| 82 | "Unexpected end of file in tag. Expected >", | ||
| 83 | "unexpected-character-after-solidus-in-tag": | ||
| 84 | "Unexpected character after / in tag. Expected >", | ||
| 85 | "expected-dashes-or-doctype": | ||
| 86 | "Expected '--' or 'DOCTYPE'. Not found.", | ||
| 87 | "unexpected-bang-after-double-dash-in-comment": | ||
| 88 | "Unexpected ! after -- in comment", | ||
| 89 | "unexpected-space-after-double-dash-in-comment": | ||
| 90 | "Unexpected space after -- in comment", | ||
| 91 | "incorrect-comment": | ||
| 92 | "Incorrect comment.", | ||
| 93 | "eof-in-comment": | ||
| 94 | "Unexpected end of file in comment.", | ||
| 95 | "eof-in-comment-end-dash": | ||
| 96 | "Unexpected end of file in comment (-)", | ||
| 97 | "unexpected-dash-after-double-dash-in-comment": | ||
| 98 | "Unexpected '-' after '--' found in comment.", | ||
| 99 | "eof-in-comment-double-dash": | ||
| 100 | "Unexpected end of file in comment (--).", | ||
| 101 | "eof-in-comment-end-space-state": | ||
| 102 | "Unexpected end of file in comment.", | ||
| 103 | "eof-in-comment-end-bang-state": | ||
| 104 | "Unexpected end of file in comment.", | ||
| 105 | "unexpected-char-in-comment": | ||
| 106 | "Unexpected character in comment found.", | ||
| 107 | "need-space-after-doctype": | ||
| 108 | "No space after literal string 'DOCTYPE'.", | ||
| 109 | "expected-doctype-name-but-got-right-bracket": | ||
| 110 | "Unexpected > character. Expected DOCTYPE name.", | ||
| 111 | "expected-doctype-name-but-got-eof": | ||
| 112 | "Unexpected end of file. Expected DOCTYPE name.", | ||
| 113 | "eof-in-doctype-name": | ||
| 114 | "Unexpected end of file in DOCTYPE name.", | ||
| 115 | "eof-in-doctype": | ||
| 116 | "Unexpected end of file in DOCTYPE.", | ||
| 117 | "expected-space-or-right-bracket-in-doctype": | ||
| 118 | "Expected space or '>'. Got '%(data)s'", | ||
| 119 | "unexpected-end-of-doctype": | ||
| 120 | "Unexpected end of DOCTYPE.", | ||
| 121 | "unexpected-char-in-doctype": | ||
| 122 | "Unexpected character in DOCTYPE.", | ||
| 123 | "eof-in-innerhtml": | ||
| 124 | "XXX innerHTML EOF", | ||
| 125 | "unexpected-doctype": | ||
| 126 | "Unexpected DOCTYPE. Ignored.", | ||
| 127 | "non-html-root": | ||
| 128 | "html needs to be the first start tag.", | ||
| 129 | "expected-doctype-but-got-eof": | ||
| 130 | "Unexpected End of file. Expected DOCTYPE.", | ||
| 131 | "unknown-doctype": | ||
| 132 | "Erroneous DOCTYPE.", | ||
| 133 | "expected-doctype-but-got-chars": | ||
| 134 | "Unexpected non-space characters. Expected DOCTYPE.", | ||
| 135 | "expected-doctype-but-got-start-tag": | ||
| 136 | "Unexpected start tag (%(name)s). Expected DOCTYPE.", | ||
| 137 | "expected-doctype-but-got-end-tag": | ||
| 138 | "Unexpected end tag (%(name)s). Expected DOCTYPE.", | ||
| 139 | "end-tag-after-implied-root": | ||
| 140 | "Unexpected end tag (%(name)s) after the (implied) root element.", | ||
| 141 | "expected-named-closing-tag-but-got-eof": | ||
| 142 | "Unexpected end of file. Expected end tag (%(name)s).", | ||
| 143 | "two-heads-are-not-better-than-one": | ||
| 144 | "Unexpected start tag head in existing head. Ignored.", | ||
| 145 | "unexpected-end-tag": | ||
| 146 | "Unexpected end tag (%(name)s). Ignored.", | ||
| 147 | "unexpected-start-tag-out-of-my-head": | ||
| 148 | "Unexpected start tag (%(name)s) that can be in head. Moved.", | ||
| 149 | "unexpected-start-tag": | ||
| 150 | "Unexpected start tag (%(name)s).", | ||
| 151 | "missing-end-tag": | ||
| 152 | "Missing end tag (%(name)s).", | ||
| 153 | "missing-end-tags": | ||
| 154 | "Missing end tags (%(name)s).", | ||
| 155 | "unexpected-start-tag-implies-end-tag": | ||
| 156 | "Unexpected start tag (%(startName)s) " | ||
| 157 | "implies end tag (%(endName)s).", | ||
| 158 | "unexpected-start-tag-treated-as": | ||
| 159 | "Unexpected start tag (%(originalName)s). Treated as %(newName)s.", | ||
| 160 | "deprecated-tag": | ||
| 161 | "Unexpected start tag %(name)s. Don't use it!", | ||
| 162 | "unexpected-start-tag-ignored": | ||
| 163 | "Unexpected start tag %(name)s. Ignored.", | ||
| 164 | "expected-one-end-tag-but-got-another": | ||
| 165 | "Unexpected end tag (%(gotName)s). " | ||
| 166 | "Missing end tag (%(expectedName)s).", | ||
| 167 | "end-tag-too-early": | ||
| 168 | "End tag (%(name)s) seen too early. Expected other end tag.", | ||
| 169 | "end-tag-too-early-named": | ||
| 170 | "Unexpected end tag (%(gotName)s). Expected end tag (%(expectedName)s).", | ||
| 171 | "end-tag-too-early-ignored": | ||
| 172 | "End tag (%(name)s) seen too early. Ignored.", | ||
| 173 | "adoption-agency-1.1": | ||
| 174 | "End tag (%(name)s) violates step 1, " | ||
| 175 | "paragraph 1 of the adoption agency algorithm.", | ||
| 176 | "adoption-agency-1.2": | ||
| 177 | "End tag (%(name)s) violates step 1, " | ||
| 178 | "paragraph 2 of the adoption agency algorithm.", | ||
| 179 | "adoption-agency-1.3": | ||
| 180 | "End tag (%(name)s) violates step 1, " | ||
| 181 | "paragraph 3 of the adoption agency algorithm.", | ||
| 182 | "adoption-agency-4.4": | ||
| 183 | "End tag (%(name)s) violates step 4, " | ||
| 184 | "paragraph 4 of the adoption agency algorithm.", | ||
| 185 | "unexpected-end-tag-treated-as": | ||
| 186 | "Unexpected end tag (%(originalName)s). Treated as %(newName)s.", | ||
| 187 | "no-end-tag": | ||
| 188 | "This element (%(name)s) has no end tag.", | ||
| 189 | "unexpected-implied-end-tag-in-table": | ||
| 190 | "Unexpected implied end tag (%(name)s) in the table phase.", | ||
| 191 | "unexpected-implied-end-tag-in-table-body": | ||
| 192 | "Unexpected implied end tag (%(name)s) in the table body phase.", | ||
| 193 | "unexpected-char-implies-table-voodoo": | ||
| 194 | "Unexpected non-space characters in " | ||
| 195 | "table context caused voodoo mode.", | ||
| 196 | "unexpected-hidden-input-in-table": | ||
| 197 | "Unexpected input with type hidden in table context.", | ||
| 198 | "unexpected-form-in-table": | ||
| 199 | "Unexpected form in table context.", | ||
| 200 | "unexpected-start-tag-implies-table-voodoo": | ||
| 201 | "Unexpected start tag (%(name)s) in " | ||
| 202 | "table context caused voodoo mode.", | ||
| 203 | "unexpected-end-tag-implies-table-voodoo": | ||
| 204 | "Unexpected end tag (%(name)s) in " | ||
| 205 | "table context caused voodoo mode.", | ||
| 206 | "unexpected-cell-in-table-body": | ||
| 207 | "Unexpected table cell start tag (%(name)s) " | ||
| 208 | "in the table body phase.", | ||
| 209 | "unexpected-cell-end-tag": | ||
| 210 | "Got table cell end tag (%(name)s) " | ||
| 211 | "while required end tags are missing.", | ||
| 212 | "unexpected-end-tag-in-table-body": | ||
| 213 | "Unexpected end tag (%(name)s) in the table body phase. Ignored.", | ||
| 214 | "unexpected-implied-end-tag-in-table-row": | ||
| 215 | "Unexpected implied end tag (%(name)s) in the table row phase.", | ||
| 216 | "unexpected-end-tag-in-table-row": | ||
| 217 | "Unexpected end tag (%(name)s) in the table row phase. Ignored.", | ||
| 218 | "unexpected-select-in-select": | ||
| 219 | "Unexpected select start tag in the select phase " | ||
| 220 | "treated as select end tag.", | ||
| 221 | "unexpected-input-in-select": | ||
| 222 | "Unexpected input start tag in the select phase.", | ||
| 223 | "unexpected-start-tag-in-select": | ||
| 224 | "Unexpected start tag token (%(name)s in the select phase. " | ||
| 225 | "Ignored.", | ||
| 226 | "unexpected-end-tag-in-select": | ||
| 227 | "Unexpected end tag (%(name)s) in the select phase. Ignored.", | ||
| 228 | "unexpected-table-element-start-tag-in-select-in-table": | ||
| 229 | "Unexpected table element start tag (%(name)s) in the select in table phase.", | ||
| 230 | "unexpected-table-element-end-tag-in-select-in-table": | ||
| 231 | "Unexpected table element end tag (%(name)s) in the select in table phase.", | ||
| 232 | "unexpected-char-after-body": | ||
| 233 | "Unexpected non-space characters in the after body phase.", | ||
| 234 | "unexpected-start-tag-after-body": | ||
| 235 | "Unexpected start tag token (%(name)s)" | ||
| 236 | " in the after body phase.", | ||
| 237 | "unexpected-end-tag-after-body": | ||
| 238 | "Unexpected end tag token (%(name)s)" | ||
| 239 | " in the after body phase.", | ||
| 240 | "unexpected-char-in-frameset": | ||
| 241 | "Unexpected characters in the frameset phase. Characters ignored.", | ||
| 242 | "unexpected-start-tag-in-frameset": | ||
| 243 | "Unexpected start tag token (%(name)s)" | ||
| 244 | " in the frameset phase. Ignored.", | ||
| 245 | "unexpected-frameset-in-frameset-innerhtml": | ||
| 246 | "Unexpected end tag token (frameset) " | ||
| 247 | "in the frameset phase (innerHTML).", | ||
| 248 | "unexpected-end-tag-in-frameset": | ||
| 249 | "Unexpected end tag token (%(name)s)" | ||
| 250 | " in the frameset phase. Ignored.", | ||
| 251 | "unexpected-char-after-frameset": | ||
| 252 | "Unexpected non-space characters in the " | ||
| 253 | "after frameset phase. Ignored.", | ||
| 254 | "unexpected-start-tag-after-frameset": | ||
| 255 | "Unexpected start tag (%(name)s)" | ||
| 256 | " in the after frameset phase. Ignored.", | ||
| 257 | "unexpected-end-tag-after-frameset": | ||
| 258 | "Unexpected end tag (%(name)s)" | ||
| 259 | " in the after frameset phase. Ignored.", | ||
| 260 | "unexpected-end-tag-after-body-innerhtml": | ||
| 261 | "Unexpected end tag after body(innerHtml)", | ||
| 262 | "expected-eof-but-got-char": | ||
| 263 | "Unexpected non-space characters. Expected end of file.", | ||
| 264 | "expected-eof-but-got-start-tag": | ||
| 265 | "Unexpected start tag (%(name)s)" | ||
| 266 | ". Expected end of file.", | ||
| 267 | "expected-eof-but-got-end-tag": | ||
| 268 | "Unexpected end tag (%(name)s)" | ||
| 269 | ". Expected end of file.", | ||
| 270 | "eof-in-table": | ||
| 271 | "Unexpected end of file. Expected table content.", | ||
| 272 | "eof-in-select": | ||
| 273 | "Unexpected end of file. Expected select content.", | ||
| 274 | "eof-in-frameset": | ||
| 275 | "Unexpected end of file. Expected frameset content.", | ||
| 276 | "eof-in-script-in-script": | ||
| 277 | "Unexpected end of file. Expected script content.", | ||
| 278 | "eof-in-foreign-lands": | ||
| 279 | "Unexpected end of file. Expected foreign content", | ||
| 280 | "non-void-element-with-trailing-solidus": | ||
| 281 | "Trailing solidus not allowed on element %(name)s", | ||
| 282 | "unexpected-html-element-in-foreign-content": | ||
| 283 | "Element %(name)s not allowed in a non-html context", | ||
| 284 | "unexpected-end-tag-before-html": | ||
| 285 | "Unexpected end tag (%(name)s) before html.", | ||
| 286 | "unexpected-inhead-noscript-tag": | ||
| 287 | "Element %(name)s not allowed in a inhead-noscript context", | ||
| 288 | "eof-in-head-noscript": | ||
| 289 | "Unexpected end of file. Expected inhead-noscript content", | ||
| 290 | "char-in-head-noscript": | ||
| 291 | "Unexpected non-space character. Expected inhead-noscript content", | ||
| 292 | "XXX-undefined-error": | ||
| 293 | "Undefined error (this sucks and should be fixed)", | ||
| 294 | } | ||
| 295 | |||
# Namespace URIs keyed by the short prefixes used throughout this package.
namespaces = {
    "html": "http://www.w3.org/1999/xhtml",
    "mathml": "http://www.w3.org/1998/Math/MathML",
    "svg": "http://www.w3.org/2000/svg",
    "xlink": "http://www.w3.org/1999/xlink",
    "xml": "http://www.w3.org/XML/1998/namespace",
    "xmlns": "http://www.w3.org/2000/xmlns/",
}

# (namespace, tag name) pairs that delimit a scope for the parser's
# "have an element in scope" checks during tree construction.
scopingElements = frozenset(
    [(namespaces["html"], tag)
     for tag in ("applet", "caption", "html", "marquee", "object",
                 "table", "td", "th")] +
    [(namespaces["mathml"], tag)
     for tag in ("mi", "mo", "mn", "ms", "mtext", "annotation-xml")] +
    [(namespaces["svg"], tag)
     for tag in ("foreignObject", "desc", "title")]
)

# HTML elements handled by the "active formatting elements" (adoption
# agency) machinery.
formattingElements = frozenset(
    (namespaces["html"], tag)
    for tag in ("a", "b", "big", "code", "em", "font", "i", "nobr",
                "s", "small", "strike", "strong", "tt", "u")
)
| 341 | |||
# HTML tag names in the "special" category of the tree-construction spec.
_specialHTMLTags = (
    "address", "applet", "area", "article", "aside", "base", "basefont",
    "bgsound", "blockquote", "body", "br", "button", "caption", "center",
    "col", "colgroup", "command", "dd", "details", "dir", "div", "dl",
    "dt", "embed", "fieldset", "figure", "footer", "form", "frame",
    "frameset", "h1", "h2", "h3", "h4", "h5", "h6", "head", "header",
    "hr", "html", "iframe",
    # Note that image is commented out in the spec as "this isn't an
    # element that can end up on the stack, so it doesn't matter,"
    "image",
    "img", "input", "isindex", "li", "link", "listing", "marquee",
    "menu", "meta", "nav", "noembed", "noframes", "noscript", "object",
    "ol", "p", "param", "plaintext", "pre", "script", "section",
    "select", "style", "table", "tbody", "td", "textarea", "tfoot",
    "th", "thead", "title", "tr", "ul", "wbr", "xmp",
)

specialElements = frozenset(
    [(namespaces["html"], tag) for tag in _specialHTMLTags] +
    [(namespaces["svg"], "foreignObject")]
)

# Elements whose children are parsed as ordinary HTML even when they sit
# inside foreign (SVG/MathML) content.
htmlIntegrationPointElements = frozenset([
    (namespaces["mathml"], "annotation-xml"),
    (namespaces["svg"], "foreignObject"),
    (namespaces["svg"], "desc"),
    (namespaces["svg"], "title"),
])

# MathML text integration points: character/most start-tag tokens inside
# these are processed as HTML.
mathmlTextIntegrationPointElements = frozenset(
    (namespaces["mathml"], tag)
    for tag in ("mi", "mo", "mn", "ms", "mtext")
)
| 439 | |||
# SVG attributes whose canonical spelling is mixed-case.  The tokenizer
# lowercases every attribute name, so the tree builder uses this table to
# restore the canonical form.  Each key is exactly the lowercased form of
# its value, so the whole table is generated from the canonical spellings.
adjustSVGAttributes = {canonical.lower(): canonical for canonical in (
    "attributeName", "attributeType", "baseFrequency", "baseProfile",
    "calcMode", "clipPathUnits", "contentScriptType", "contentStyleType",
    "diffuseConstant", "edgeMode", "externalResourcesRequired",
    "filterRes", "filterUnits", "glyphRef", "gradientTransform",
    "gradientUnits", "kernelMatrix", "kernelUnitLength", "keyPoints",
    "keySplines", "keyTimes", "lengthAdjust", "limitingConeAngle",
    "markerHeight", "markerUnits", "markerWidth", "maskContentUnits",
    "maskUnits", "numOctaves", "pathLength", "patternContentUnits",
    "patternTransform", "patternUnits", "pointsAtX", "pointsAtY",
    "pointsAtZ", "preserveAlpha", "preserveAspectRatio",
    "primitiveUnits", "refX", "refY", "repeatCount", "repeatDur",
    "requiredExtensions", "requiredFeatures", "specularConstant",
    "specularExponent", "spreadMethod", "startOffset", "stdDeviation",
    "stitchTiles", "surfaceScale", "systemLanguage", "tableValues",
    "targetX", "targetY", "textLength", "viewBox", "viewTarget",
    "xChannelSelector", "yChannelSelector", "zoomAndPan",
)}

# The single MathML attribute that needs the same case restoration.
adjustMathMLAttributes = {"definitionurl": "definitionURL"}
| 506 | |||
# Namespaced attributes on foreign (SVG/MathML) elements: maps the
# lowercased qualified attribute name to a (prefix, local name,
# namespace URI) triple so the tree builder can emit properly
# namespaced attributes.
adjustForeignAttributes = {
    "xlink:actuate": ("xlink", "actuate", namespaces["xlink"]),
    "xlink:arcrole": ("xlink", "arcrole", namespaces["xlink"]),
    "xlink:href": ("xlink", "href", namespaces["xlink"]),
    "xlink:role": ("xlink", "role", namespaces["xlink"]),
    "xlink:show": ("xlink", "show", namespaces["xlink"]),
    "xlink:title": ("xlink", "title", namespaces["xlink"]),
    "xlink:type": ("xlink", "type", namespaces["xlink"]),
    "xml:base": ("xml", "base", namespaces["xml"]),
    "xml:lang": ("xml", "lang", namespaces["xml"]),
    "xml:space": ("xml", "space", namespaces["xml"]),
    "xmlns": (None, "xmlns", namespaces["xmlns"]),
    "xmlns:xlink": ("xmlns", "xlink", namespaces["xmlns"])
}

# Reverse lookup, (namespace, local name) -> qualified name, for
# serialization.  A dict comprehension replaces the old
# dict([(...) for ...]) form: same mapping, no throwaway list (C404).
unadjustForeignAttributes = {(ns, local): qname
                             for qname, (prefix, local, ns)
                             in adjustForeignAttributes.items()}
| 524 | |||
# Characters the HTML spec treats as whitespace.
spaceCharacters = frozenset([
    "\t",
    "\n",
    "\u000C",
    " ",
    "\r"
])

# Elements that switch the parser into a table-related insertion mode.
tableInsertModeElements = frozenset([
    "table",
    "tbody",
    "tfoot",
    "thead",
    "tr"
])

# Character classes used by the tokenizer, as O(1) membership sets.
asciiLowercase = frozenset(string.ascii_lowercase)
asciiUppercase = frozenset(string.ascii_uppercase)
asciiLetters = frozenset(string.ascii_letters)
digits = frozenset(string.digits)
hexDigits = frozenset(string.hexdigits)

# str.translate table that lowercases ASCII letters ONLY (unlike
# str.lower, which also folds non-ASCII characters).  str.maketrans
# builds exactly the {ord(upper): ord(lower)} dict the old
# dict([...]) list comprehension produced, in C.
asciiUpper2Lower = str.maketrans(string.ascii_uppercase,
                                 string.ascii_lowercase)
| 549 | |||
# Heading elements need to be ordered (h1 outranks h2, etc.), hence a
# tuple rather than a set.
headingElements = ("h1", "h2", "h3", "h4", "h5", "h6")

# Void elements: no content model, no end tag.
voidElements = frozenset([
    "base", "command", "event-source", "link", "meta", "hr", "br",
    "img", "embed", "param", "area", "col", "input", "source", "track",
])

# Elements whose content is escapable raw text.
cdataElements = frozenset("title textarea".split())

# Elements whose content is raw text (markup inside is not parsed).
rcdataElements = frozenset(
    "style script xmp iframe noembed noframes noscript".split()
)
| 589 | |||
# Boolean (presence-only) attributes, per element name; the "" key holds
# attributes that are boolean on every element.  Spelled as a compact
# spec table and expanded into frozensets.
_booleanAttributeSpec = (
    ("", "irrelevant itemscope"),
    ("style", "scoped"),
    ("img", "ismap"),
    ("audio", "autoplay controls"),
    ("video", "autoplay controls"),
    ("script", "defer async"),
    ("details", "open"),
    ("datagrid", "multiple disabled"),
    ("command", "hidden disabled checked default"),
    ("hr", "noshade"),
    ("menu", "autosubmit"),
    ("fieldset", "disabled readonly"),
    ("option", "disabled readonly selected"),
    ("optgroup", "disabled readonly"),
    ("button", "disabled autofocus"),
    ("input", "disabled readonly required autofocus checked ismap"),
    ("select", "disabled readonly autofocus multiple"),
    ("output", "disabled readonly"),
    ("iframe", "seamless"),
)

booleanAttributes = {element: frozenset(attrs.split())
                     for element, attrs in _booleanAttributeSpec}
| 611 | |||
# entitiesWindows1252 has to be _ordered_ and needs to have an index. It
# therefore can't be a frozenset.  It maps the C1 control range
# 0x80-0x9F to the code points Windows-1252 displays there, which is how
# numeric character references in that range are reinterpreted.  Rather
# than spelling out the 32 code points, decode the byte range with the
# cp1252 codec; the five bytes undefined in cp1252 (0x81, 0x8D, 0x8F,
# 0x90, 0x9D) become U+FFFD REPLACEMENT CHARACTER (65533) via
# errors="replace", exactly as the hand-written table had them.
entitiesWindows1252 = tuple(
    ord(char)
    for char in bytes(range(0x80, 0xA0)).decode("cp1252", "replace")
)

# The five entities predefined by XML itself (terminated forms only).
xmlEntities = frozenset(["lt;", "gt;", "amp;", "apos;", "quot;"])
| 650 | |||
| 651 | entities = { | ||
| 652 | "AElig": "\xc6", | ||
| 653 | "AElig;": "\xc6", | ||
| 654 | "AMP": "&", | ||
| 655 | "AMP;": "&", | ||
| 656 | "Aacute": "\xc1", | ||
| 657 | "Aacute;": "\xc1", | ||
| 658 | "Abreve;": "\u0102", | ||
| 659 | "Acirc": "\xc2", | ||
| 660 | "Acirc;": "\xc2", | ||
| 661 | "Acy;": "\u0410", | ||
| 662 | "Afr;": "\U0001d504", | ||
| 663 | "Agrave": "\xc0", | ||
| 664 | "Agrave;": "\xc0", | ||
| 665 | "Alpha;": "\u0391", | ||
| 666 | "Amacr;": "\u0100", | ||
| 667 | "And;": "\u2a53", | ||
| 668 | "Aogon;": "\u0104", | ||
| 669 | "Aopf;": "\U0001d538", | ||
| 670 | "ApplyFunction;": "\u2061", | ||
| 671 | "Aring": "\xc5", | ||
| 672 | "Aring;": "\xc5", | ||
| 673 | "Ascr;": "\U0001d49c", | ||
| 674 | "Assign;": "\u2254", | ||
| 675 | "Atilde": "\xc3", | ||
| 676 | "Atilde;": "\xc3", | ||
| 677 | "Auml": "\xc4", | ||
| 678 | "Auml;": "\xc4", | ||
| 679 | "Backslash;": "\u2216", | ||
| 680 | "Barv;": "\u2ae7", | ||
| 681 | "Barwed;": "\u2306", | ||
| 682 | "Bcy;": "\u0411", | ||
| 683 | "Because;": "\u2235", | ||
| 684 | "Bernoullis;": "\u212c", | ||
| 685 | "Beta;": "\u0392", | ||
| 686 | "Bfr;": "\U0001d505", | ||
| 687 | "Bopf;": "\U0001d539", | ||
| 688 | "Breve;": "\u02d8", | ||
| 689 | "Bscr;": "\u212c", | ||
| 690 | "Bumpeq;": "\u224e", | ||
| 691 | "CHcy;": "\u0427", | ||
| 692 | "COPY": "\xa9", | ||
| 693 | "COPY;": "\xa9", | ||
| 694 | "Cacute;": "\u0106", | ||
| 695 | "Cap;": "\u22d2", | ||
| 696 | "CapitalDifferentialD;": "\u2145", | ||
| 697 | "Cayleys;": "\u212d", | ||
| 698 | "Ccaron;": "\u010c", | ||
| 699 | "Ccedil": "\xc7", | ||
| 700 | "Ccedil;": "\xc7", | ||
| 701 | "Ccirc;": "\u0108", | ||
| 702 | "Cconint;": "\u2230", | ||
| 703 | "Cdot;": "\u010a", | ||
| 704 | "Cedilla;": "\xb8", | ||
| 705 | "CenterDot;": "\xb7", | ||
| 706 | "Cfr;": "\u212d", | ||
| 707 | "Chi;": "\u03a7", | ||
| 708 | "CircleDot;": "\u2299", | ||
| 709 | "CircleMinus;": "\u2296", | ||
| 710 | "CirclePlus;": "\u2295", | ||
| 711 | "CircleTimes;": "\u2297", | ||
| 712 | "ClockwiseContourIntegral;": "\u2232", | ||
| 713 | "CloseCurlyDoubleQuote;": "\u201d", | ||
| 714 | "CloseCurlyQuote;": "\u2019", | ||
| 715 | "Colon;": "\u2237", | ||
| 716 | "Colone;": "\u2a74", | ||
| 717 | "Congruent;": "\u2261", | ||
| 718 | "Conint;": "\u222f", | ||
| 719 | "ContourIntegral;": "\u222e", | ||
| 720 | "Copf;": "\u2102", | ||
| 721 | "Coproduct;": "\u2210", | ||
| 722 | "CounterClockwiseContourIntegral;": "\u2233", | ||
| 723 | "Cross;": "\u2a2f", | ||
| 724 | "Cscr;": "\U0001d49e", | ||
| 725 | "Cup;": "\u22d3", | ||
| 726 | "CupCap;": "\u224d", | ||
| 727 | "DD;": "\u2145", | ||
| 728 | "DDotrahd;": "\u2911", | ||
| 729 | "DJcy;": "\u0402", | ||
| 730 | "DScy;": "\u0405", | ||
| 731 | "DZcy;": "\u040f", | ||
| 732 | "Dagger;": "\u2021", | ||
| 733 | "Darr;": "\u21a1", | ||
| 734 | "Dashv;": "\u2ae4", | ||
| 735 | "Dcaron;": "\u010e", | ||
| 736 | "Dcy;": "\u0414", | ||
| 737 | "Del;": "\u2207", | ||
| 738 | "Delta;": "\u0394", | ||
| 739 | "Dfr;": "\U0001d507", | ||
| 740 | "DiacriticalAcute;": "\xb4", | ||
| 741 | "DiacriticalDot;": "\u02d9", | ||
| 742 | "DiacriticalDoubleAcute;": "\u02dd", | ||
| 743 | "DiacriticalGrave;": "`", | ||
| 744 | "DiacriticalTilde;": "\u02dc", | ||
| 745 | "Diamond;": "\u22c4", | ||
| 746 | "DifferentialD;": "\u2146", | ||
| 747 | "Dopf;": "\U0001d53b", | ||
| 748 | "Dot;": "\xa8", | ||
| 749 | "DotDot;": "\u20dc", | ||
| 750 | "DotEqual;": "\u2250", | ||
| 751 | "DoubleContourIntegral;": "\u222f", | ||
| 752 | "DoubleDot;": "\xa8", | ||
| 753 | "DoubleDownArrow;": "\u21d3", | ||
| 754 | "DoubleLeftArrow;": "\u21d0", | ||
| 755 | "DoubleLeftRightArrow;": "\u21d4", | ||
| 756 | "DoubleLeftTee;": "\u2ae4", | ||
| 757 | "DoubleLongLeftArrow;": "\u27f8", | ||
| 758 | "DoubleLongLeftRightArrow;": "\u27fa", | ||
| 759 | "DoubleLongRightArrow;": "\u27f9", | ||
| 760 | "DoubleRightArrow;": "\u21d2", | ||
| 761 | "DoubleRightTee;": "\u22a8", | ||
| 762 | "DoubleUpArrow;": "\u21d1", | ||
| 763 | "DoubleUpDownArrow;": "\u21d5", | ||
| 764 | "DoubleVerticalBar;": "\u2225", | ||
| 765 | "DownArrow;": "\u2193", | ||
| 766 | "DownArrowBar;": "\u2913", | ||
| 767 | "DownArrowUpArrow;": "\u21f5", | ||
| 768 | "DownBreve;": "\u0311", | ||
| 769 | "DownLeftRightVector;": "\u2950", | ||
| 770 | "DownLeftTeeVector;": "\u295e", | ||
| 771 | "DownLeftVector;": "\u21bd", | ||
| 772 | "DownLeftVectorBar;": "\u2956", | ||
| 773 | "DownRightTeeVector;": "\u295f", | ||
| 774 | "DownRightVector;": "\u21c1", | ||
| 775 | "DownRightVectorBar;": "\u2957", | ||
| 776 | "DownTee;": "\u22a4", | ||
| 777 | "DownTeeArrow;": "\u21a7", | ||
| 778 | "Downarrow;": "\u21d3", | ||
| 779 | "Dscr;": "\U0001d49f", | ||
| 780 | "Dstrok;": "\u0110", | ||
| 781 | "ENG;": "\u014a", | ||
| 782 | "ETH": "\xd0", | ||
| 783 | "ETH;": "\xd0", | ||
| 784 | "Eacute": "\xc9", | ||
| 785 | "Eacute;": "\xc9", | ||
| 786 | "Ecaron;": "\u011a", | ||
| 787 | "Ecirc": "\xca", | ||
| 788 | "Ecirc;": "\xca", | ||
| 789 | "Ecy;": "\u042d", | ||
| 790 | "Edot;": "\u0116", | ||
| 791 | "Efr;": "\U0001d508", | ||
| 792 | "Egrave": "\xc8", | ||
| 793 | "Egrave;": "\xc8", | ||
| 794 | "Element;": "\u2208", | ||
| 795 | "Emacr;": "\u0112", | ||
| 796 | "EmptySmallSquare;": "\u25fb", | ||
| 797 | "EmptyVerySmallSquare;": "\u25ab", | ||
| 798 | "Eogon;": "\u0118", | ||
| 799 | "Eopf;": "\U0001d53c", | ||
| 800 | "Epsilon;": "\u0395", | ||
| 801 | "Equal;": "\u2a75", | ||
| 802 | "EqualTilde;": "\u2242", | ||
| 803 | "Equilibrium;": "\u21cc", | ||
| 804 | "Escr;": "\u2130", | ||
| 805 | "Esim;": "\u2a73", | ||
| 806 | "Eta;": "\u0397", | ||
| 807 | "Euml": "\xcb", | ||
| 808 | "Euml;": "\xcb", | ||
| 809 | "Exists;": "\u2203", | ||
| 810 | "ExponentialE;": "\u2147", | ||
| 811 | "Fcy;": "\u0424", | ||
| 812 | "Ffr;": "\U0001d509", | ||
| 813 | "FilledSmallSquare;": "\u25fc", | ||
| 814 | "FilledVerySmallSquare;": "\u25aa", | ||
| 815 | "Fopf;": "\U0001d53d", | ||
| 816 | "ForAll;": "\u2200", | ||
| 817 | "Fouriertrf;": "\u2131", | ||
| 818 | "Fscr;": "\u2131", | ||
| 819 | "GJcy;": "\u0403", | ||
| 820 | "GT": ">", | ||
| 821 | "GT;": ">", | ||
| 822 | "Gamma;": "\u0393", | ||
| 823 | "Gammad;": "\u03dc", | ||
| 824 | "Gbreve;": "\u011e", | ||
| 825 | "Gcedil;": "\u0122", | ||
| 826 | "Gcirc;": "\u011c", | ||
| 827 | "Gcy;": "\u0413", | ||
| 828 | "Gdot;": "\u0120", | ||
| 829 | "Gfr;": "\U0001d50a", | ||
| 830 | "Gg;": "\u22d9", | ||
| 831 | "Gopf;": "\U0001d53e", | ||
| 832 | "GreaterEqual;": "\u2265", | ||
| 833 | "GreaterEqualLess;": "\u22db", | ||
| 834 | "GreaterFullEqual;": "\u2267", | ||
| 835 | "GreaterGreater;": "\u2aa2", | ||
| 836 | "GreaterLess;": "\u2277", | ||
| 837 | "GreaterSlantEqual;": "\u2a7e", | ||
| 838 | "GreaterTilde;": "\u2273", | ||
| 839 | "Gscr;": "\U0001d4a2", | ||
| 840 | "Gt;": "\u226b", | ||
| 841 | "HARDcy;": "\u042a", | ||
| 842 | "Hacek;": "\u02c7", | ||
| 843 | "Hat;": "^", | ||
| 844 | "Hcirc;": "\u0124", | ||
| 845 | "Hfr;": "\u210c", | ||
| 846 | "HilbertSpace;": "\u210b", | ||
| 847 | "Hopf;": "\u210d", | ||
| 848 | "HorizontalLine;": "\u2500", | ||
| 849 | "Hscr;": "\u210b", | ||
| 850 | "Hstrok;": "\u0126", | ||
| 851 | "HumpDownHump;": "\u224e", | ||
| 852 | "HumpEqual;": "\u224f", | ||
| 853 | "IEcy;": "\u0415", | ||
| 854 | "IJlig;": "\u0132", | ||
| 855 | "IOcy;": "\u0401", | ||
| 856 | "Iacute": "\xcd", | ||
| 857 | "Iacute;": "\xcd", | ||
| 858 | "Icirc": "\xce", | ||
| 859 | "Icirc;": "\xce", | ||
| 860 | "Icy;": "\u0418", | ||
| 861 | "Idot;": "\u0130", | ||
| 862 | "Ifr;": "\u2111", | ||
| 863 | "Igrave": "\xcc", | ||
| 864 | "Igrave;": "\xcc", | ||
| 865 | "Im;": "\u2111", | ||
| 866 | "Imacr;": "\u012a", | ||
| 867 | "ImaginaryI;": "\u2148", | ||
| 868 | "Implies;": "\u21d2", | ||
| 869 | "Int;": "\u222c", | ||
| 870 | "Integral;": "\u222b", | ||
| 871 | "Intersection;": "\u22c2", | ||
| 872 | "InvisibleComma;": "\u2063", | ||
| 873 | "InvisibleTimes;": "\u2062", | ||
| 874 | "Iogon;": "\u012e", | ||
| 875 | "Iopf;": "\U0001d540", | ||
| 876 | "Iota;": "\u0399", | ||
| 877 | "Iscr;": "\u2110", | ||
| 878 | "Itilde;": "\u0128", | ||
| 879 | "Iukcy;": "\u0406", | ||
| 880 | "Iuml": "\xcf", | ||
| 881 | "Iuml;": "\xcf", | ||
| 882 | "Jcirc;": "\u0134", | ||
| 883 | "Jcy;": "\u0419", | ||
| 884 | "Jfr;": "\U0001d50d", | ||
| 885 | "Jopf;": "\U0001d541", | ||
| 886 | "Jscr;": "\U0001d4a5", | ||
| 887 | "Jsercy;": "\u0408", | ||
| 888 | "Jukcy;": "\u0404", | ||
| 889 | "KHcy;": "\u0425", | ||
| 890 | "KJcy;": "\u040c", | ||
| 891 | "Kappa;": "\u039a", | ||
| 892 | "Kcedil;": "\u0136", | ||
| 893 | "Kcy;": "\u041a", | ||
| 894 | "Kfr;": "\U0001d50e", | ||
| 895 | "Kopf;": "\U0001d542", | ||
| 896 | "Kscr;": "\U0001d4a6", | ||
| 897 | "LJcy;": "\u0409", | ||
| 898 | "LT": "<", | ||
| 899 | "LT;": "<", | ||
| 900 | "Lacute;": "\u0139", | ||
| 901 | "Lambda;": "\u039b", | ||
| 902 | "Lang;": "\u27ea", | ||
| 903 | "Laplacetrf;": "\u2112", | ||
| 904 | "Larr;": "\u219e", | ||
| 905 | "Lcaron;": "\u013d", | ||
| 906 | "Lcedil;": "\u013b", | ||
| 907 | "Lcy;": "\u041b", | ||
| 908 | "LeftAngleBracket;": "\u27e8", | ||
| 909 | "LeftArrow;": "\u2190", | ||
| 910 | "LeftArrowBar;": "\u21e4", | ||
| 911 | "LeftArrowRightArrow;": "\u21c6", | ||
| 912 | "LeftCeiling;": "\u2308", | ||
| 913 | "LeftDoubleBracket;": "\u27e6", | ||
| 914 | "LeftDownTeeVector;": "\u2961", | ||
| 915 | "LeftDownVector;": "\u21c3", | ||
| 916 | "LeftDownVectorBar;": "\u2959", | ||
| 917 | "LeftFloor;": "\u230a", | ||
| 918 | "LeftRightArrow;": "\u2194", | ||
| 919 | "LeftRightVector;": "\u294e", | ||
| 920 | "LeftTee;": "\u22a3", | ||
| 921 | "LeftTeeArrow;": "\u21a4", | ||
| 922 | "LeftTeeVector;": "\u295a", | ||
| 923 | "LeftTriangle;": "\u22b2", | ||
| 924 | "LeftTriangleBar;": "\u29cf", | ||
| 925 | "LeftTriangleEqual;": "\u22b4", | ||
| 926 | "LeftUpDownVector;": "\u2951", | ||
| 927 | "LeftUpTeeVector;": "\u2960", | ||
| 928 | "LeftUpVector;": "\u21bf", | ||
| 929 | "LeftUpVectorBar;": "\u2958", | ||
| 930 | "LeftVector;": "\u21bc", | ||
| 931 | "LeftVectorBar;": "\u2952", | ||
| 932 | "Leftarrow;": "\u21d0", | ||
| 933 | "Leftrightarrow;": "\u21d4", | ||
| 934 | "LessEqualGreater;": "\u22da", | ||
| 935 | "LessFullEqual;": "\u2266", | ||
| 936 | "LessGreater;": "\u2276", | ||
| 937 | "LessLess;": "\u2aa1", | ||
| 938 | "LessSlantEqual;": "\u2a7d", | ||
| 939 | "LessTilde;": "\u2272", | ||
| 940 | "Lfr;": "\U0001d50f", | ||
| 941 | "Ll;": "\u22d8", | ||
| 942 | "Lleftarrow;": "\u21da", | ||
| 943 | "Lmidot;": "\u013f", | ||
| 944 | "LongLeftArrow;": "\u27f5", | ||
| 945 | "LongLeftRightArrow;": "\u27f7", | ||
| 946 | "LongRightArrow;": "\u27f6", | ||
| 947 | "Longleftarrow;": "\u27f8", | ||
| 948 | "Longleftrightarrow;": "\u27fa", | ||
| 949 | "Longrightarrow;": "\u27f9", | ||
| 950 | "Lopf;": "\U0001d543", | ||
| 951 | "LowerLeftArrow;": "\u2199", | ||
| 952 | "LowerRightArrow;": "\u2198", | ||
| 953 | "Lscr;": "\u2112", | ||
| 954 | "Lsh;": "\u21b0", | ||
| 955 | "Lstrok;": "\u0141", | ||
| 956 | "Lt;": "\u226a", | ||
| 957 | "Map;": "\u2905", | ||
| 958 | "Mcy;": "\u041c", | ||
| 959 | "MediumSpace;": "\u205f", | ||
| 960 | "Mellintrf;": "\u2133", | ||
| 961 | "Mfr;": "\U0001d510", | ||
| 962 | "MinusPlus;": "\u2213", | ||
| 963 | "Mopf;": "\U0001d544", | ||
| 964 | "Mscr;": "\u2133", | ||
| 965 | "Mu;": "\u039c", | ||
| 966 | "NJcy;": "\u040a", | ||
| 967 | "Nacute;": "\u0143", | ||
| 968 | "Ncaron;": "\u0147", | ||
| 969 | "Ncedil;": "\u0145", | ||
| 970 | "Ncy;": "\u041d", | ||
| 971 | "NegativeMediumSpace;": "\u200b", | ||
| 972 | "NegativeThickSpace;": "\u200b", | ||
| 973 | "NegativeThinSpace;": "\u200b", | ||
| 974 | "NegativeVeryThinSpace;": "\u200b", | ||
| 975 | "NestedGreaterGreater;": "\u226b", | ||
| 976 | "NestedLessLess;": "\u226a", | ||
| 977 | "NewLine;": "\n", | ||
| 978 | "Nfr;": "\U0001d511", | ||
| 979 | "NoBreak;": "\u2060", | ||
| 980 | "NonBreakingSpace;": "\xa0", | ||
| 981 | "Nopf;": "\u2115", | ||
| 982 | "Not;": "\u2aec", | ||
| 983 | "NotCongruent;": "\u2262", | ||
| 984 | "NotCupCap;": "\u226d", | ||
| 985 | "NotDoubleVerticalBar;": "\u2226", | ||
| 986 | "NotElement;": "\u2209", | ||
| 987 | "NotEqual;": "\u2260", | ||
| 988 | "NotEqualTilde;": "\u2242\u0338", | ||
| 989 | "NotExists;": "\u2204", | ||
| 990 | "NotGreater;": "\u226f", | ||
| 991 | "NotGreaterEqual;": "\u2271", | ||
| 992 | "NotGreaterFullEqual;": "\u2267\u0338", | ||
| 993 | "NotGreaterGreater;": "\u226b\u0338", | ||
| 994 | "NotGreaterLess;": "\u2279", | ||
| 995 | "NotGreaterSlantEqual;": "\u2a7e\u0338", | ||
| 996 | "NotGreaterTilde;": "\u2275", | ||
| 997 | "NotHumpDownHump;": "\u224e\u0338", | ||
| 998 | "NotHumpEqual;": "\u224f\u0338", | ||
| 999 | "NotLeftTriangle;": "\u22ea", | ||
| 1000 | "NotLeftTriangleBar;": "\u29cf\u0338", | ||
| 1001 | "NotLeftTriangleEqual;": "\u22ec", | ||
| 1002 | "NotLess;": "\u226e", | ||
| 1003 | "NotLessEqual;": "\u2270", | ||
| 1004 | "NotLessGreater;": "\u2278", | ||
| 1005 | "NotLessLess;": "\u226a\u0338", | ||
| 1006 | "NotLessSlantEqual;": "\u2a7d\u0338", | ||
| 1007 | "NotLessTilde;": "\u2274", | ||
| 1008 | "NotNestedGreaterGreater;": "\u2aa2\u0338", | ||
| 1009 | "NotNestedLessLess;": "\u2aa1\u0338", | ||
| 1010 | "NotPrecedes;": "\u2280", | ||
| 1011 | "NotPrecedesEqual;": "\u2aaf\u0338", | ||
| 1012 | "NotPrecedesSlantEqual;": "\u22e0", | ||
| 1013 | "NotReverseElement;": "\u220c", | ||
| 1014 | "NotRightTriangle;": "\u22eb", | ||
| 1015 | "NotRightTriangleBar;": "\u29d0\u0338", | ||
| 1016 | "NotRightTriangleEqual;": "\u22ed", | ||
| 1017 | "NotSquareSubset;": "\u228f\u0338", | ||
| 1018 | "NotSquareSubsetEqual;": "\u22e2", | ||
| 1019 | "NotSquareSuperset;": "\u2290\u0338", | ||
| 1020 | "NotSquareSupersetEqual;": "\u22e3", | ||
| 1021 | "NotSubset;": "\u2282\u20d2", | ||
| 1022 | "NotSubsetEqual;": "\u2288", | ||
| 1023 | "NotSucceeds;": "\u2281", | ||
| 1024 | "NotSucceedsEqual;": "\u2ab0\u0338", | ||
| 1025 | "NotSucceedsSlantEqual;": "\u22e1", | ||
| 1026 | "NotSucceedsTilde;": "\u227f\u0338", | ||
| 1027 | "NotSuperset;": "\u2283\u20d2", | ||
| 1028 | "NotSupersetEqual;": "\u2289", | ||
| 1029 | "NotTilde;": "\u2241", | ||
| 1030 | "NotTildeEqual;": "\u2244", | ||
| 1031 | "NotTildeFullEqual;": "\u2247", | ||
| 1032 | "NotTildeTilde;": "\u2249", | ||
| 1033 | "NotVerticalBar;": "\u2224", | ||
| 1034 | "Nscr;": "\U0001d4a9", | ||
| 1035 | "Ntilde": "\xd1", | ||
| 1036 | "Ntilde;": "\xd1", | ||
| 1037 | "Nu;": "\u039d", | ||
| 1038 | "OElig;": "\u0152", | ||
| 1039 | "Oacute": "\xd3", | ||
| 1040 | "Oacute;": "\xd3", | ||
| 1041 | "Ocirc": "\xd4", | ||
| 1042 | "Ocirc;": "\xd4", | ||
| 1043 | "Ocy;": "\u041e", | ||
| 1044 | "Odblac;": "\u0150", | ||
| 1045 | "Ofr;": "\U0001d512", | ||
| 1046 | "Ograve": "\xd2", | ||
| 1047 | "Ograve;": "\xd2", | ||
| 1048 | "Omacr;": "\u014c", | ||
| 1049 | "Omega;": "\u03a9", | ||
| 1050 | "Omicron;": "\u039f", | ||
| 1051 | "Oopf;": "\U0001d546", | ||
| 1052 | "OpenCurlyDoubleQuote;": "\u201c", | ||
| 1053 | "OpenCurlyQuote;": "\u2018", | ||
| 1054 | "Or;": "\u2a54", | ||
| 1055 | "Oscr;": "\U0001d4aa", | ||
| 1056 | "Oslash": "\xd8", | ||
| 1057 | "Oslash;": "\xd8", | ||
| 1058 | "Otilde": "\xd5", | ||
| 1059 | "Otilde;": "\xd5", | ||
| 1060 | "Otimes;": "\u2a37", | ||
| 1061 | "Ouml": "\xd6", | ||
| 1062 | "Ouml;": "\xd6", | ||
| 1063 | "OverBar;": "\u203e", | ||
| 1064 | "OverBrace;": "\u23de", | ||
| 1065 | "OverBracket;": "\u23b4", | ||
| 1066 | "OverParenthesis;": "\u23dc", | ||
| 1067 | "PartialD;": "\u2202", | ||
| 1068 | "Pcy;": "\u041f", | ||
| 1069 | "Pfr;": "\U0001d513", | ||
| 1070 | "Phi;": "\u03a6", | ||
| 1071 | "Pi;": "\u03a0", | ||
| 1072 | "PlusMinus;": "\xb1", | ||
| 1073 | "Poincareplane;": "\u210c", | ||
| 1074 | "Popf;": "\u2119", | ||
| 1075 | "Pr;": "\u2abb", | ||
| 1076 | "Precedes;": "\u227a", | ||
| 1077 | "PrecedesEqual;": "\u2aaf", | ||
| 1078 | "PrecedesSlantEqual;": "\u227c", | ||
| 1079 | "PrecedesTilde;": "\u227e", | ||
| 1080 | "Prime;": "\u2033", | ||
| 1081 | "Product;": "\u220f", | ||
| 1082 | "Proportion;": "\u2237", | ||
| 1083 | "Proportional;": "\u221d", | ||
| 1084 | "Pscr;": "\U0001d4ab", | ||
| 1085 | "Psi;": "\u03a8", | ||
| 1086 | "QUOT": "\"", | ||
| 1087 | "QUOT;": "\"", | ||
| 1088 | "Qfr;": "\U0001d514", | ||
| 1089 | "Qopf;": "\u211a", | ||
| 1090 | "Qscr;": "\U0001d4ac", | ||
| 1091 | "RBarr;": "\u2910", | ||
| 1092 | "REG": "\xae", | ||
| 1093 | "REG;": "\xae", | ||
| 1094 | "Racute;": "\u0154", | ||
| 1095 | "Rang;": "\u27eb", | ||
| 1096 | "Rarr;": "\u21a0", | ||
| 1097 | "Rarrtl;": "\u2916", | ||
| 1098 | "Rcaron;": "\u0158", | ||
| 1099 | "Rcedil;": "\u0156", | ||
| 1100 | "Rcy;": "\u0420", | ||
| 1101 | "Re;": "\u211c", | ||
| 1102 | "ReverseElement;": "\u220b", | ||
| 1103 | "ReverseEquilibrium;": "\u21cb", | ||
| 1104 | "ReverseUpEquilibrium;": "\u296f", | ||
| 1105 | "Rfr;": "\u211c", | ||
| 1106 | "Rho;": "\u03a1", | ||
| 1107 | "RightAngleBracket;": "\u27e9", | ||
| 1108 | "RightArrow;": "\u2192", | ||
| 1109 | "RightArrowBar;": "\u21e5", | ||
| 1110 | "RightArrowLeftArrow;": "\u21c4", | ||
| 1111 | "RightCeiling;": "\u2309", | ||
| 1112 | "RightDoubleBracket;": "\u27e7", | ||
| 1113 | "RightDownTeeVector;": "\u295d", | ||
| 1114 | "RightDownVector;": "\u21c2", | ||
| 1115 | "RightDownVectorBar;": "\u2955", | ||
| 1116 | "RightFloor;": "\u230b", | ||
| 1117 | "RightTee;": "\u22a2", | ||
| 1118 | "RightTeeArrow;": "\u21a6", | ||
| 1119 | "RightTeeVector;": "\u295b", | ||
| 1120 | "RightTriangle;": "\u22b3", | ||
| 1121 | "RightTriangleBar;": "\u29d0", | ||
| 1122 | "RightTriangleEqual;": "\u22b5", | ||
| 1123 | "RightUpDownVector;": "\u294f", | ||
| 1124 | "RightUpTeeVector;": "\u295c", | ||
| 1125 | "RightUpVector;": "\u21be", | ||
| 1126 | "RightUpVectorBar;": "\u2954", | ||
| 1127 | "RightVector;": "\u21c0", | ||
| 1128 | "RightVectorBar;": "\u2953", | ||
| 1129 | "Rightarrow;": "\u21d2", | ||
| 1130 | "Ropf;": "\u211d", | ||
| 1131 | "RoundImplies;": "\u2970", | ||
| 1132 | "Rrightarrow;": "\u21db", | ||
| 1133 | "Rscr;": "\u211b", | ||
| 1134 | "Rsh;": "\u21b1", | ||
| 1135 | "RuleDelayed;": "\u29f4", | ||
| 1136 | "SHCHcy;": "\u0429", | ||
| 1137 | "SHcy;": "\u0428", | ||
| 1138 | "SOFTcy;": "\u042c", | ||
| 1139 | "Sacute;": "\u015a", | ||
| 1140 | "Sc;": "\u2abc", | ||
| 1141 | "Scaron;": "\u0160", | ||
| 1142 | "Scedil;": "\u015e", | ||
| 1143 | "Scirc;": "\u015c", | ||
| 1144 | "Scy;": "\u0421", | ||
| 1145 | "Sfr;": "\U0001d516", | ||
| 1146 | "ShortDownArrow;": "\u2193", | ||
| 1147 | "ShortLeftArrow;": "\u2190", | ||
| 1148 | "ShortRightArrow;": "\u2192", | ||
| 1149 | "ShortUpArrow;": "\u2191", | ||
| 1150 | "Sigma;": "\u03a3", | ||
| 1151 | "SmallCircle;": "\u2218", | ||
| 1152 | "Sopf;": "\U0001d54a", | ||
| 1153 | "Sqrt;": "\u221a", | ||
| 1154 | "Square;": "\u25a1", | ||
| 1155 | "SquareIntersection;": "\u2293", | ||
| 1156 | "SquareSubset;": "\u228f", | ||
| 1157 | "SquareSubsetEqual;": "\u2291", | ||
| 1158 | "SquareSuperset;": "\u2290", | ||
| 1159 | "SquareSupersetEqual;": "\u2292", | ||
| 1160 | "SquareUnion;": "\u2294", | ||
| 1161 | "Sscr;": "\U0001d4ae", | ||
| 1162 | "Star;": "\u22c6", | ||
| 1163 | "Sub;": "\u22d0", | ||
| 1164 | "Subset;": "\u22d0", | ||
| 1165 | "SubsetEqual;": "\u2286", | ||
| 1166 | "Succeeds;": "\u227b", | ||
| 1167 | "SucceedsEqual;": "\u2ab0", | ||
| 1168 | "SucceedsSlantEqual;": "\u227d", | ||
| 1169 | "SucceedsTilde;": "\u227f", | ||
| 1170 | "SuchThat;": "\u220b", | ||
| 1171 | "Sum;": "\u2211", | ||
| 1172 | "Sup;": "\u22d1", | ||
| 1173 | "Superset;": "\u2283", | ||
| 1174 | "SupersetEqual;": "\u2287", | ||
| 1175 | "Supset;": "\u22d1", | ||
| 1176 | "THORN": "\xde", | ||
| 1177 | "THORN;": "\xde", | ||
| 1178 | "TRADE;": "\u2122", | ||
| 1179 | "TSHcy;": "\u040b", | ||
| 1180 | "TScy;": "\u0426", | ||
| 1181 | "Tab;": "\t", | ||
| 1182 | "Tau;": "\u03a4", | ||
| 1183 | "Tcaron;": "\u0164", | ||
| 1184 | "Tcedil;": "\u0162", | ||
| 1185 | "Tcy;": "\u0422", | ||
| 1186 | "Tfr;": "\U0001d517", | ||
| 1187 | "Therefore;": "\u2234", | ||
| 1188 | "Theta;": "\u0398", | ||
| 1189 | "ThickSpace;": "\u205f\u200a", | ||
| 1190 | "ThinSpace;": "\u2009", | ||
| 1191 | "Tilde;": "\u223c", | ||
| 1192 | "TildeEqual;": "\u2243", | ||
| 1193 | "TildeFullEqual;": "\u2245", | ||
| 1194 | "TildeTilde;": "\u2248", | ||
| 1195 | "Topf;": "\U0001d54b", | ||
| 1196 | "TripleDot;": "\u20db", | ||
| 1197 | "Tscr;": "\U0001d4af", | ||
| 1198 | "Tstrok;": "\u0166", | ||
| 1199 | "Uacute": "\xda", | ||
| 1200 | "Uacute;": "\xda", | ||
| 1201 | "Uarr;": "\u219f", | ||
| 1202 | "Uarrocir;": "\u2949", | ||
| 1203 | "Ubrcy;": "\u040e", | ||
| 1204 | "Ubreve;": "\u016c", | ||
| 1205 | "Ucirc": "\xdb", | ||
| 1206 | "Ucirc;": "\xdb", | ||
| 1207 | "Ucy;": "\u0423", | ||
| 1208 | "Udblac;": "\u0170", | ||
| 1209 | "Ufr;": "\U0001d518", | ||
| 1210 | "Ugrave": "\xd9", | ||
| 1211 | "Ugrave;": "\xd9", | ||
| 1212 | "Umacr;": "\u016a", | ||
| 1213 | "UnderBar;": "_", | ||
| 1214 | "UnderBrace;": "\u23df", | ||
| 1215 | "UnderBracket;": "\u23b5", | ||
| 1216 | "UnderParenthesis;": "\u23dd", | ||
| 1217 | "Union;": "\u22c3", | ||
| 1218 | "UnionPlus;": "\u228e", | ||
| 1219 | "Uogon;": "\u0172", | ||
| 1220 | "Uopf;": "\U0001d54c", | ||
| 1221 | "UpArrow;": "\u2191", | ||
| 1222 | "UpArrowBar;": "\u2912", | ||
| 1223 | "UpArrowDownArrow;": "\u21c5", | ||
| 1224 | "UpDownArrow;": "\u2195", | ||
| 1225 | "UpEquilibrium;": "\u296e", | ||
| 1226 | "UpTee;": "\u22a5", | ||
| 1227 | "UpTeeArrow;": "\u21a5", | ||
| 1228 | "Uparrow;": "\u21d1", | ||
| 1229 | "Updownarrow;": "\u21d5", | ||
| 1230 | "UpperLeftArrow;": "\u2196", | ||
| 1231 | "UpperRightArrow;": "\u2197", | ||
| 1232 | "Upsi;": "\u03d2", | ||
| 1233 | "Upsilon;": "\u03a5", | ||
| 1234 | "Uring;": "\u016e", | ||
| 1235 | "Uscr;": "\U0001d4b0", | ||
| 1236 | "Utilde;": "\u0168", | ||
| 1237 | "Uuml": "\xdc", | ||
| 1238 | "Uuml;": "\xdc", | ||
| 1239 | "VDash;": "\u22ab", | ||
| 1240 | "Vbar;": "\u2aeb", | ||
| 1241 | "Vcy;": "\u0412", | ||
| 1242 | "Vdash;": "\u22a9", | ||
| 1243 | "Vdashl;": "\u2ae6", | ||
| 1244 | "Vee;": "\u22c1", | ||
| 1245 | "Verbar;": "\u2016", | ||
| 1246 | "Vert;": "\u2016", | ||
| 1247 | "VerticalBar;": "\u2223", | ||
| 1248 | "VerticalLine;": "|", | ||
| 1249 | "VerticalSeparator;": "\u2758", | ||
| 1250 | "VerticalTilde;": "\u2240", | ||
| 1251 | "VeryThinSpace;": "\u200a", | ||
| 1252 | "Vfr;": "\U0001d519", | ||
| 1253 | "Vopf;": "\U0001d54d", | ||
| 1254 | "Vscr;": "\U0001d4b1", | ||
| 1255 | "Vvdash;": "\u22aa", | ||
| 1256 | "Wcirc;": "\u0174", | ||
| 1257 | "Wedge;": "\u22c0", | ||
| 1258 | "Wfr;": "\U0001d51a", | ||
| 1259 | "Wopf;": "\U0001d54e", | ||
| 1260 | "Wscr;": "\U0001d4b2", | ||
| 1261 | "Xfr;": "\U0001d51b", | ||
| 1262 | "Xi;": "\u039e", | ||
| 1263 | "Xopf;": "\U0001d54f", | ||
| 1264 | "Xscr;": "\U0001d4b3", | ||
| 1265 | "YAcy;": "\u042f", | ||
| 1266 | "YIcy;": "\u0407", | ||
| 1267 | "YUcy;": "\u042e", | ||
| 1268 | "Yacute": "\xdd", | ||
| 1269 | "Yacute;": "\xdd", | ||
| 1270 | "Ycirc;": "\u0176", | ||
| 1271 | "Ycy;": "\u042b", | ||
| 1272 | "Yfr;": "\U0001d51c", | ||
| 1273 | "Yopf;": "\U0001d550", | ||
| 1274 | "Yscr;": "\U0001d4b4", | ||
| 1275 | "Yuml;": "\u0178", | ||
| 1276 | "ZHcy;": "\u0416", | ||
| 1277 | "Zacute;": "\u0179", | ||
| 1278 | "Zcaron;": "\u017d", | ||
| 1279 | "Zcy;": "\u0417", | ||
| 1280 | "Zdot;": "\u017b", | ||
| 1281 | "ZeroWidthSpace;": "\u200b", | ||
| 1282 | "Zeta;": "\u0396", | ||
| 1283 | "Zfr;": "\u2128", | ||
| 1284 | "Zopf;": "\u2124", | ||
| 1285 | "Zscr;": "\U0001d4b5", | ||
| 1286 | "aacute": "\xe1", | ||
| 1287 | "aacute;": "\xe1", | ||
| 1288 | "abreve;": "\u0103", | ||
| 1289 | "ac;": "\u223e", | ||
| 1290 | "acE;": "\u223e\u0333", | ||
| 1291 | "acd;": "\u223f", | ||
| 1292 | "acirc": "\xe2", | ||
| 1293 | "acirc;": "\xe2", | ||
| 1294 | "acute": "\xb4", | ||
| 1295 | "acute;": "\xb4", | ||
| 1296 | "acy;": "\u0430", | ||
| 1297 | "aelig": "\xe6", | ||
| 1298 | "aelig;": "\xe6", | ||
| 1299 | "af;": "\u2061", | ||
| 1300 | "afr;": "\U0001d51e", | ||
| 1301 | "agrave": "\xe0", | ||
| 1302 | "agrave;": "\xe0", | ||
| 1303 | "alefsym;": "\u2135", | ||
| 1304 | "aleph;": "\u2135", | ||
| 1305 | "alpha;": "\u03b1", | ||
| 1306 | "amacr;": "\u0101", | ||
| 1307 | "amalg;": "\u2a3f", | ||
| 1308 | "amp": "&", | ||
| 1309 | "amp;": "&", | ||
| 1310 | "and;": "\u2227", | ||
| 1311 | "andand;": "\u2a55", | ||
| 1312 | "andd;": "\u2a5c", | ||
| 1313 | "andslope;": "\u2a58", | ||
| 1314 | "andv;": "\u2a5a", | ||
| 1315 | "ang;": "\u2220", | ||
| 1316 | "ange;": "\u29a4", | ||
| 1317 | "angle;": "\u2220", | ||
| 1318 | "angmsd;": "\u2221", | ||
| 1319 | "angmsdaa;": "\u29a8", | ||
| 1320 | "angmsdab;": "\u29a9", | ||
| 1321 | "angmsdac;": "\u29aa", | ||
| 1322 | "angmsdad;": "\u29ab", | ||
| 1323 | "angmsdae;": "\u29ac", | ||
| 1324 | "angmsdaf;": "\u29ad", | ||
| 1325 | "angmsdag;": "\u29ae", | ||
| 1326 | "angmsdah;": "\u29af", | ||
| 1327 | "angrt;": "\u221f", | ||
| 1328 | "angrtvb;": "\u22be", | ||
| 1329 | "angrtvbd;": "\u299d", | ||
| 1330 | "angsph;": "\u2222", | ||
| 1331 | "angst;": "\xc5", | ||
| 1332 | "angzarr;": "\u237c", | ||
| 1333 | "aogon;": "\u0105", | ||
| 1334 | "aopf;": "\U0001d552", | ||
| 1335 | "ap;": "\u2248", | ||
| 1336 | "apE;": "\u2a70", | ||
| 1337 | "apacir;": "\u2a6f", | ||
| 1338 | "ape;": "\u224a", | ||
| 1339 | "apid;": "\u224b", | ||
| 1340 | "apos;": "'", | ||
| 1341 | "approx;": "\u2248", | ||
| 1342 | "approxeq;": "\u224a", | ||
| 1343 | "aring": "\xe5", | ||
| 1344 | "aring;": "\xe5", | ||
| 1345 | "ascr;": "\U0001d4b6", | ||
| 1346 | "ast;": "*", | ||
| 1347 | "asymp;": "\u2248", | ||
| 1348 | "asympeq;": "\u224d", | ||
| 1349 | "atilde": "\xe3", | ||
| 1350 | "atilde;": "\xe3", | ||
| 1351 | "auml": "\xe4", | ||
| 1352 | "auml;": "\xe4", | ||
| 1353 | "awconint;": "\u2233", | ||
| 1354 | "awint;": "\u2a11", | ||
| 1355 | "bNot;": "\u2aed", | ||
| 1356 | "backcong;": "\u224c", | ||
| 1357 | "backepsilon;": "\u03f6", | ||
| 1358 | "backprime;": "\u2035", | ||
| 1359 | "backsim;": "\u223d", | ||
| 1360 | "backsimeq;": "\u22cd", | ||
| 1361 | "barvee;": "\u22bd", | ||
| 1362 | "barwed;": "\u2305", | ||
| 1363 | "barwedge;": "\u2305", | ||
| 1364 | "bbrk;": "\u23b5", | ||
| 1365 | "bbrktbrk;": "\u23b6", | ||
| 1366 | "bcong;": "\u224c", | ||
| 1367 | "bcy;": "\u0431", | ||
| 1368 | "bdquo;": "\u201e", | ||
| 1369 | "becaus;": "\u2235", | ||
| 1370 | "because;": "\u2235", | ||
| 1371 | "bemptyv;": "\u29b0", | ||
| 1372 | "bepsi;": "\u03f6", | ||
| 1373 | "bernou;": "\u212c", | ||
| 1374 | "beta;": "\u03b2", | ||
| 1375 | "beth;": "\u2136", | ||
| 1376 | "between;": "\u226c", | ||
| 1377 | "bfr;": "\U0001d51f", | ||
| 1378 | "bigcap;": "\u22c2", | ||
| 1379 | "bigcirc;": "\u25ef", | ||
| 1380 | "bigcup;": "\u22c3", | ||
| 1381 | "bigodot;": "\u2a00", | ||
| 1382 | "bigoplus;": "\u2a01", | ||
| 1383 | "bigotimes;": "\u2a02", | ||
| 1384 | "bigsqcup;": "\u2a06", | ||
| 1385 | "bigstar;": "\u2605", | ||
| 1386 | "bigtriangledown;": "\u25bd", | ||
| 1387 | "bigtriangleup;": "\u25b3", | ||
| 1388 | "biguplus;": "\u2a04", | ||
| 1389 | "bigvee;": "\u22c1", | ||
| 1390 | "bigwedge;": "\u22c0", | ||
| 1391 | "bkarow;": "\u290d", | ||
| 1392 | "blacklozenge;": "\u29eb", | ||
| 1393 | "blacksquare;": "\u25aa", | ||
| 1394 | "blacktriangle;": "\u25b4", | ||
| 1395 | "blacktriangledown;": "\u25be", | ||
| 1396 | "blacktriangleleft;": "\u25c2", | ||
| 1397 | "blacktriangleright;": "\u25b8", | ||
| 1398 | "blank;": "\u2423", | ||
| 1399 | "blk12;": "\u2592", | ||
| 1400 | "blk14;": "\u2591", | ||
| 1401 | "blk34;": "\u2593", | ||
| 1402 | "block;": "\u2588", | ||
| 1403 | "bne;": "=\u20e5", | ||
| 1404 | "bnequiv;": "\u2261\u20e5", | ||
| 1405 | "bnot;": "\u2310", | ||
| 1406 | "bopf;": "\U0001d553", | ||
| 1407 | "bot;": "\u22a5", | ||
| 1408 | "bottom;": "\u22a5", | ||
| 1409 | "bowtie;": "\u22c8", | ||
| 1410 | "boxDL;": "\u2557", | ||
| 1411 | "boxDR;": "\u2554", | ||
| 1412 | "boxDl;": "\u2556", | ||
| 1413 | "boxDr;": "\u2553", | ||
| 1414 | "boxH;": "\u2550", | ||
| 1415 | "boxHD;": "\u2566", | ||
| 1416 | "boxHU;": "\u2569", | ||
| 1417 | "boxHd;": "\u2564", | ||
| 1418 | "boxHu;": "\u2567", | ||
| 1419 | "boxUL;": "\u255d", | ||
| 1420 | "boxUR;": "\u255a", | ||
| 1421 | "boxUl;": "\u255c", | ||
| 1422 | "boxUr;": "\u2559", | ||
| 1423 | "boxV;": "\u2551", | ||
| 1424 | "boxVH;": "\u256c", | ||
| 1425 | "boxVL;": "\u2563", | ||
| 1426 | "boxVR;": "\u2560", | ||
| 1427 | "boxVh;": "\u256b", | ||
| 1428 | "boxVl;": "\u2562", | ||
| 1429 | "boxVr;": "\u255f", | ||
| 1430 | "boxbox;": "\u29c9", | ||
| 1431 | "boxdL;": "\u2555", | ||
| 1432 | "boxdR;": "\u2552", | ||
| 1433 | "boxdl;": "\u2510", | ||
| 1434 | "boxdr;": "\u250c", | ||
| 1435 | "boxh;": "\u2500", | ||
| 1436 | "boxhD;": "\u2565", | ||
| 1437 | "boxhU;": "\u2568", | ||
| 1438 | "boxhd;": "\u252c", | ||
| 1439 | "boxhu;": "\u2534", | ||
| 1440 | "boxminus;": "\u229f", | ||
| 1441 | "boxplus;": "\u229e", | ||
| 1442 | "boxtimes;": "\u22a0", | ||
| 1443 | "boxuL;": "\u255b", | ||
| 1444 | "boxuR;": "\u2558", | ||
| 1445 | "boxul;": "\u2518", | ||
| 1446 | "boxur;": "\u2514", | ||
| 1447 | "boxv;": "\u2502", | ||
| 1448 | "boxvH;": "\u256a", | ||
| 1449 | "boxvL;": "\u2561", | ||
| 1450 | "boxvR;": "\u255e", | ||
| 1451 | "boxvh;": "\u253c", | ||
| 1452 | "boxvl;": "\u2524", | ||
| 1453 | "boxvr;": "\u251c", | ||
| 1454 | "bprime;": "\u2035", | ||
| 1455 | "breve;": "\u02d8", | ||
| 1456 | "brvbar": "\xa6", | ||
| 1457 | "brvbar;": "\xa6", | ||
| 1458 | "bscr;": "\U0001d4b7", | ||
| 1459 | "bsemi;": "\u204f", | ||
| 1460 | "bsim;": "\u223d", | ||
| 1461 | "bsime;": "\u22cd", | ||
| 1462 | "bsol;": "\\", | ||
| 1463 | "bsolb;": "\u29c5", | ||
| 1464 | "bsolhsub;": "\u27c8", | ||
| 1465 | "bull;": "\u2022", | ||
| 1466 | "bullet;": "\u2022", | ||
| 1467 | "bump;": "\u224e", | ||
| 1468 | "bumpE;": "\u2aae", | ||
| 1469 | "bumpe;": "\u224f", | ||
| 1470 | "bumpeq;": "\u224f", | ||
| 1471 | "cacute;": "\u0107", | ||
| 1472 | "cap;": "\u2229", | ||
| 1473 | "capand;": "\u2a44", | ||
| 1474 | "capbrcup;": "\u2a49", | ||
| 1475 | "capcap;": "\u2a4b", | ||
| 1476 | "capcup;": "\u2a47", | ||
| 1477 | "capdot;": "\u2a40", | ||
| 1478 | "caps;": "\u2229\ufe00", | ||
| 1479 | "caret;": "\u2041", | ||
| 1480 | "caron;": "\u02c7", | ||
| 1481 | "ccaps;": "\u2a4d", | ||
| 1482 | "ccaron;": "\u010d", | ||
| 1483 | "ccedil": "\xe7", | ||
| 1484 | "ccedil;": "\xe7", | ||
| 1485 | "ccirc;": "\u0109", | ||
| 1486 | "ccups;": "\u2a4c", | ||
| 1487 | "ccupssm;": "\u2a50", | ||
| 1488 | "cdot;": "\u010b", | ||
| 1489 | "cedil": "\xb8", | ||
| 1490 | "cedil;": "\xb8", | ||
| 1491 | "cemptyv;": "\u29b2", | ||
| 1492 | "cent": "\xa2", | ||
| 1493 | "cent;": "\xa2", | ||
| 1494 | "centerdot;": "\xb7", | ||
| 1495 | "cfr;": "\U0001d520", | ||
| 1496 | "chcy;": "\u0447", | ||
| 1497 | "check;": "\u2713", | ||
| 1498 | "checkmark;": "\u2713", | ||
| 1499 | "chi;": "\u03c7", | ||
| 1500 | "cir;": "\u25cb", | ||
| 1501 | "cirE;": "\u29c3", | ||
| 1502 | "circ;": "\u02c6", | ||
| 1503 | "circeq;": "\u2257", | ||
| 1504 | "circlearrowleft;": "\u21ba", | ||
| 1505 | "circlearrowright;": "\u21bb", | ||
| 1506 | "circledR;": "\xae", | ||
| 1507 | "circledS;": "\u24c8", | ||
| 1508 | "circledast;": "\u229b", | ||
| 1509 | "circledcirc;": "\u229a", | ||
| 1510 | "circleddash;": "\u229d", | ||
| 1511 | "cire;": "\u2257", | ||
| 1512 | "cirfnint;": "\u2a10", | ||
| 1513 | "cirmid;": "\u2aef", | ||
| 1514 | "cirscir;": "\u29c2", | ||
| 1515 | "clubs;": "\u2663", | ||
| 1516 | "clubsuit;": "\u2663", | ||
| 1517 | "colon;": ":", | ||
| 1518 | "colone;": "\u2254", | ||
| 1519 | "coloneq;": "\u2254", | ||
| 1520 | "comma;": ",", | ||
| 1521 | "commat;": "@", | ||
| 1522 | "comp;": "\u2201", | ||
| 1523 | "compfn;": "\u2218", | ||
| 1524 | "complement;": "\u2201", | ||
| 1525 | "complexes;": "\u2102", | ||
| 1526 | "cong;": "\u2245", | ||
| 1527 | "congdot;": "\u2a6d", | ||
| 1528 | "conint;": "\u222e", | ||
| 1529 | "copf;": "\U0001d554", | ||
| 1530 | "coprod;": "\u2210", | ||
| 1531 | "copy": "\xa9", | ||
| 1532 | "copy;": "\xa9", | ||
| 1533 | "copysr;": "\u2117", | ||
| 1534 | "crarr;": "\u21b5", | ||
| 1535 | "cross;": "\u2717", | ||
| 1536 | "cscr;": "\U0001d4b8", | ||
| 1537 | "csub;": "\u2acf", | ||
| 1538 | "csube;": "\u2ad1", | ||
| 1539 | "csup;": "\u2ad0", | ||
| 1540 | "csupe;": "\u2ad2", | ||
| 1541 | "ctdot;": "\u22ef", | ||
| 1542 | "cudarrl;": "\u2938", | ||
| 1543 | "cudarrr;": "\u2935", | ||
| 1544 | "cuepr;": "\u22de", | ||
| 1545 | "cuesc;": "\u22df", | ||
| 1546 | "cularr;": "\u21b6", | ||
| 1547 | "cularrp;": "\u293d", | ||
| 1548 | "cup;": "\u222a", | ||
| 1549 | "cupbrcap;": "\u2a48", | ||
| 1550 | "cupcap;": "\u2a46", | ||
| 1551 | "cupcup;": "\u2a4a", | ||
| 1552 | "cupdot;": "\u228d", | ||
| 1553 | "cupor;": "\u2a45", | ||
| 1554 | "cups;": "\u222a\ufe00", | ||
| 1555 | "curarr;": "\u21b7", | ||
| 1556 | "curarrm;": "\u293c", | ||
| 1557 | "curlyeqprec;": "\u22de", | ||
| 1558 | "curlyeqsucc;": "\u22df", | ||
| 1559 | "curlyvee;": "\u22ce", | ||
| 1560 | "curlywedge;": "\u22cf", | ||
| 1561 | "curren": "\xa4", | ||
| 1562 | "curren;": "\xa4", | ||
| 1563 | "curvearrowleft;": "\u21b6", | ||
| 1564 | "curvearrowright;": "\u21b7", | ||
| 1565 | "cuvee;": "\u22ce", | ||
| 1566 | "cuwed;": "\u22cf", | ||
| 1567 | "cwconint;": "\u2232", | ||
| 1568 | "cwint;": "\u2231", | ||
| 1569 | "cylcty;": "\u232d", | ||
| 1570 | "dArr;": "\u21d3", | ||
| 1571 | "dHar;": "\u2965", | ||
| 1572 | "dagger;": "\u2020", | ||
| 1573 | "daleth;": "\u2138", | ||
| 1574 | "darr;": "\u2193", | ||
| 1575 | "dash;": "\u2010", | ||
| 1576 | "dashv;": "\u22a3", | ||
| 1577 | "dbkarow;": "\u290f", | ||
| 1578 | "dblac;": "\u02dd", | ||
| 1579 | "dcaron;": "\u010f", | ||
| 1580 | "dcy;": "\u0434", | ||
| 1581 | "dd;": "\u2146", | ||
| 1582 | "ddagger;": "\u2021", | ||
| 1583 | "ddarr;": "\u21ca", | ||
| 1584 | "ddotseq;": "\u2a77", | ||
| 1585 | "deg": "\xb0", | ||
| 1586 | "deg;": "\xb0", | ||
| 1587 | "delta;": "\u03b4", | ||
| 1588 | "demptyv;": "\u29b1", | ||
| 1589 | "dfisht;": "\u297f", | ||
| 1590 | "dfr;": "\U0001d521", | ||
| 1591 | "dharl;": "\u21c3", | ||
| 1592 | "dharr;": "\u21c2", | ||
| 1593 | "diam;": "\u22c4", | ||
| 1594 | "diamond;": "\u22c4", | ||
| 1595 | "diamondsuit;": "\u2666", | ||
| 1596 | "diams;": "\u2666", | ||
| 1597 | "die;": "\xa8", | ||
| 1598 | "digamma;": "\u03dd", | ||
| 1599 | "disin;": "\u22f2", | ||
| 1600 | "div;": "\xf7", | ||
| 1601 | "divide": "\xf7", | ||
| 1602 | "divide;": "\xf7", | ||
| 1603 | "divideontimes;": "\u22c7", | ||
| 1604 | "divonx;": "\u22c7", | ||
| 1605 | "djcy;": "\u0452", | ||
| 1606 | "dlcorn;": "\u231e", | ||
| 1607 | "dlcrop;": "\u230d", | ||
| 1608 | "dollar;": "$", | ||
| 1609 | "dopf;": "\U0001d555", | ||
| 1610 | "dot;": "\u02d9", | ||
| 1611 | "doteq;": "\u2250", | ||
| 1612 | "doteqdot;": "\u2251", | ||
| 1613 | "dotminus;": "\u2238", | ||
| 1614 | "dotplus;": "\u2214", | ||
| 1615 | "dotsquare;": "\u22a1", | ||
| 1616 | "doublebarwedge;": "\u2306", | ||
| 1617 | "downarrow;": "\u2193", | ||
| 1618 | "downdownarrows;": "\u21ca", | ||
| 1619 | "downharpoonleft;": "\u21c3", | ||
| 1620 | "downharpoonright;": "\u21c2", | ||
| 1621 | "drbkarow;": "\u2910", | ||
| 1622 | "drcorn;": "\u231f", | ||
| 1623 | "drcrop;": "\u230c", | ||
| 1624 | "dscr;": "\U0001d4b9", | ||
| 1625 | "dscy;": "\u0455", | ||
| 1626 | "dsol;": "\u29f6", | ||
| 1627 | "dstrok;": "\u0111", | ||
| 1628 | "dtdot;": "\u22f1", | ||
| 1629 | "dtri;": "\u25bf", | ||
| 1630 | "dtrif;": "\u25be", | ||
| 1631 | "duarr;": "\u21f5", | ||
| 1632 | "duhar;": "\u296f", | ||
| 1633 | "dwangle;": "\u29a6", | ||
| 1634 | "dzcy;": "\u045f", | ||
| 1635 | "dzigrarr;": "\u27ff", | ||
| 1636 | "eDDot;": "\u2a77", | ||
| 1637 | "eDot;": "\u2251", | ||
| 1638 | "eacute": "\xe9", | ||
| 1639 | "eacute;": "\xe9", | ||
| 1640 | "easter;": "\u2a6e", | ||
| 1641 | "ecaron;": "\u011b", | ||
| 1642 | "ecir;": "\u2256", | ||
| 1643 | "ecirc": "\xea", | ||
| 1644 | "ecirc;": "\xea", | ||
| 1645 | "ecolon;": "\u2255", | ||
| 1646 | "ecy;": "\u044d", | ||
| 1647 | "edot;": "\u0117", | ||
| 1648 | "ee;": "\u2147", | ||
| 1649 | "efDot;": "\u2252", | ||
| 1650 | "efr;": "\U0001d522", | ||
| 1651 | "eg;": "\u2a9a", | ||
| 1652 | "egrave": "\xe8", | ||
| 1653 | "egrave;": "\xe8", | ||
| 1654 | "egs;": "\u2a96", | ||
| 1655 | "egsdot;": "\u2a98", | ||
| 1656 | "el;": "\u2a99", | ||
| 1657 | "elinters;": "\u23e7", | ||
| 1658 | "ell;": "\u2113", | ||
| 1659 | "els;": "\u2a95", | ||
| 1660 | "elsdot;": "\u2a97", | ||
| 1661 | "emacr;": "\u0113", | ||
| 1662 | "empty;": "\u2205", | ||
| 1663 | "emptyset;": "\u2205", | ||
| 1664 | "emptyv;": "\u2205", | ||
| 1665 | "emsp13;": "\u2004", | ||
| 1666 | "emsp14;": "\u2005", | ||
| 1667 | "emsp;": "\u2003", | ||
| 1668 | "eng;": "\u014b", | ||
| 1669 | "ensp;": "\u2002", | ||
| 1670 | "eogon;": "\u0119", | ||
| 1671 | "eopf;": "\U0001d556", | ||
| 1672 | "epar;": "\u22d5", | ||
| 1673 | "eparsl;": "\u29e3", | ||
| 1674 | "eplus;": "\u2a71", | ||
| 1675 | "epsi;": "\u03b5", | ||
| 1676 | "epsilon;": "\u03b5", | ||
| 1677 | "epsiv;": "\u03f5", | ||
| 1678 | "eqcirc;": "\u2256", | ||
| 1679 | "eqcolon;": "\u2255", | ||
| 1680 | "eqsim;": "\u2242", | ||
| 1681 | "eqslantgtr;": "\u2a96", | ||
| 1682 | "eqslantless;": "\u2a95", | ||
| 1683 | "equals;": "=", | ||
| 1684 | "equest;": "\u225f", | ||
| 1685 | "equiv;": "\u2261", | ||
| 1686 | "equivDD;": "\u2a78", | ||
| 1687 | "eqvparsl;": "\u29e5", | ||
| 1688 | "erDot;": "\u2253", | ||
| 1689 | "erarr;": "\u2971", | ||
| 1690 | "escr;": "\u212f", | ||
| 1691 | "esdot;": "\u2250", | ||
| 1692 | "esim;": "\u2242", | ||
| 1693 | "eta;": "\u03b7", | ||
| 1694 | "eth": "\xf0", | ||
| 1695 | "eth;": "\xf0", | ||
| 1696 | "euml": "\xeb", | ||
| 1697 | "euml;": "\xeb", | ||
| 1698 | "euro;": "\u20ac", | ||
| 1699 | "excl;": "!", | ||
| 1700 | "exist;": "\u2203", | ||
| 1701 | "expectation;": "\u2130", | ||
| 1702 | "exponentiale;": "\u2147", | ||
| 1703 | "fallingdotseq;": "\u2252", | ||
| 1704 | "fcy;": "\u0444", | ||
| 1705 | "female;": "\u2640", | ||
| 1706 | "ffilig;": "\ufb03", | ||
| 1707 | "fflig;": "\ufb00", | ||
| 1708 | "ffllig;": "\ufb04", | ||
| 1709 | "ffr;": "\U0001d523", | ||
| 1710 | "filig;": "\ufb01", | ||
| 1711 | "fjlig;": "fj", | ||
| 1712 | "flat;": "\u266d", | ||
| 1713 | "fllig;": "\ufb02", | ||
| 1714 | "fltns;": "\u25b1", | ||
| 1715 | "fnof;": "\u0192", | ||
| 1716 | "fopf;": "\U0001d557", | ||
| 1717 | "forall;": "\u2200", | ||
| 1718 | "fork;": "\u22d4", | ||
| 1719 | "forkv;": "\u2ad9", | ||
| 1720 | "fpartint;": "\u2a0d", | ||
| 1721 | "frac12": "\xbd", | ||
| 1722 | "frac12;": "\xbd", | ||
| 1723 | "frac13;": "\u2153", | ||
| 1724 | "frac14": "\xbc", | ||
| 1725 | "frac14;": "\xbc", | ||
| 1726 | "frac15;": "\u2155", | ||
| 1727 | "frac16;": "\u2159", | ||
| 1728 | "frac18;": "\u215b", | ||
| 1729 | "frac23;": "\u2154", | ||
| 1730 | "frac25;": "\u2156", | ||
| 1731 | "frac34": "\xbe", | ||
| 1732 | "frac34;": "\xbe", | ||
| 1733 | "frac35;": "\u2157", | ||
| 1734 | "frac38;": "\u215c", | ||
| 1735 | "frac45;": "\u2158", | ||
| 1736 | "frac56;": "\u215a", | ||
| 1737 | "frac58;": "\u215d", | ||
| 1738 | "frac78;": "\u215e", | ||
| 1739 | "frasl;": "\u2044", | ||
| 1740 | "frown;": "\u2322", | ||
| 1741 | "fscr;": "\U0001d4bb", | ||
| 1742 | "gE;": "\u2267", | ||
| 1743 | "gEl;": "\u2a8c", | ||
| 1744 | "gacute;": "\u01f5", | ||
| 1745 | "gamma;": "\u03b3", | ||
| 1746 | "gammad;": "\u03dd", | ||
| 1747 | "gap;": "\u2a86", | ||
| 1748 | "gbreve;": "\u011f", | ||
| 1749 | "gcirc;": "\u011d", | ||
| 1750 | "gcy;": "\u0433", | ||
| 1751 | "gdot;": "\u0121", | ||
| 1752 | "ge;": "\u2265", | ||
| 1753 | "gel;": "\u22db", | ||
| 1754 | "geq;": "\u2265", | ||
| 1755 | "geqq;": "\u2267", | ||
| 1756 | "geqslant;": "\u2a7e", | ||
| 1757 | "ges;": "\u2a7e", | ||
| 1758 | "gescc;": "\u2aa9", | ||
| 1759 | "gesdot;": "\u2a80", | ||
| 1760 | "gesdoto;": "\u2a82", | ||
| 1761 | "gesdotol;": "\u2a84", | ||
| 1762 | "gesl;": "\u22db\ufe00", | ||
| 1763 | "gesles;": "\u2a94", | ||
| 1764 | "gfr;": "\U0001d524", | ||
| 1765 | "gg;": "\u226b", | ||
| 1766 | "ggg;": "\u22d9", | ||
| 1767 | "gimel;": "\u2137", | ||
| 1768 | "gjcy;": "\u0453", | ||
| 1769 | "gl;": "\u2277", | ||
| 1770 | "glE;": "\u2a92", | ||
| 1771 | "gla;": "\u2aa5", | ||
| 1772 | "glj;": "\u2aa4", | ||
| 1773 | "gnE;": "\u2269", | ||
| 1774 | "gnap;": "\u2a8a", | ||
| 1775 | "gnapprox;": "\u2a8a", | ||
| 1776 | "gne;": "\u2a88", | ||
| 1777 | "gneq;": "\u2a88", | ||
| 1778 | "gneqq;": "\u2269", | ||
| 1779 | "gnsim;": "\u22e7", | ||
| 1780 | "gopf;": "\U0001d558", | ||
| 1781 | "grave;": "`", | ||
| 1782 | "gscr;": "\u210a", | ||
| 1783 | "gsim;": "\u2273", | ||
| 1784 | "gsime;": "\u2a8e", | ||
| 1785 | "gsiml;": "\u2a90", | ||
| 1786 | "gt": ">", | ||
| 1787 | "gt;": ">", | ||
| 1788 | "gtcc;": "\u2aa7", | ||
| 1789 | "gtcir;": "\u2a7a", | ||
| 1790 | "gtdot;": "\u22d7", | ||
| 1791 | "gtlPar;": "\u2995", | ||
| 1792 | "gtquest;": "\u2a7c", | ||
| 1793 | "gtrapprox;": "\u2a86", | ||
| 1794 | "gtrarr;": "\u2978", | ||
| 1795 | "gtrdot;": "\u22d7", | ||
| 1796 | "gtreqless;": "\u22db", | ||
| 1797 | "gtreqqless;": "\u2a8c", | ||
| 1798 | "gtrless;": "\u2277", | ||
| 1799 | "gtrsim;": "\u2273", | ||
| 1800 | "gvertneqq;": "\u2269\ufe00", | ||
| 1801 | "gvnE;": "\u2269\ufe00", | ||
| 1802 | "hArr;": "\u21d4", | ||
| 1803 | "hairsp;": "\u200a", | ||
| 1804 | "half;": "\xbd", | ||
| 1805 | "hamilt;": "\u210b", | ||
| 1806 | "hardcy;": "\u044a", | ||
| 1807 | "harr;": "\u2194", | ||
| 1808 | "harrcir;": "\u2948", | ||
| 1809 | "harrw;": "\u21ad", | ||
| 1810 | "hbar;": "\u210f", | ||
| 1811 | "hcirc;": "\u0125", | ||
| 1812 | "hearts;": "\u2665", | ||
| 1813 | "heartsuit;": "\u2665", | ||
| 1814 | "hellip;": "\u2026", | ||
| 1815 | "hercon;": "\u22b9", | ||
| 1816 | "hfr;": "\U0001d525", | ||
| 1817 | "hksearow;": "\u2925", | ||
| 1818 | "hkswarow;": "\u2926", | ||
| 1819 | "hoarr;": "\u21ff", | ||
| 1820 | "homtht;": "\u223b", | ||
| 1821 | "hookleftarrow;": "\u21a9", | ||
| 1822 | "hookrightarrow;": "\u21aa", | ||
| 1823 | "hopf;": "\U0001d559", | ||
| 1824 | "horbar;": "\u2015", | ||
| 1825 | "hscr;": "\U0001d4bd", | ||
| 1826 | "hslash;": "\u210f", | ||
| 1827 | "hstrok;": "\u0127", | ||
| 1828 | "hybull;": "\u2043", | ||
| 1829 | "hyphen;": "\u2010", | ||
| 1830 | "iacute": "\xed", | ||
| 1831 | "iacute;": "\xed", | ||
| 1832 | "ic;": "\u2063", | ||
| 1833 | "icirc": "\xee", | ||
| 1834 | "icirc;": "\xee", | ||
| 1835 | "icy;": "\u0438", | ||
| 1836 | "iecy;": "\u0435", | ||
| 1837 | "iexcl": "\xa1", | ||
| 1838 | "iexcl;": "\xa1", | ||
| 1839 | "iff;": "\u21d4", | ||
| 1840 | "ifr;": "\U0001d526", | ||
| 1841 | "igrave": "\xec", | ||
| 1842 | "igrave;": "\xec", | ||
| 1843 | "ii;": "\u2148", | ||
| 1844 | "iiiint;": "\u2a0c", | ||
| 1845 | "iiint;": "\u222d", | ||
| 1846 | "iinfin;": "\u29dc", | ||
| 1847 | "iiota;": "\u2129", | ||
| 1848 | "ijlig;": "\u0133", | ||
| 1849 | "imacr;": "\u012b", | ||
| 1850 | "image;": "\u2111", | ||
| 1851 | "imagline;": "\u2110", | ||
| 1852 | "imagpart;": "\u2111", | ||
| 1853 | "imath;": "\u0131", | ||
| 1854 | "imof;": "\u22b7", | ||
| 1855 | "imped;": "\u01b5", | ||
| 1856 | "in;": "\u2208", | ||
| 1857 | "incare;": "\u2105", | ||
| 1858 | "infin;": "\u221e", | ||
| 1859 | "infintie;": "\u29dd", | ||
| 1860 | "inodot;": "\u0131", | ||
| 1861 | "int;": "\u222b", | ||
| 1862 | "intcal;": "\u22ba", | ||
| 1863 | "integers;": "\u2124", | ||
| 1864 | "intercal;": "\u22ba", | ||
| 1865 | "intlarhk;": "\u2a17", | ||
| 1866 | "intprod;": "\u2a3c", | ||
| 1867 | "iocy;": "\u0451", | ||
| 1868 | "iogon;": "\u012f", | ||
| 1869 | "iopf;": "\U0001d55a", | ||
| 1870 | "iota;": "\u03b9", | ||
| 1871 | "iprod;": "\u2a3c", | ||
| 1872 | "iquest": "\xbf", | ||
| 1873 | "iquest;": "\xbf", | ||
| 1874 | "iscr;": "\U0001d4be", | ||
| 1875 | "isin;": "\u2208", | ||
| 1876 | "isinE;": "\u22f9", | ||
| 1877 | "isindot;": "\u22f5", | ||
| 1878 | "isins;": "\u22f4", | ||
| 1879 | "isinsv;": "\u22f3", | ||
| 1880 | "isinv;": "\u2208", | ||
| 1881 | "it;": "\u2062", | ||
| 1882 | "itilde;": "\u0129", | ||
| 1883 | "iukcy;": "\u0456", | ||
| 1884 | "iuml": "\xef", | ||
| 1885 | "iuml;": "\xef", | ||
| 1886 | "jcirc;": "\u0135", | ||
| 1887 | "jcy;": "\u0439", | ||
| 1888 | "jfr;": "\U0001d527", | ||
| 1889 | "jmath;": "\u0237", | ||
| 1890 | "jopf;": "\U0001d55b", | ||
| 1891 | "jscr;": "\U0001d4bf", | ||
| 1892 | "jsercy;": "\u0458", | ||
| 1893 | "jukcy;": "\u0454", | ||
| 1894 | "kappa;": "\u03ba", | ||
| 1895 | "kappav;": "\u03f0", | ||
| 1896 | "kcedil;": "\u0137", | ||
| 1897 | "kcy;": "\u043a", | ||
| 1898 | "kfr;": "\U0001d528", | ||
| 1899 | "kgreen;": "\u0138", | ||
| 1900 | "khcy;": "\u0445", | ||
| 1901 | "kjcy;": "\u045c", | ||
| 1902 | "kopf;": "\U0001d55c", | ||
| 1903 | "kscr;": "\U0001d4c0", | ||
| 1904 | "lAarr;": "\u21da", | ||
| 1905 | "lArr;": "\u21d0", | ||
| 1906 | "lAtail;": "\u291b", | ||
| 1907 | "lBarr;": "\u290e", | ||
| 1908 | "lE;": "\u2266", | ||
| 1909 | "lEg;": "\u2a8b", | ||
| 1910 | "lHar;": "\u2962", | ||
| 1911 | "lacute;": "\u013a", | ||
| 1912 | "laemptyv;": "\u29b4", | ||
| 1913 | "lagran;": "\u2112", | ||
| 1914 | "lambda;": "\u03bb", | ||
| 1915 | "lang;": "\u27e8", | ||
| 1916 | "langd;": "\u2991", | ||
| 1917 | "langle;": "\u27e8", | ||
| 1918 | "lap;": "\u2a85", | ||
| 1919 | "laquo": "\xab", | ||
| 1920 | "laquo;": "\xab", | ||
| 1921 | "larr;": "\u2190", | ||
| 1922 | "larrb;": "\u21e4", | ||
| 1923 | "larrbfs;": "\u291f", | ||
| 1924 | "larrfs;": "\u291d", | ||
| 1925 | "larrhk;": "\u21a9", | ||
| 1926 | "larrlp;": "\u21ab", | ||
| 1927 | "larrpl;": "\u2939", | ||
| 1928 | "larrsim;": "\u2973", | ||
| 1929 | "larrtl;": "\u21a2", | ||
| 1930 | "lat;": "\u2aab", | ||
| 1931 | "latail;": "\u2919", | ||
| 1932 | "late;": "\u2aad", | ||
| 1933 | "lates;": "\u2aad\ufe00", | ||
| 1934 | "lbarr;": "\u290c", | ||
| 1935 | "lbbrk;": "\u2772", | ||
| 1936 | "lbrace;": "{", | ||
| 1937 | "lbrack;": "[", | ||
| 1938 | "lbrke;": "\u298b", | ||
| 1939 | "lbrksld;": "\u298f", | ||
| 1940 | "lbrkslu;": "\u298d", | ||
| 1941 | "lcaron;": "\u013e", | ||
| 1942 | "lcedil;": "\u013c", | ||
| 1943 | "lceil;": "\u2308", | ||
| 1944 | "lcub;": "{", | ||
| 1945 | "lcy;": "\u043b", | ||
| 1946 | "ldca;": "\u2936", | ||
| 1947 | "ldquo;": "\u201c", | ||
| 1948 | "ldquor;": "\u201e", | ||
| 1949 | "ldrdhar;": "\u2967", | ||
| 1950 | "ldrushar;": "\u294b", | ||
| 1951 | "ldsh;": "\u21b2", | ||
| 1952 | "le;": "\u2264", | ||
| 1953 | "leftarrow;": "\u2190", | ||
| 1954 | "leftarrowtail;": "\u21a2", | ||
| 1955 | "leftharpoondown;": "\u21bd", | ||
| 1956 | "leftharpoonup;": "\u21bc", | ||
| 1957 | "leftleftarrows;": "\u21c7", | ||
| 1958 | "leftrightarrow;": "\u2194", | ||
| 1959 | "leftrightarrows;": "\u21c6", | ||
| 1960 | "leftrightharpoons;": "\u21cb", | ||
| 1961 | "leftrightsquigarrow;": "\u21ad", | ||
| 1962 | "leftthreetimes;": "\u22cb", | ||
| 1963 | "leg;": "\u22da", | ||
| 1964 | "leq;": "\u2264", | ||
| 1965 | "leqq;": "\u2266", | ||
| 1966 | "leqslant;": "\u2a7d", | ||
| 1967 | "les;": "\u2a7d", | ||
| 1968 | "lescc;": "\u2aa8", | ||
| 1969 | "lesdot;": "\u2a7f", | ||
| 1970 | "lesdoto;": "\u2a81", | ||
| 1971 | "lesdotor;": "\u2a83", | ||
| 1972 | "lesg;": "\u22da\ufe00", | ||
| 1973 | "lesges;": "\u2a93", | ||
| 1974 | "lessapprox;": "\u2a85", | ||
| 1975 | "lessdot;": "\u22d6", | ||
| 1976 | "lesseqgtr;": "\u22da", | ||
| 1977 | "lesseqqgtr;": "\u2a8b", | ||
| 1978 | "lessgtr;": "\u2276", | ||
| 1979 | "lesssim;": "\u2272", | ||
| 1980 | "lfisht;": "\u297c", | ||
| 1981 | "lfloor;": "\u230a", | ||
| 1982 | "lfr;": "\U0001d529", | ||
| 1983 | "lg;": "\u2276", | ||
| 1984 | "lgE;": "\u2a91", | ||
| 1985 | "lhard;": "\u21bd", | ||
| 1986 | "lharu;": "\u21bc", | ||
| 1987 | "lharul;": "\u296a", | ||
| 1988 | "lhblk;": "\u2584", | ||
| 1989 | "ljcy;": "\u0459", | ||
| 1990 | "ll;": "\u226a", | ||
| 1991 | "llarr;": "\u21c7", | ||
| 1992 | "llcorner;": "\u231e", | ||
| 1993 | "llhard;": "\u296b", | ||
| 1994 | "lltri;": "\u25fa", | ||
| 1995 | "lmidot;": "\u0140", | ||
| 1996 | "lmoust;": "\u23b0", | ||
| 1997 | "lmoustache;": "\u23b0", | ||
| 1998 | "lnE;": "\u2268", | ||
| 1999 | "lnap;": "\u2a89", | ||
| 2000 | "lnapprox;": "\u2a89", | ||
| 2001 | "lne;": "\u2a87", | ||
| 2002 | "lneq;": "\u2a87", | ||
| 2003 | "lneqq;": "\u2268", | ||
| 2004 | "lnsim;": "\u22e6", | ||
| 2005 | "loang;": "\u27ec", | ||
| 2006 | "loarr;": "\u21fd", | ||
| 2007 | "lobrk;": "\u27e6", | ||
| 2008 | "longleftarrow;": "\u27f5", | ||
| 2009 | "longleftrightarrow;": "\u27f7", | ||
| 2010 | "longmapsto;": "\u27fc", | ||
| 2011 | "longrightarrow;": "\u27f6", | ||
| 2012 | "looparrowleft;": "\u21ab", | ||
| 2013 | "looparrowright;": "\u21ac", | ||
| 2014 | "lopar;": "\u2985", | ||
| 2015 | "lopf;": "\U0001d55d", | ||
| 2016 | "loplus;": "\u2a2d", | ||
| 2017 | "lotimes;": "\u2a34", | ||
| 2018 | "lowast;": "\u2217", | ||
| 2019 | "lowbar;": "_", | ||
| 2020 | "loz;": "\u25ca", | ||
| 2021 | "lozenge;": "\u25ca", | ||
| 2022 | "lozf;": "\u29eb", | ||
| 2023 | "lpar;": "(", | ||
| 2024 | "lparlt;": "\u2993", | ||
| 2025 | "lrarr;": "\u21c6", | ||
| 2026 | "lrcorner;": "\u231f", | ||
| 2027 | "lrhar;": "\u21cb", | ||
| 2028 | "lrhard;": "\u296d", | ||
| 2029 | "lrm;": "\u200e", | ||
| 2030 | "lrtri;": "\u22bf", | ||
| 2031 | "lsaquo;": "\u2039", | ||
| 2032 | "lscr;": "\U0001d4c1", | ||
| 2033 | "lsh;": "\u21b0", | ||
| 2034 | "lsim;": "\u2272", | ||
| 2035 | "lsime;": "\u2a8d", | ||
| 2036 | "lsimg;": "\u2a8f", | ||
| 2037 | "lsqb;": "[", | ||
| 2038 | "lsquo;": "\u2018", | ||
| 2039 | "lsquor;": "\u201a", | ||
| 2040 | "lstrok;": "\u0142", | ||
| 2041 | "lt": "<", | ||
| 2042 | "lt;": "<", | ||
| 2043 | "ltcc;": "\u2aa6", | ||
| 2044 | "ltcir;": "\u2a79", | ||
| 2045 | "ltdot;": "\u22d6", | ||
| 2046 | "lthree;": "\u22cb", | ||
| 2047 | "ltimes;": "\u22c9", | ||
| 2048 | "ltlarr;": "\u2976", | ||
| 2049 | "ltquest;": "\u2a7b", | ||
| 2050 | "ltrPar;": "\u2996", | ||
| 2051 | "ltri;": "\u25c3", | ||
| 2052 | "ltrie;": "\u22b4", | ||
| 2053 | "ltrif;": "\u25c2", | ||
| 2054 | "lurdshar;": "\u294a", | ||
| 2055 | "luruhar;": "\u2966", | ||
| 2056 | "lvertneqq;": "\u2268\ufe00", | ||
| 2057 | "lvnE;": "\u2268\ufe00", | ||
| 2058 | "mDDot;": "\u223a", | ||
| 2059 | "macr": "\xaf", | ||
| 2060 | "macr;": "\xaf", | ||
| 2061 | "male;": "\u2642", | ||
| 2062 | "malt;": "\u2720", | ||
| 2063 | "maltese;": "\u2720", | ||
| 2064 | "map;": "\u21a6", | ||
| 2065 | "mapsto;": "\u21a6", | ||
| 2066 | "mapstodown;": "\u21a7", | ||
| 2067 | "mapstoleft;": "\u21a4", | ||
| 2068 | "mapstoup;": "\u21a5", | ||
| 2069 | "marker;": "\u25ae", | ||
| 2070 | "mcomma;": "\u2a29", | ||
| 2071 | "mcy;": "\u043c", | ||
| 2072 | "mdash;": "\u2014", | ||
| 2073 | "measuredangle;": "\u2221", | ||
| 2074 | "mfr;": "\U0001d52a", | ||
| 2075 | "mho;": "\u2127", | ||
| 2076 | "micro": "\xb5", | ||
| 2077 | "micro;": "\xb5", | ||
| 2078 | "mid;": "\u2223", | ||
| 2079 | "midast;": "*", | ||
| 2080 | "midcir;": "\u2af0", | ||
| 2081 | "middot": "\xb7", | ||
| 2082 | "middot;": "\xb7", | ||
| 2083 | "minus;": "\u2212", | ||
| 2084 | "minusb;": "\u229f", | ||
| 2085 | "minusd;": "\u2238", | ||
| 2086 | "minusdu;": "\u2a2a", | ||
| 2087 | "mlcp;": "\u2adb", | ||
| 2088 | "mldr;": "\u2026", | ||
| 2089 | "mnplus;": "\u2213", | ||
| 2090 | "models;": "\u22a7", | ||
| 2091 | "mopf;": "\U0001d55e", | ||
| 2092 | "mp;": "\u2213", | ||
| 2093 | "mscr;": "\U0001d4c2", | ||
| 2094 | "mstpos;": "\u223e", | ||
| 2095 | "mu;": "\u03bc", | ||
| 2096 | "multimap;": "\u22b8", | ||
| 2097 | "mumap;": "\u22b8", | ||
| 2098 | "nGg;": "\u22d9\u0338", | ||
| 2099 | "nGt;": "\u226b\u20d2", | ||
| 2100 | "nGtv;": "\u226b\u0338", | ||
| 2101 | "nLeftarrow;": "\u21cd", | ||
| 2102 | "nLeftrightarrow;": "\u21ce", | ||
| 2103 | "nLl;": "\u22d8\u0338", | ||
| 2104 | "nLt;": "\u226a\u20d2", | ||
| 2105 | "nLtv;": "\u226a\u0338", | ||
| 2106 | "nRightarrow;": "\u21cf", | ||
| 2107 | "nVDash;": "\u22af", | ||
| 2108 | "nVdash;": "\u22ae", | ||
| 2109 | "nabla;": "\u2207", | ||
| 2110 | "nacute;": "\u0144", | ||
| 2111 | "nang;": "\u2220\u20d2", | ||
| 2112 | "nap;": "\u2249", | ||
| 2113 | "napE;": "\u2a70\u0338", | ||
| 2114 | "napid;": "\u224b\u0338", | ||
| 2115 | "napos;": "\u0149", | ||
| 2116 | "napprox;": "\u2249", | ||
| 2117 | "natur;": "\u266e", | ||
| 2118 | "natural;": "\u266e", | ||
| 2119 | "naturals;": "\u2115", | ||
| 2120 | "nbsp": "\xa0", | ||
| 2121 | "nbsp;": "\xa0", | ||
| 2122 | "nbump;": "\u224e\u0338", | ||
| 2123 | "nbumpe;": "\u224f\u0338", | ||
| 2124 | "ncap;": "\u2a43", | ||
| 2125 | "ncaron;": "\u0148", | ||
| 2126 | "ncedil;": "\u0146", | ||
| 2127 | "ncong;": "\u2247", | ||
| 2128 | "ncongdot;": "\u2a6d\u0338", | ||
| 2129 | "ncup;": "\u2a42", | ||
| 2130 | "ncy;": "\u043d", | ||
| 2131 | "ndash;": "\u2013", | ||
| 2132 | "ne;": "\u2260", | ||
| 2133 | "neArr;": "\u21d7", | ||
| 2134 | "nearhk;": "\u2924", | ||
| 2135 | "nearr;": "\u2197", | ||
| 2136 | "nearrow;": "\u2197", | ||
| 2137 | "nedot;": "\u2250\u0338", | ||
| 2138 | "nequiv;": "\u2262", | ||
| 2139 | "nesear;": "\u2928", | ||
| 2140 | "nesim;": "\u2242\u0338", | ||
| 2141 | "nexist;": "\u2204", | ||
| 2142 | "nexists;": "\u2204", | ||
| 2143 | "nfr;": "\U0001d52b", | ||
| 2144 | "ngE;": "\u2267\u0338", | ||
| 2145 | "nge;": "\u2271", | ||
| 2146 | "ngeq;": "\u2271", | ||
| 2147 | "ngeqq;": "\u2267\u0338", | ||
| 2148 | "ngeqslant;": "\u2a7e\u0338", | ||
| 2149 | "nges;": "\u2a7e\u0338", | ||
| 2150 | "ngsim;": "\u2275", | ||
| 2151 | "ngt;": "\u226f", | ||
| 2152 | "ngtr;": "\u226f", | ||
| 2153 | "nhArr;": "\u21ce", | ||
| 2154 | "nharr;": "\u21ae", | ||
| 2155 | "nhpar;": "\u2af2", | ||
| 2156 | "ni;": "\u220b", | ||
| 2157 | "nis;": "\u22fc", | ||
| 2158 | "nisd;": "\u22fa", | ||
| 2159 | "niv;": "\u220b", | ||
| 2160 | "njcy;": "\u045a", | ||
| 2161 | "nlArr;": "\u21cd", | ||
| 2162 | "nlE;": "\u2266\u0338", | ||
| 2163 | "nlarr;": "\u219a", | ||
| 2164 | "nldr;": "\u2025", | ||
| 2165 | "nle;": "\u2270", | ||
| 2166 | "nleftarrow;": "\u219a", | ||
| 2167 | "nleftrightarrow;": "\u21ae", | ||
| 2168 | "nleq;": "\u2270", | ||
| 2169 | "nleqq;": "\u2266\u0338", | ||
| 2170 | "nleqslant;": "\u2a7d\u0338", | ||
| 2171 | "nles;": "\u2a7d\u0338", | ||
| 2172 | "nless;": "\u226e", | ||
| 2173 | "nlsim;": "\u2274", | ||
| 2174 | "nlt;": "\u226e", | ||
| 2175 | "nltri;": "\u22ea", | ||
| 2176 | "nltrie;": "\u22ec", | ||
| 2177 | "nmid;": "\u2224", | ||
| 2178 | "nopf;": "\U0001d55f", | ||
| 2179 | "not": "\xac", | ||
| 2180 | "not;": "\xac", | ||
| 2181 | "notin;": "\u2209", | ||
| 2182 | "notinE;": "\u22f9\u0338", | ||
| 2183 | "notindot;": "\u22f5\u0338", | ||
| 2184 | "notinva;": "\u2209", | ||
| 2185 | "notinvb;": "\u22f7", | ||
| 2186 | "notinvc;": "\u22f6", | ||
| 2187 | "notni;": "\u220c", | ||
| 2188 | "notniva;": "\u220c", | ||
| 2189 | "notnivb;": "\u22fe", | ||
| 2190 | "notnivc;": "\u22fd", | ||
| 2191 | "npar;": "\u2226", | ||
| 2192 | "nparallel;": "\u2226", | ||
| 2193 | "nparsl;": "\u2afd\u20e5", | ||
| 2194 | "npart;": "\u2202\u0338", | ||
| 2195 | "npolint;": "\u2a14", | ||
| 2196 | "npr;": "\u2280", | ||
| 2197 | "nprcue;": "\u22e0", | ||
| 2198 | "npre;": "\u2aaf\u0338", | ||
| 2199 | "nprec;": "\u2280", | ||
| 2200 | "npreceq;": "\u2aaf\u0338", | ||
| 2201 | "nrArr;": "\u21cf", | ||
| 2202 | "nrarr;": "\u219b", | ||
| 2203 | "nrarrc;": "\u2933\u0338", | ||
| 2204 | "nrarrw;": "\u219d\u0338", | ||
| 2205 | "nrightarrow;": "\u219b", | ||
| 2206 | "nrtri;": "\u22eb", | ||
| 2207 | "nrtrie;": "\u22ed", | ||
| 2208 | "nsc;": "\u2281", | ||
| 2209 | "nsccue;": "\u22e1", | ||
| 2210 | "nsce;": "\u2ab0\u0338", | ||
| 2211 | "nscr;": "\U0001d4c3", | ||
| 2212 | "nshortmid;": "\u2224", | ||
| 2213 | "nshortparallel;": "\u2226", | ||
| 2214 | "nsim;": "\u2241", | ||
| 2215 | "nsime;": "\u2244", | ||
| 2216 | "nsimeq;": "\u2244", | ||
| 2217 | "nsmid;": "\u2224", | ||
| 2218 | "nspar;": "\u2226", | ||
| 2219 | "nsqsube;": "\u22e2", | ||
| 2220 | "nsqsupe;": "\u22e3", | ||
| 2221 | "nsub;": "\u2284", | ||
| 2222 | "nsubE;": "\u2ac5\u0338", | ||
| 2223 | "nsube;": "\u2288", | ||
| 2224 | "nsubset;": "\u2282\u20d2", | ||
| 2225 | "nsubseteq;": "\u2288", | ||
| 2226 | "nsubseteqq;": "\u2ac5\u0338", | ||
| 2227 | "nsucc;": "\u2281", | ||
| 2228 | "nsucceq;": "\u2ab0\u0338", | ||
| 2229 | "nsup;": "\u2285", | ||
| 2230 | "nsupE;": "\u2ac6\u0338", | ||
| 2231 | "nsupe;": "\u2289", | ||
| 2232 | "nsupset;": "\u2283\u20d2", | ||
| 2233 | "nsupseteq;": "\u2289", | ||
| 2234 | "nsupseteqq;": "\u2ac6\u0338", | ||
| 2235 | "ntgl;": "\u2279", | ||
| 2236 | "ntilde": "\xf1", | ||
| 2237 | "ntilde;": "\xf1", | ||
| 2238 | "ntlg;": "\u2278", | ||
| 2239 | "ntriangleleft;": "\u22ea", | ||
| 2240 | "ntrianglelefteq;": "\u22ec", | ||
| 2241 | "ntriangleright;": "\u22eb", | ||
| 2242 | "ntrianglerighteq;": "\u22ed", | ||
| 2243 | "nu;": "\u03bd", | ||
| 2244 | "num;": "#", | ||
| 2245 | "numero;": "\u2116", | ||
| 2246 | "numsp;": "\u2007", | ||
| 2247 | "nvDash;": "\u22ad", | ||
| 2248 | "nvHarr;": "\u2904", | ||
| 2249 | "nvap;": "\u224d\u20d2", | ||
| 2250 | "nvdash;": "\u22ac", | ||
| 2251 | "nvge;": "\u2265\u20d2", | ||
| 2252 | "nvgt;": ">\u20d2", | ||
| 2253 | "nvinfin;": "\u29de", | ||
| 2254 | "nvlArr;": "\u2902", | ||
| 2255 | "nvle;": "\u2264\u20d2", | ||
| 2256 | "nvlt;": "<\u20d2", | ||
| 2257 | "nvltrie;": "\u22b4\u20d2", | ||
| 2258 | "nvrArr;": "\u2903", | ||
| 2259 | "nvrtrie;": "\u22b5\u20d2", | ||
| 2260 | "nvsim;": "\u223c\u20d2", | ||
| 2261 | "nwArr;": "\u21d6", | ||
| 2262 | "nwarhk;": "\u2923", | ||
| 2263 | "nwarr;": "\u2196", | ||
| 2264 | "nwarrow;": "\u2196", | ||
| 2265 | "nwnear;": "\u2927", | ||
| 2266 | "oS;": "\u24c8", | ||
| 2267 | "oacute": "\xf3", | ||
| 2268 | "oacute;": "\xf3", | ||
| 2269 | "oast;": "\u229b", | ||
| 2270 | "ocir;": "\u229a", | ||
| 2271 | "ocirc": "\xf4", | ||
| 2272 | "ocirc;": "\xf4", | ||
| 2273 | "ocy;": "\u043e", | ||
| 2274 | "odash;": "\u229d", | ||
| 2275 | "odblac;": "\u0151", | ||
| 2276 | "odiv;": "\u2a38", | ||
| 2277 | "odot;": "\u2299", | ||
| 2278 | "odsold;": "\u29bc", | ||
| 2279 | "oelig;": "\u0153", | ||
| 2280 | "ofcir;": "\u29bf", | ||
| 2281 | "ofr;": "\U0001d52c", | ||
| 2282 | "ogon;": "\u02db", | ||
| 2283 | "ograve": "\xf2", | ||
| 2284 | "ograve;": "\xf2", | ||
| 2285 | "ogt;": "\u29c1", | ||
| 2286 | "ohbar;": "\u29b5", | ||
| 2287 | "ohm;": "\u03a9", | ||
| 2288 | "oint;": "\u222e", | ||
| 2289 | "olarr;": "\u21ba", | ||
| 2290 | "olcir;": "\u29be", | ||
| 2291 | "olcross;": "\u29bb", | ||
| 2292 | "oline;": "\u203e", | ||
| 2293 | "olt;": "\u29c0", | ||
| 2294 | "omacr;": "\u014d", | ||
| 2295 | "omega;": "\u03c9", | ||
| 2296 | "omicron;": "\u03bf", | ||
| 2297 | "omid;": "\u29b6", | ||
| 2298 | "ominus;": "\u2296", | ||
| 2299 | "oopf;": "\U0001d560", | ||
| 2300 | "opar;": "\u29b7", | ||
| 2301 | "operp;": "\u29b9", | ||
| 2302 | "oplus;": "\u2295", | ||
| 2303 | "or;": "\u2228", | ||
| 2304 | "orarr;": "\u21bb", | ||
| 2305 | "ord;": "\u2a5d", | ||
| 2306 | "order;": "\u2134", | ||
| 2307 | "orderof;": "\u2134", | ||
| 2308 | "ordf": "\xaa", | ||
| 2309 | "ordf;": "\xaa", | ||
| 2310 | "ordm": "\xba", | ||
| 2311 | "ordm;": "\xba", | ||
| 2312 | "origof;": "\u22b6", | ||
| 2313 | "oror;": "\u2a56", | ||
| 2314 | "orslope;": "\u2a57", | ||
| 2315 | "orv;": "\u2a5b", | ||
| 2316 | "oscr;": "\u2134", | ||
| 2317 | "oslash": "\xf8", | ||
| 2318 | "oslash;": "\xf8", | ||
| 2319 | "osol;": "\u2298", | ||
| 2320 | "otilde": "\xf5", | ||
| 2321 | "otilde;": "\xf5", | ||
| 2322 | "otimes;": "\u2297", | ||
| 2323 | "otimesas;": "\u2a36", | ||
| 2324 | "ouml": "\xf6", | ||
| 2325 | "ouml;": "\xf6", | ||
| 2326 | "ovbar;": "\u233d", | ||
| 2327 | "par;": "\u2225", | ||
| 2328 | "para": "\xb6", | ||
| 2329 | "para;": "\xb6", | ||
| 2330 | "parallel;": "\u2225", | ||
| 2331 | "parsim;": "\u2af3", | ||
| 2332 | "parsl;": "\u2afd", | ||
| 2333 | "part;": "\u2202", | ||
| 2334 | "pcy;": "\u043f", | ||
| 2335 | "percnt;": "%", | ||
| 2336 | "period;": ".", | ||
| 2337 | "permil;": "\u2030", | ||
| 2338 | "perp;": "\u22a5", | ||
| 2339 | "pertenk;": "\u2031", | ||
| 2340 | "pfr;": "\U0001d52d", | ||
| 2341 | "phi;": "\u03c6", | ||
| 2342 | "phiv;": "\u03d5", | ||
| 2343 | "phmmat;": "\u2133", | ||
| 2344 | "phone;": "\u260e", | ||
| 2345 | "pi;": "\u03c0", | ||
| 2346 | "pitchfork;": "\u22d4", | ||
| 2347 | "piv;": "\u03d6", | ||
| 2348 | "planck;": "\u210f", | ||
| 2349 | "planckh;": "\u210e", | ||
| 2350 | "plankv;": "\u210f", | ||
| 2351 | "plus;": "+", | ||
| 2352 | "plusacir;": "\u2a23", | ||
| 2353 | "plusb;": "\u229e", | ||
| 2354 | "pluscir;": "\u2a22", | ||
| 2355 | "plusdo;": "\u2214", | ||
| 2356 | "plusdu;": "\u2a25", | ||
| 2357 | "pluse;": "\u2a72", | ||
| 2358 | "plusmn": "\xb1", | ||
| 2359 | "plusmn;": "\xb1", | ||
| 2360 | "plussim;": "\u2a26", | ||
| 2361 | "plustwo;": "\u2a27", | ||
| 2362 | "pm;": "\xb1", | ||
| 2363 | "pointint;": "\u2a15", | ||
| 2364 | "popf;": "\U0001d561", | ||
| 2365 | "pound": "\xa3", | ||
| 2366 | "pound;": "\xa3", | ||
| 2367 | "pr;": "\u227a", | ||
| 2368 | "prE;": "\u2ab3", | ||
| 2369 | "prap;": "\u2ab7", | ||
| 2370 | "prcue;": "\u227c", | ||
| 2371 | "pre;": "\u2aaf", | ||
| 2372 | "prec;": "\u227a", | ||
| 2373 | "precapprox;": "\u2ab7", | ||
| 2374 | "preccurlyeq;": "\u227c", | ||
| 2375 | "preceq;": "\u2aaf", | ||
| 2376 | "precnapprox;": "\u2ab9", | ||
| 2377 | "precneqq;": "\u2ab5", | ||
| 2378 | "precnsim;": "\u22e8", | ||
| 2379 | "precsim;": "\u227e", | ||
| 2380 | "prime;": "\u2032", | ||
| 2381 | "primes;": "\u2119", | ||
| 2382 | "prnE;": "\u2ab5", | ||
| 2383 | "prnap;": "\u2ab9", | ||
| 2384 | "prnsim;": "\u22e8", | ||
| 2385 | "prod;": "\u220f", | ||
| 2386 | "profalar;": "\u232e", | ||
| 2387 | "profline;": "\u2312", | ||
| 2388 | "profsurf;": "\u2313", | ||
| 2389 | "prop;": "\u221d", | ||
| 2390 | "propto;": "\u221d", | ||
| 2391 | "prsim;": "\u227e", | ||
| 2392 | "prurel;": "\u22b0", | ||
| 2393 | "pscr;": "\U0001d4c5", | ||
| 2394 | "psi;": "\u03c8", | ||
| 2395 | "puncsp;": "\u2008", | ||
| 2396 | "qfr;": "\U0001d52e", | ||
| 2397 | "qint;": "\u2a0c", | ||
| 2398 | "qopf;": "\U0001d562", | ||
| 2399 | "qprime;": "\u2057", | ||
| 2400 | "qscr;": "\U0001d4c6", | ||
| 2401 | "quaternions;": "\u210d", | ||
| 2402 | "quatint;": "\u2a16", | ||
| 2403 | "quest;": "?", | ||
| 2404 | "questeq;": "\u225f", | ||
| 2405 | "quot": "\"", | ||
| 2406 | "quot;": "\"", | ||
| 2407 | "rAarr;": "\u21db", | ||
| 2408 | "rArr;": "\u21d2", | ||
| 2409 | "rAtail;": "\u291c", | ||
| 2410 | "rBarr;": "\u290f", | ||
| 2411 | "rHar;": "\u2964", | ||
| 2412 | "race;": "\u223d\u0331", | ||
| 2413 | "racute;": "\u0155", | ||
| 2414 | "radic;": "\u221a", | ||
| 2415 | "raemptyv;": "\u29b3", | ||
| 2416 | "rang;": "\u27e9", | ||
| 2417 | "rangd;": "\u2992", | ||
| 2418 | "range;": "\u29a5", | ||
| 2419 | "rangle;": "\u27e9", | ||
| 2420 | "raquo": "\xbb", | ||
| 2421 | "raquo;": "\xbb", | ||
| 2422 | "rarr;": "\u2192", | ||
| 2423 | "rarrap;": "\u2975", | ||
| 2424 | "rarrb;": "\u21e5", | ||
| 2425 | "rarrbfs;": "\u2920", | ||
| 2426 | "rarrc;": "\u2933", | ||
| 2427 | "rarrfs;": "\u291e", | ||
| 2428 | "rarrhk;": "\u21aa", | ||
| 2429 | "rarrlp;": "\u21ac", | ||
| 2430 | "rarrpl;": "\u2945", | ||
| 2431 | "rarrsim;": "\u2974", | ||
| 2432 | "rarrtl;": "\u21a3", | ||
| 2433 | "rarrw;": "\u219d", | ||
| 2434 | "ratail;": "\u291a", | ||
| 2435 | "ratio;": "\u2236", | ||
| 2436 | "rationals;": "\u211a", | ||
| 2437 | "rbarr;": "\u290d", | ||
| 2438 | "rbbrk;": "\u2773", | ||
| 2439 | "rbrace;": "}", | ||
| 2440 | "rbrack;": "]", | ||
| 2441 | "rbrke;": "\u298c", | ||
| 2442 | "rbrksld;": "\u298e", | ||
| 2443 | "rbrkslu;": "\u2990", | ||
| 2444 | "rcaron;": "\u0159", | ||
| 2445 | "rcedil;": "\u0157", | ||
| 2446 | "rceil;": "\u2309", | ||
| 2447 | "rcub;": "}", | ||
| 2448 | "rcy;": "\u0440", | ||
| 2449 | "rdca;": "\u2937", | ||
| 2450 | "rdldhar;": "\u2969", | ||
| 2451 | "rdquo;": "\u201d", | ||
| 2452 | "rdquor;": "\u201d", | ||
| 2453 | "rdsh;": "\u21b3", | ||
| 2454 | "real;": "\u211c", | ||
| 2455 | "realine;": "\u211b", | ||
| 2456 | "realpart;": "\u211c", | ||
| 2457 | "reals;": "\u211d", | ||
| 2458 | "rect;": "\u25ad", | ||
| 2459 | "reg": "\xae", | ||
| 2460 | "reg;": "\xae", | ||
| 2461 | "rfisht;": "\u297d", | ||
| 2462 | "rfloor;": "\u230b", | ||
| 2463 | "rfr;": "\U0001d52f", | ||
| 2464 | "rhard;": "\u21c1", | ||
| 2465 | "rharu;": "\u21c0", | ||
| 2466 | "rharul;": "\u296c", | ||
| 2467 | "rho;": "\u03c1", | ||
| 2468 | "rhov;": "\u03f1", | ||
| 2469 | "rightarrow;": "\u2192", | ||
| 2470 | "rightarrowtail;": "\u21a3", | ||
| 2471 | "rightharpoondown;": "\u21c1", | ||
| 2472 | "rightharpoonup;": "\u21c0", | ||
| 2473 | "rightleftarrows;": "\u21c4", | ||
| 2474 | "rightleftharpoons;": "\u21cc", | ||
| 2475 | "rightrightarrows;": "\u21c9", | ||
| 2476 | "rightsquigarrow;": "\u219d", | ||
| 2477 | "rightthreetimes;": "\u22cc", | ||
| 2478 | "ring;": "\u02da", | ||
| 2479 | "risingdotseq;": "\u2253", | ||
| 2480 | "rlarr;": "\u21c4", | ||
| 2481 | "rlhar;": "\u21cc", | ||
| 2482 | "rlm;": "\u200f", | ||
| 2483 | "rmoust;": "\u23b1", | ||
| 2484 | "rmoustache;": "\u23b1", | ||
| 2485 | "rnmid;": "\u2aee", | ||
| 2486 | "roang;": "\u27ed", | ||
| 2487 | "roarr;": "\u21fe", | ||
| 2488 | "robrk;": "\u27e7", | ||
| 2489 | "ropar;": "\u2986", | ||
| 2490 | "ropf;": "\U0001d563", | ||
| 2491 | "roplus;": "\u2a2e", | ||
| 2492 | "rotimes;": "\u2a35", | ||
| 2493 | "rpar;": ")", | ||
| 2494 | "rpargt;": "\u2994", | ||
| 2495 | "rppolint;": "\u2a12", | ||
| 2496 | "rrarr;": "\u21c9", | ||
| 2497 | "rsaquo;": "\u203a", | ||
| 2498 | "rscr;": "\U0001d4c7", | ||
| 2499 | "rsh;": "\u21b1", | ||
| 2500 | "rsqb;": "]", | ||
| 2501 | "rsquo;": "\u2019", | ||
| 2502 | "rsquor;": "\u2019", | ||
| 2503 | "rthree;": "\u22cc", | ||
| 2504 | "rtimes;": "\u22ca", | ||
| 2505 | "rtri;": "\u25b9", | ||
| 2506 | "rtrie;": "\u22b5", | ||
| 2507 | "rtrif;": "\u25b8", | ||
| 2508 | "rtriltri;": "\u29ce", | ||
| 2509 | "ruluhar;": "\u2968", | ||
| 2510 | "rx;": "\u211e", | ||
| 2511 | "sacute;": "\u015b", | ||
| 2512 | "sbquo;": "\u201a", | ||
| 2513 | "sc;": "\u227b", | ||
| 2514 | "scE;": "\u2ab4", | ||
| 2515 | "scap;": "\u2ab8", | ||
| 2516 | "scaron;": "\u0161", | ||
| 2517 | "sccue;": "\u227d", | ||
| 2518 | "sce;": "\u2ab0", | ||
| 2519 | "scedil;": "\u015f", | ||
| 2520 | "scirc;": "\u015d", | ||
| 2521 | "scnE;": "\u2ab6", | ||
| 2522 | "scnap;": "\u2aba", | ||
| 2523 | "scnsim;": "\u22e9", | ||
| 2524 | "scpolint;": "\u2a13", | ||
| 2525 | "scsim;": "\u227f", | ||
| 2526 | "scy;": "\u0441", | ||
| 2527 | "sdot;": "\u22c5", | ||
| 2528 | "sdotb;": "\u22a1", | ||
| 2529 | "sdote;": "\u2a66", | ||
| 2530 | "seArr;": "\u21d8", | ||
| 2531 | "searhk;": "\u2925", | ||
| 2532 | "searr;": "\u2198", | ||
| 2533 | "searrow;": "\u2198", | ||
| 2534 | "sect": "\xa7", | ||
| 2535 | "sect;": "\xa7", | ||
| 2536 | "semi;": ";", | ||
| 2537 | "seswar;": "\u2929", | ||
| 2538 | "setminus;": "\u2216", | ||
| 2539 | "setmn;": "\u2216", | ||
| 2540 | "sext;": "\u2736", | ||
| 2541 | "sfr;": "\U0001d530", | ||
| 2542 | "sfrown;": "\u2322", | ||
| 2543 | "sharp;": "\u266f", | ||
| 2544 | "shchcy;": "\u0449", | ||
| 2545 | "shcy;": "\u0448", | ||
| 2546 | "shortmid;": "\u2223", | ||
| 2547 | "shortparallel;": "\u2225", | ||
| 2548 | "shy": "\xad", | ||
| 2549 | "shy;": "\xad", | ||
| 2550 | "sigma;": "\u03c3", | ||
| 2551 | "sigmaf;": "\u03c2", | ||
| 2552 | "sigmav;": "\u03c2", | ||
| 2553 | "sim;": "\u223c", | ||
| 2554 | "simdot;": "\u2a6a", | ||
| 2555 | "sime;": "\u2243", | ||
| 2556 | "simeq;": "\u2243", | ||
| 2557 | "simg;": "\u2a9e", | ||
| 2558 | "simgE;": "\u2aa0", | ||
| 2559 | "siml;": "\u2a9d", | ||
| 2560 | "simlE;": "\u2a9f", | ||
| 2561 | "simne;": "\u2246", | ||
| 2562 | "simplus;": "\u2a24", | ||
| 2563 | "simrarr;": "\u2972", | ||
| 2564 | "slarr;": "\u2190", | ||
| 2565 | "smallsetminus;": "\u2216", | ||
| 2566 | "smashp;": "\u2a33", | ||
| 2567 | "smeparsl;": "\u29e4", | ||
| 2568 | "smid;": "\u2223", | ||
| 2569 | "smile;": "\u2323", | ||
| 2570 | "smt;": "\u2aaa", | ||
| 2571 | "smte;": "\u2aac", | ||
| 2572 | "smtes;": "\u2aac\ufe00", | ||
| 2573 | "softcy;": "\u044c", | ||
| 2574 | "sol;": "/", | ||
| 2575 | "solb;": "\u29c4", | ||
| 2576 | "solbar;": "\u233f", | ||
| 2577 | "sopf;": "\U0001d564", | ||
| 2578 | "spades;": "\u2660", | ||
| 2579 | "spadesuit;": "\u2660", | ||
| 2580 | "spar;": "\u2225", | ||
| 2581 | "sqcap;": "\u2293", | ||
| 2582 | "sqcaps;": "\u2293\ufe00", | ||
| 2583 | "sqcup;": "\u2294", | ||
| 2584 | "sqcups;": "\u2294\ufe00", | ||
| 2585 | "sqsub;": "\u228f", | ||
| 2586 | "sqsube;": "\u2291", | ||
| 2587 | "sqsubset;": "\u228f", | ||
| 2588 | "sqsubseteq;": "\u2291", | ||
| 2589 | "sqsup;": "\u2290", | ||
| 2590 | "sqsupe;": "\u2292", | ||
| 2591 | "sqsupset;": "\u2290", | ||
| 2592 | "sqsupseteq;": "\u2292", | ||
| 2593 | "squ;": "\u25a1", | ||
| 2594 | "square;": "\u25a1", | ||
| 2595 | "squarf;": "\u25aa", | ||
| 2596 | "squf;": "\u25aa", | ||
| 2597 | "srarr;": "\u2192", | ||
| 2598 | "sscr;": "\U0001d4c8", | ||
| 2599 | "ssetmn;": "\u2216", | ||
| 2600 | "ssmile;": "\u2323", | ||
| 2601 | "sstarf;": "\u22c6", | ||
| 2602 | "star;": "\u2606", | ||
| 2603 | "starf;": "\u2605", | ||
| 2604 | "straightepsilon;": "\u03f5", | ||
| 2605 | "straightphi;": "\u03d5", | ||
| 2606 | "strns;": "\xaf", | ||
| 2607 | "sub;": "\u2282", | ||
| 2608 | "subE;": "\u2ac5", | ||
| 2609 | "subdot;": "\u2abd", | ||
| 2610 | "sube;": "\u2286", | ||
| 2611 | "subedot;": "\u2ac3", | ||
| 2612 | "submult;": "\u2ac1", | ||
| 2613 | "subnE;": "\u2acb", | ||
| 2614 | "subne;": "\u228a", | ||
| 2615 | "subplus;": "\u2abf", | ||
| 2616 | "subrarr;": "\u2979", | ||
| 2617 | "subset;": "\u2282", | ||
| 2618 | "subseteq;": "\u2286", | ||
| 2619 | "subseteqq;": "\u2ac5", | ||
| 2620 | "subsetneq;": "\u228a", | ||
| 2621 | "subsetneqq;": "\u2acb", | ||
| 2622 | "subsim;": "\u2ac7", | ||
| 2623 | "subsub;": "\u2ad5", | ||
| 2624 | "subsup;": "\u2ad3", | ||
| 2625 | "succ;": "\u227b", | ||
| 2626 | "succapprox;": "\u2ab8", | ||
| 2627 | "succcurlyeq;": "\u227d", | ||
| 2628 | "succeq;": "\u2ab0", | ||
| 2629 | "succnapprox;": "\u2aba", | ||
| 2630 | "succneqq;": "\u2ab6", | ||
| 2631 | "succnsim;": "\u22e9", | ||
| 2632 | "succsim;": "\u227f", | ||
| 2633 | "sum;": "\u2211", | ||
| 2634 | "sung;": "\u266a", | ||
| 2635 | "sup1": "\xb9", | ||
| 2636 | "sup1;": "\xb9", | ||
| 2637 | "sup2": "\xb2", | ||
| 2638 | "sup2;": "\xb2", | ||
| 2639 | "sup3": "\xb3", | ||
| 2640 | "sup3;": "\xb3", | ||
| 2641 | "sup;": "\u2283", | ||
| 2642 | "supE;": "\u2ac6", | ||
| 2643 | "supdot;": "\u2abe", | ||
| 2644 | "supdsub;": "\u2ad8", | ||
| 2645 | "supe;": "\u2287", | ||
| 2646 | "supedot;": "\u2ac4", | ||
| 2647 | "suphsol;": "\u27c9", | ||
| 2648 | "suphsub;": "\u2ad7", | ||
| 2649 | "suplarr;": "\u297b", | ||
| 2650 | "supmult;": "\u2ac2", | ||
| 2651 | "supnE;": "\u2acc", | ||
| 2652 | "supne;": "\u228b", | ||
| 2653 | "supplus;": "\u2ac0", | ||
| 2654 | "supset;": "\u2283", | ||
| 2655 | "supseteq;": "\u2287", | ||
| 2656 | "supseteqq;": "\u2ac6", | ||
| 2657 | "supsetneq;": "\u228b", | ||
| 2658 | "supsetneqq;": "\u2acc", | ||
| 2659 | "supsim;": "\u2ac8", | ||
| 2660 | "supsub;": "\u2ad4", | ||
| 2661 | "supsup;": "\u2ad6", | ||
| 2662 | "swArr;": "\u21d9", | ||
| 2663 | "swarhk;": "\u2926", | ||
| 2664 | "swarr;": "\u2199", | ||
| 2665 | "swarrow;": "\u2199", | ||
| 2666 | "swnwar;": "\u292a", | ||
| 2667 | "szlig": "\xdf", | ||
| 2668 | "szlig;": "\xdf", | ||
| 2669 | "target;": "\u2316", | ||
| 2670 | "tau;": "\u03c4", | ||
| 2671 | "tbrk;": "\u23b4", | ||
| 2672 | "tcaron;": "\u0165", | ||
| 2673 | "tcedil;": "\u0163", | ||
| 2674 | "tcy;": "\u0442", | ||
| 2675 | "tdot;": "\u20db", | ||
| 2676 | "telrec;": "\u2315", | ||
| 2677 | "tfr;": "\U0001d531", | ||
| 2678 | "there4;": "\u2234", | ||
| 2679 | "therefore;": "\u2234", | ||
| 2680 | "theta;": "\u03b8", | ||
| 2681 | "thetasym;": "\u03d1", | ||
| 2682 | "thetav;": "\u03d1", | ||
| 2683 | "thickapprox;": "\u2248", | ||
| 2684 | "thicksim;": "\u223c", | ||
| 2685 | "thinsp;": "\u2009", | ||
| 2686 | "thkap;": "\u2248", | ||
| 2687 | "thksim;": "\u223c", | ||
| 2688 | "thorn": "\xfe", | ||
| 2689 | "thorn;": "\xfe", | ||
| 2690 | "tilde;": "\u02dc", | ||
| 2691 | "times": "\xd7", | ||
| 2692 | "times;": "\xd7", | ||
| 2693 | "timesb;": "\u22a0", | ||
| 2694 | "timesbar;": "\u2a31", | ||
| 2695 | "timesd;": "\u2a30", | ||
| 2696 | "tint;": "\u222d", | ||
| 2697 | "toea;": "\u2928", | ||
| 2698 | "top;": "\u22a4", | ||
| 2699 | "topbot;": "\u2336", | ||
| 2700 | "topcir;": "\u2af1", | ||
| 2701 | "topf;": "\U0001d565", | ||
| 2702 | "topfork;": "\u2ada", | ||
| 2703 | "tosa;": "\u2929", | ||
| 2704 | "tprime;": "\u2034", | ||
| 2705 | "trade;": "\u2122", | ||
| 2706 | "triangle;": "\u25b5", | ||
| 2707 | "triangledown;": "\u25bf", | ||
| 2708 | "triangleleft;": "\u25c3", | ||
| 2709 | "trianglelefteq;": "\u22b4", | ||
| 2710 | "triangleq;": "\u225c", | ||
| 2711 | "triangleright;": "\u25b9", | ||
| 2712 | "trianglerighteq;": "\u22b5", | ||
| 2713 | "tridot;": "\u25ec", | ||
| 2714 | "trie;": "\u225c", | ||
| 2715 | "triminus;": "\u2a3a", | ||
| 2716 | "triplus;": "\u2a39", | ||
| 2717 | "trisb;": "\u29cd", | ||
| 2718 | "tritime;": "\u2a3b", | ||
| 2719 | "trpezium;": "\u23e2", | ||
| 2720 | "tscr;": "\U0001d4c9", | ||
| 2721 | "tscy;": "\u0446", | ||
| 2722 | "tshcy;": "\u045b", | ||
| 2723 | "tstrok;": "\u0167", | ||
| 2724 | "twixt;": "\u226c", | ||
| 2725 | "twoheadleftarrow;": "\u219e", | ||
| 2726 | "twoheadrightarrow;": "\u21a0", | ||
| 2727 | "uArr;": "\u21d1", | ||
| 2728 | "uHar;": "\u2963", | ||
| 2729 | "uacute": "\xfa", | ||
| 2730 | "uacute;": "\xfa", | ||
| 2731 | "uarr;": "\u2191", | ||
| 2732 | "ubrcy;": "\u045e", | ||
| 2733 | "ubreve;": "\u016d", | ||
| 2734 | "ucirc": "\xfb", | ||
| 2735 | "ucirc;": "\xfb", | ||
| 2736 | "ucy;": "\u0443", | ||
| 2737 | "udarr;": "\u21c5", | ||
| 2738 | "udblac;": "\u0171", | ||
| 2739 | "udhar;": "\u296e", | ||
| 2740 | "ufisht;": "\u297e", | ||
| 2741 | "ufr;": "\U0001d532", | ||
| 2742 | "ugrave": "\xf9", | ||
| 2743 | "ugrave;": "\xf9", | ||
| 2744 | "uharl;": "\u21bf", | ||
| 2745 | "uharr;": "\u21be", | ||
| 2746 | "uhblk;": "\u2580", | ||
| 2747 | "ulcorn;": "\u231c", | ||
| 2748 | "ulcorner;": "\u231c", | ||
| 2749 | "ulcrop;": "\u230f", | ||
| 2750 | "ultri;": "\u25f8", | ||
| 2751 | "umacr;": "\u016b", | ||
| 2752 | "uml": "\xa8", | ||
| 2753 | "uml;": "\xa8", | ||
| 2754 | "uogon;": "\u0173", | ||
| 2755 | "uopf;": "\U0001d566", | ||
| 2756 | "uparrow;": "\u2191", | ||
| 2757 | "updownarrow;": "\u2195", | ||
| 2758 | "upharpoonleft;": "\u21bf", | ||
| 2759 | "upharpoonright;": "\u21be", | ||
| 2760 | "uplus;": "\u228e", | ||
| 2761 | "upsi;": "\u03c5", | ||
| 2762 | "upsih;": "\u03d2", | ||
| 2763 | "upsilon;": "\u03c5", | ||
| 2764 | "upuparrows;": "\u21c8", | ||
| 2765 | "urcorn;": "\u231d", | ||
| 2766 | "urcorner;": "\u231d", | ||
| 2767 | "urcrop;": "\u230e", | ||
| 2768 | "uring;": "\u016f", | ||
| 2769 | "urtri;": "\u25f9", | ||
| 2770 | "uscr;": "\U0001d4ca", | ||
| 2771 | "utdot;": "\u22f0", | ||
| 2772 | "utilde;": "\u0169", | ||
| 2773 | "utri;": "\u25b5", | ||
| 2774 | "utrif;": "\u25b4", | ||
| 2775 | "uuarr;": "\u21c8", | ||
| 2776 | "uuml": "\xfc", | ||
| 2777 | "uuml;": "\xfc", | ||
| 2778 | "uwangle;": "\u29a7", | ||
| 2779 | "vArr;": "\u21d5", | ||
| 2780 | "vBar;": "\u2ae8", | ||
| 2781 | "vBarv;": "\u2ae9", | ||
| 2782 | "vDash;": "\u22a8", | ||
| 2783 | "vangrt;": "\u299c", | ||
| 2784 | "varepsilon;": "\u03f5", | ||
| 2785 | "varkappa;": "\u03f0", | ||
| 2786 | "varnothing;": "\u2205", | ||
| 2787 | "varphi;": "\u03d5", | ||
| 2788 | "varpi;": "\u03d6", | ||
| 2789 | "varpropto;": "\u221d", | ||
| 2790 | "varr;": "\u2195", | ||
| 2791 | "varrho;": "\u03f1", | ||
| 2792 | "varsigma;": "\u03c2", | ||
| 2793 | "varsubsetneq;": "\u228a\ufe00", | ||
| 2794 | "varsubsetneqq;": "\u2acb\ufe00", | ||
| 2795 | "varsupsetneq;": "\u228b\ufe00", | ||
| 2796 | "varsupsetneqq;": "\u2acc\ufe00", | ||
| 2797 | "vartheta;": "\u03d1", | ||
| 2798 | "vartriangleleft;": "\u22b2", | ||
| 2799 | "vartriangleright;": "\u22b3", | ||
| 2800 | "vcy;": "\u0432", | ||
| 2801 | "vdash;": "\u22a2", | ||
| 2802 | "vee;": "\u2228", | ||
| 2803 | "veebar;": "\u22bb", | ||
| 2804 | "veeeq;": "\u225a", | ||
| 2805 | "vellip;": "\u22ee", | ||
| 2806 | "verbar;": "|", | ||
| 2807 | "vert;": "|", | ||
| 2808 | "vfr;": "\U0001d533", | ||
| 2809 | "vltri;": "\u22b2", | ||
| 2810 | "vnsub;": "\u2282\u20d2", | ||
| 2811 | "vnsup;": "\u2283\u20d2", | ||
| 2812 | "vopf;": "\U0001d567", | ||
| 2813 | "vprop;": "\u221d", | ||
| 2814 | "vrtri;": "\u22b3", | ||
| 2815 | "vscr;": "\U0001d4cb", | ||
| 2816 | "vsubnE;": "\u2acb\ufe00", | ||
| 2817 | "vsubne;": "\u228a\ufe00", | ||
| 2818 | "vsupnE;": "\u2acc\ufe00", | ||
| 2819 | "vsupne;": "\u228b\ufe00", | ||
| 2820 | "vzigzag;": "\u299a", | ||
| 2821 | "wcirc;": "\u0175", | ||
| 2822 | "wedbar;": "\u2a5f", | ||
| 2823 | "wedge;": "\u2227", | ||
| 2824 | "wedgeq;": "\u2259", | ||
| 2825 | "weierp;": "\u2118", | ||
| 2826 | "wfr;": "\U0001d534", | ||
| 2827 | "wopf;": "\U0001d568", | ||
| 2828 | "wp;": "\u2118", | ||
| 2829 | "wr;": "\u2240", | ||
| 2830 | "wreath;": "\u2240", | ||
| 2831 | "wscr;": "\U0001d4cc", | ||
| 2832 | "xcap;": "\u22c2", | ||
| 2833 | "xcirc;": "\u25ef", | ||
| 2834 | "xcup;": "\u22c3", | ||
| 2835 | "xdtri;": "\u25bd", | ||
| 2836 | "xfr;": "\U0001d535", | ||
| 2837 | "xhArr;": "\u27fa", | ||
| 2838 | "xharr;": "\u27f7", | ||
| 2839 | "xi;": "\u03be", | ||
| 2840 | "xlArr;": "\u27f8", | ||
| 2841 | "xlarr;": "\u27f5", | ||
| 2842 | "xmap;": "\u27fc", | ||
| 2843 | "xnis;": "\u22fb", | ||
| 2844 | "xodot;": "\u2a00", | ||
| 2845 | "xopf;": "\U0001d569", | ||
| 2846 | "xoplus;": "\u2a01", | ||
| 2847 | "xotime;": "\u2a02", | ||
| 2848 | "xrArr;": "\u27f9", | ||
| 2849 | "xrarr;": "\u27f6", | ||
| 2850 | "xscr;": "\U0001d4cd", | ||
| 2851 | "xsqcup;": "\u2a06", | ||
| 2852 | "xuplus;": "\u2a04", | ||
| 2853 | "xutri;": "\u25b3", | ||
| 2854 | "xvee;": "\u22c1", | ||
| 2855 | "xwedge;": "\u22c0", | ||
| 2856 | "yacute": "\xfd", | ||
| 2857 | "yacute;": "\xfd", | ||
| 2858 | "yacy;": "\u044f", | ||
| 2859 | "ycirc;": "\u0177", | ||
| 2860 | "ycy;": "\u044b", | ||
| 2861 | "yen": "\xa5", | ||
| 2862 | "yen;": "\xa5", | ||
| 2863 | "yfr;": "\U0001d536", | ||
| 2864 | "yicy;": "\u0457", | ||
| 2865 | "yopf;": "\U0001d56a", | ||
| 2866 | "yscr;": "\U0001d4ce", | ||
| 2867 | "yucy;": "\u044e", | ||
| 2868 | "yuml": "\xff", | ||
| 2869 | "yuml;": "\xff", | ||
| 2870 | "zacute;": "\u017a", | ||
| 2871 | "zcaron;": "\u017e", | ||
| 2872 | "zcy;": "\u0437", | ||
| 2873 | "zdot;": "\u017c", | ||
| 2874 | "zeetrf;": "\u2128", | ||
| 2875 | "zeta;": "\u03b6", | ||
| 2876 | "zfr;": "\U0001d537", | ||
| 2877 | "zhcy;": "\u0436", | ||
| 2878 | "zigrarr;": "\u21dd", | ||
| 2879 | "zopf;": "\U0001d56b", | ||
| 2880 | "zscr;": "\U0001d4cf", | ||
| 2881 | "zwj;": "\u200d", | ||
| 2882 | "zwnj;": "\u200c", | ||
| 2883 | } | ||
| 2884 | |||
#: Replacements for "illegal" numeric character references.  0x00 becomes
#: U+FFFD (the replacement character) and 0x0D is kept as a carriage return;
#: the 0x80-0x9F range (C1 controls) is mapped to the characters those bytes
#: denote in windows-1252, matching browser behaviour.
replacementCharacters = {
    0x0: "\uFFFD",
    0x0d: "\u000D",
    0x80: "\u20AC",
    0x81: "\u0081",
    0x82: "\u201A",
    0x83: "\u0192",
    0x84: "\u201E",
    0x85: "\u2026",
    0x86: "\u2020",
    0x87: "\u2021",
    0x88: "\u02C6",
    0x89: "\u2030",
    0x8A: "\u0160",
    0x8B: "\u2039",
    0x8C: "\u0152",
    0x8D: "\u008D",
    0x8E: "\u017D",
    0x8F: "\u008F",
    0x90: "\u0090",
    0x91: "\u2018",
    0x92: "\u2019",
    0x93: "\u201C",
    0x94: "\u201D",
    0x95: "\u2022",
    0x96: "\u2013",
    0x97: "\u2014",
    0x98: "\u02DC",
    0x99: "\u2122",
    0x9A: "\u0161",
    0x9B: "\u203A",
    0x9C: "\u0153",
    0x9D: "\u009D",
    0x9E: "\u017E",
    0x9F: "\u0178",
}
| 2921 | |||
#: Integer identifiers for the token types produced by the tokenizer.
tokenTypes = {
    "Doctype": 0,
    "Characters": 1,
    "SpaceCharacters": 2,
    "StartTag": 3,
    "EndTag": 4,
    "EmptyTag": 5,
    "Comment": 6,
    "ParseError": 7
}

#: Token types that carry a tag name (start, end, and self-closing tags).
tagTokenTypes = frozenset([tokenTypes["StartTag"], tokenTypes["EndTag"],
                           tokenTypes["EmptyTag"]])


# Inverse of ``namespaces``: maps a namespace URI back to its short prefix.
# The MathML entry is then set explicitly — presumably ``namespaces`` has
# more than one key for that URI, making the inverted value order-dependent;
# TODO confirm against the ``namespaces`` definition earlier in this module.
prefixes = dict([(v, k) for k, v in namespaces.items()])
prefixes["http://www.w3.org/1998/Math/MathML"] = "math"
| 2939 | |||
| 2940 | |||
class DataLossWarning(UserWarning):
    """Raised when the current tree is unable to represent the input data"""
| 2944 | |||
| 2945 | |||
| 2946 | class _ReparseException(Exception): | ||
| 2947 | pass | ||
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/__init__.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/__init__.py | |||
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/alphabeticalattributes.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/alphabeticalattributes.py new file mode 100644 index 0000000..d9e234a --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/alphabeticalattributes.py | |||
| @@ -0,0 +1,29 @@ | |||
| 1 | from __future__ import absolute_import, division, unicode_literals | ||
| 2 | |||
| 3 | from . import base | ||
| 4 | |||
| 5 | from collections import OrderedDict | ||
| 6 | |||
| 7 | |||
| 8 | def _attr_key(attr): | ||
| 9 | """Return an appropriate key for an attribute for sorting | ||
| 10 | |||
| 11 | Attributes have a namespace that can be either ``None`` or a string. We | ||
| 12 | can't compare the two because they're different types, so we convert | ||
| 13 | ``None`` to an empty string first. | ||
| 14 | |||
| 15 | """ | ||
| 16 | return (attr[0][0] or ''), attr[0][1] | ||
| 17 | |||
| 18 | |||
class Filter(base.Filter):
    """Alphabetizes attributes for elements"""
    def __iter__(self):
        for token in base.Filter.__iter__(self):
            if token["type"] not in ("StartTag", "EmptyTag"):
                yield token
                continue
            # Rebuild the attribute mapping in sorted order; OrderedDict
            # preserves the insertion order produced by sorted().
            token["data"] = OrderedDict(
                sorted(token["data"].items(), key=_attr_key))
            yield token
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/base.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/base.py new file mode 100644 index 0000000..f5aa523 --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/base.py | |||
| @@ -0,0 +1,12 @@ | |||
| 1 | from __future__ import absolute_import, division, unicode_literals | ||
| 2 | |||
| 3 | |||
class Filter(object):
    """Base class for token-stream filters.

    Wraps a token ``source``: iterating the filter iterates the source,
    and any attribute not found on the filter itself is proxied to the
    source via ``__getattr__``.
    """

    def __init__(self, source):
        self.source = source

    def __iter__(self):
        return iter(self.source)

    def __getattr__(self, name):
        # Only reached for names missing on this instance/class, so the
        # proxying never shadows the filter's own attributes.
        return getattr(self.source, name)
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/inject_meta_charset.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/inject_meta_charset.py new file mode 100644 index 0000000..2f8ec4f --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/inject_meta_charset.py | |||
| @@ -0,0 +1,73 @@ | |||
| 1 | from __future__ import absolute_import, division, unicode_literals | ||
| 2 | |||
| 3 | from . import base | ||
| 4 | |||
| 5 | |||
class Filter(base.Filter):
    """Injects ``<meta charset=ENCODING>`` tag into head of document"""
    def __init__(self, source, encoding):
        """Creates a Filter

        :arg source: the source token stream

        :arg encoding: the encoding to set

        """
        base.Filter.__init__(self, source)
        self.encoding = encoding

    def __iter__(self):
        # State machine: "pre_head" until <head> starts, "in_head" while
        # buffering head contents in ``pending``, "post_head" afterwards.
        # With no encoding to inject, pretend a meta was already found so
        # the stream passes through unmodified.
        state = "pre_head"
        meta_found = (self.encoding is None)
        pending = []

        for token in base.Filter.__iter__(self):
            type = token["type"]
            if type == "StartTag":
                if token["name"].lower() == "head":
                    state = "in_head"

            elif type == "EmptyTag":
                if token["name"].lower() == "meta":
                    # replace charset with actual encoding
                    has_http_equiv_content_type = False
                    for (namespace, name), value in token["data"].items():
                        if namespace is not None:
                            continue
                        elif name.lower() == 'charset':
                            token["data"][(namespace, name)] = self.encoding
                            meta_found = True
                            break
                        elif name == 'http-equiv' and value.lower() == 'content-type':
                            has_http_equiv_content_type = True
                    else:
                        # for/else: only runs when no charset attribute broke
                        # out of the loop; rewrite the content attribute of a
                        # <meta http-equiv="content-type"> instead.
                        if has_http_equiv_content_type and (None, "content") in token["data"]:
                            token["data"][(None, "content")] = 'text/html; charset=%s' % self.encoding
                            meta_found = True

                elif token["name"].lower() == "head" and not meta_found:
                    # insert meta into empty head
                    # (an EmptyTag "head" means a head with no children)
                    yield {"type": "StartTag", "name": "head",
                           "data": token["data"]}
                    yield {"type": "EmptyTag", "name": "meta",
                           "data": {(None, "charset"): self.encoding}}
                    yield {"type": "EndTag", "name": "head"}
                    meta_found = True
                    continue

            elif type == "EndTag":
                if token["name"].lower() == "head" and pending:
                    # insert meta into head (if necessary) and flush pending queue
                    yield pending.pop(0)
                    if not meta_found:
                        yield {"type": "EmptyTag", "name": "meta",
                               "data": {(None, "charset"): self.encoding}}
                    while pending:
                        yield pending.pop(0)
                    meta_found = True
                    state = "post_head"

            # While inside <head>, tokens are buffered (so a meta can be
            # inserted right after the head start tag); otherwise pass through.
            if state == "in_head":
                pending.append(token)
            else:
                yield token
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/lint.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/lint.py new file mode 100644 index 0000000..b5bbd97 --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/lint.py | |||
| @@ -0,0 +1,93 @@ | |||
| 1 | from __future__ import absolute_import, division, unicode_literals | ||
| 2 | |||
| 3 | from pip._vendor.six import text_type | ||
| 4 | |||
| 5 | from . import base | ||
| 6 | from ..constants import namespaces, voidElements | ||
| 7 | |||
| 8 | from ..constants import spaceCharacters | ||
| 9 | spaceCharacters = "".join(spaceCharacters) | ||
| 10 | |||
| 11 | |||
class Filter(base.Filter):
    """Lints the token stream for errors

    If it finds any errors, it'll raise an ``AssertionError``.

    """
    def __init__(self, source, require_matching_tags=True):
        """Creates a Filter

        :arg source: the source token stream

        :arg require_matching_tags: whether or not to require matching tags

        """
        super(Filter, self).__init__(source)
        self.require_matching_tags = require_matching_tags

    def __iter__(self):
        # Stack of (namespace, name) for currently-open elements; only
        # maintained when require_matching_tags is set.
        open_elements = []
        for token in base.Filter.__iter__(self):
            type = token["type"]
            if type in ("StartTag", "EmptyTag"):
                namespace = token["namespace"]
                name = token["name"]
                assert namespace is None or isinstance(namespace, text_type)
                assert namespace != ""
                assert isinstance(name, text_type)
                assert name != ""
                assert isinstance(token["data"], dict)
                # Void elements (in the HTML namespace) must be EmptyTag
                # tokens; everything else must be a StartTag.
                if (not namespace or namespace == namespaces["html"]) and name in voidElements:
                    assert type == "EmptyTag"
                else:
                    assert type == "StartTag"
                if type == "StartTag" and self.require_matching_tags:
                    open_elements.append((namespace, name))
                for (namespace, name), value in token["data"].items():
                    assert namespace is None or isinstance(namespace, text_type)
                    assert namespace != ""
                    assert isinstance(name, text_type)
                    assert name != ""
                    assert isinstance(value, text_type)

            elif type == "EndTag":
                namespace = token["namespace"]
                name = token["name"]
                assert namespace is None or isinstance(namespace, text_type)
                assert namespace != ""
                assert isinstance(name, text_type)
                assert name != ""
                if (not namespace or namespace == namespaces["html"]) and name in voidElements:
                    assert False, "Void element reported as EndTag token: %(tag)s" % {"tag": name}
                elif self.require_matching_tags:
                    start = open_elements.pop()
                    assert start == (namespace, name)

            elif type == "Comment":
                data = token["data"]
                assert isinstance(data, text_type)

            elif type in ("Characters", "SpaceCharacters"):
                data = token["data"]
                assert isinstance(data, text_type)
                assert data != ""
                if type == "SpaceCharacters":
                    assert data.strip(spaceCharacters) == ""

            elif type == "Doctype":
                name = token["name"]
                assert name is None or isinstance(name, text_type)
                # Bug fix: the original re-tested ``name`` in both of the
                # following asserts, so non-text publicId/systemId values
                # were never caught.  Check the actual fields.
                assert token["publicId"] is None or isinstance(token["publicId"], text_type)
                assert token["systemId"] is None or isinstance(token["systemId"], text_type)

            elif type == "Entity":
                assert isinstance(token["name"], text_type)

            elif type == "SerializerError":
                assert isinstance(token["data"], text_type)

            else:
                assert False, "Unknown token type: %(type)s" % {"type": type}

            yield token
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/optionaltags.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/optionaltags.py new file mode 100644 index 0000000..c8d5e54 --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/optionaltags.py | |||
| @@ -0,0 +1,207 @@ | |||
| 1 | from __future__ import absolute_import, division, unicode_literals | ||
| 2 | |||
| 3 | from . import base | ||
| 4 | |||
| 5 | |||
class Filter(base.Filter):
    """Removes optional tags from the token stream"""
    def slider(self):
        """Yield ``(previous, current, next)`` triples over the source.

        ``previous`` is ``None`` for the first token and ``next`` is
        ``None`` for the last one.
        """
        previous1 = previous2 = None
        for token in self.source:
            if previous1 is not None:
                yield previous2, previous1, token
            previous2 = previous1
            previous1 = token
        if previous1 is not None:
            yield previous2, previous1, None

    def __iter__(self):
        for previous, token, next in self.slider():
            type = token["type"]
            if type == "StartTag":
                # Start tags with attributes can never be omitted.
                if (token["data"] or
                        not self.is_optional_start(token["name"], previous, next)):
                    yield token
            elif type == "EndTag":
                if not self.is_optional_end(token["name"], next):
                    yield token
            else:
                yield token

    def is_optional_start(self, tagname, previous, next):
        """Return whether ``tagname``'s start tag may be omitted here."""
        type = next["type"] if next else None
        # Bug fix: the original used ``tagname in 'html'``, which is a
        # substring-membership test (it would also match 'h', 'ht', 'ml',
        # ...); an equality test is what was intended.
        if tagname == 'html':
            # An html element's start tag may be omitted if the first thing
            # inside the html element is not a space character or a comment.
            return type not in ("Comment", "SpaceCharacters")
        elif tagname == 'head':
            # A head element's start tag may be omitted if the first thing
            # inside the head element is an element.
            # XXX: we also omit the start tag if the head element is empty
            if type in ("StartTag", "EmptyTag"):
                return True
            elif type == "EndTag":
                return next["name"] == "head"
        elif tagname == 'body':
            # A body element's start tag may be omitted if the first thing
            # inside the body element is not a space character or a comment,
            # except if the first thing inside the body element is a script
            # or style element and the node immediately preceding the body
            # element is a head element whose end tag has been omitted.
            if type in ("Comment", "SpaceCharacters"):
                return False
            elif type == "StartTag":
                # XXX: we do not look at the preceding event, so we never omit
                # the body element's start tag if it's followed by a script or
                # a style element.
                return next["name"] not in ('script', 'style')
            else:
                return True
        elif tagname == 'colgroup':
            # A colgroup element's start tag may be omitted if the first thing
            # inside the colgroup element is a col element, and if the element
            # is not immediately preceded by another colgroup element whose
            # end tag has been omitted.
            if type in ("StartTag", "EmptyTag"):
                # XXX: we do not look at the preceding event, so instead we never
                # omit the colgroup element's end tag when it is immediately
                # followed by another colgroup element. See is_optional_end.
                return next["name"] == "col"
            else:
                return False
        elif tagname == 'tbody':
            # A tbody element's start tag may be omitted if the first thing
            # inside the tbody element is a tr element, and if the element is
            # not immediately preceded by a tbody, thead, or tfoot element
            # whose end tag has been omitted.
            if type == "StartTag":
                # omit the thead and tfoot elements' end tag when they are
                # immediately followed by a tbody element. See is_optional_end.
                if previous and previous['type'] == 'EndTag' and \
                        previous['name'] in ('tbody', 'thead', 'tfoot'):
                    return False
                return next["name"] == 'tr'
            else:
                return False
        return False

    def is_optional_end(self, tagname, next):
        """Return whether ``tagname``'s end tag may be omitted here."""
        type = next["type"] if next else None
        if tagname in ('html', 'head', 'body'):
            # An html element's end tag may be omitted if the html element
            # is not immediately followed by a space character or a comment.
            return type not in ("Comment", "SpaceCharacters")
        elif tagname in ('li', 'optgroup', 'tr'):
            # A li element's end tag may be omitted if the li element is
            # immediately followed by another li element or if there is
            # no more content in the parent element.
            # An optgroup element's end tag may be omitted if the optgroup
            # element is immediately followed by another optgroup element,
            # or if there is no more content in the parent element.
            # A tr element's end tag may be omitted if the tr element is
            # immediately followed by another tr element, or if there is
            # no more content in the parent element.
            if type == "StartTag":
                return next["name"] == tagname
            else:
                return type == "EndTag" or type is None
        elif tagname in ('dt', 'dd'):
            # A dt element's end tag may be omitted if the dt element is
            # immediately followed by another dt element or a dd element.
            # A dd element's end tag may be omitted if the dd element is
            # immediately followed by another dd element or a dt element,
            # or if there is no more content in the parent element.
            if type == "StartTag":
                return next["name"] in ('dt', 'dd')
            elif tagname == 'dd':
                return type == "EndTag" or type is None
            else:
                return False
        elif tagname == 'p':
            # A p element's end tag may be omitted if the p element is
            # immediately followed by an address, article, aside,
            # blockquote, datagrid, dialog, dir, div, dl, fieldset,
            # footer, form, h1, h2, h3, h4, h5, h6, header, hr, menu,
            # nav, ol, p, pre, section, table, or ul, element, or if
            # there is no more content in the parent element.
            if type in ("StartTag", "EmptyTag"):
                return next["name"] in ('address', 'article', 'aside',
                                        'blockquote', 'datagrid', 'dialog',
                                        'dir', 'div', 'dl', 'fieldset', 'footer',
                                        'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
                                        'header', 'hr', 'menu', 'nav', 'ol',
                                        'p', 'pre', 'section', 'table', 'ul')
            else:
                return type == "EndTag" or type is None
        elif tagname == 'option':
            # An option element's end tag may be omitted if the option
            # element is immediately followed by another option element,
            # or if it is immediately followed by an <code>optgroup</code>
            # element, or if there is no more content in the parent
            # element.
            if type == "StartTag":
                return next["name"] in ('option', 'optgroup')
            else:
                return type == "EndTag" or type is None
        elif tagname in ('rt', 'rp'):
            # An rt element's end tag may be omitted if the rt element is
            # immediately followed by an rt or rp element, or if there is
            # no more content in the parent element.
            # An rp element's end tag may be omitted if the rp element is
            # immediately followed by an rt or rp element, or if there is
            # no more content in the parent element.
            if type == "StartTag":
                return next["name"] in ('rt', 'rp')
            else:
                return type == "EndTag" or type is None
        elif tagname == 'colgroup':
            # A colgroup element's end tag may be omitted if the colgroup
            # element is not immediately followed by a space character or
            # a comment.
            if type in ("Comment", "SpaceCharacters"):
                return False
            elif type == "StartTag":
                # XXX: we also look for an immediately following colgroup
                # element. See is_optional_start.
                return next["name"] != 'colgroup'
            else:
                return True
        elif tagname in ('thead', 'tbody'):
            # A thead element's end tag may be omitted if the thead element
            # is immediately followed by a tbody or tfoot element.
            # A tbody element's end tag may be omitted if the tbody element
            # is immediately followed by a tbody or tfoot element, or if
            # there is no more content in the parent element.
            # A tfoot element's end tag may be omitted if the tfoot element
            # is immediately followed by a tbody element, or if there is no
            # more content in the parent element.
            # XXX: we never omit the end tag when the following element is
            # a tbody. See is_optional_start.
            if type == "StartTag":
                return next["name"] in ['tbody', 'tfoot']
            elif tagname == 'tbody':
                return type == "EndTag" or type is None
            else:
                return False
        elif tagname == 'tfoot':
            # A tfoot element's end tag may be omitted if the tfoot element
            # is immediately followed by a tbody element, or if there is no
            # more content in the parent element.
            # XXX: we never omit the end tag when the following element is
            # a tbody. See is_optional_start.
            if type == "StartTag":
                return next["name"] == 'tbody'
            else:
                return type == "EndTag" or type is None
        elif tagname in ('td', 'th'):
            # A td element's end tag may be omitted if the td element is
            # immediately followed by a td or th element, or if there is
            # no more content in the parent element.
            # A th element's end tag may be omitted if the th element is
            # immediately followed by a td or th element, or if there is
            # no more content in the parent element.
            if type == "StartTag":
                return next["name"] in ('td', 'th')
            else:
                return type == "EndTag" or type is None
        return False
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/sanitizer.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/sanitizer.py new file mode 100644 index 0000000..c3199a5 --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/sanitizer.py | |||
| @@ -0,0 +1,896 @@ | |||
| 1 | from __future__ import absolute_import, division, unicode_literals | ||
| 2 | |||
| 3 | import re | ||
| 4 | from xml.sax.saxutils import escape, unescape | ||
| 5 | |||
| 6 | from pip._vendor.six.moves import urllib_parse as urlparse | ||
| 7 | |||
| 8 | from . import base | ||
| 9 | from ..constants import namespaces, prefixes | ||
| 10 | |||
| 11 | __all__ = ["Filter"] | ||
| 12 | |||
| 13 | |||
#: Default whitelist of ``(namespace-URI, element-name)`` pairs spanning the
#: HTML, MathML and SVG namespaces.  Presumably consumed by the sanitizer
#: ``Filter`` defined later in this module — TODO confirm at the use site.
allowed_elements = frozenset((
    (namespaces['html'], 'a'),
    (namespaces['html'], 'abbr'),
    (namespaces['html'], 'acronym'),
    (namespaces['html'], 'address'),
    (namespaces['html'], 'area'),
    (namespaces['html'], 'article'),
    (namespaces['html'], 'aside'),
    (namespaces['html'], 'audio'),
    (namespaces['html'], 'b'),
    (namespaces['html'], 'big'),
    (namespaces['html'], 'blockquote'),
    (namespaces['html'], 'br'),
    (namespaces['html'], 'button'),
    (namespaces['html'], 'canvas'),
    (namespaces['html'], 'caption'),
    (namespaces['html'], 'center'),
    (namespaces['html'], 'cite'),
    (namespaces['html'], 'code'),
    (namespaces['html'], 'col'),
    (namespaces['html'], 'colgroup'),
    (namespaces['html'], 'command'),
    (namespaces['html'], 'datagrid'),
    (namespaces['html'], 'datalist'),
    (namespaces['html'], 'dd'),
    (namespaces['html'], 'del'),
    (namespaces['html'], 'details'),
    (namespaces['html'], 'dfn'),
    (namespaces['html'], 'dialog'),
    (namespaces['html'], 'dir'),
    (namespaces['html'], 'div'),
    (namespaces['html'], 'dl'),
    (namespaces['html'], 'dt'),
    (namespaces['html'], 'em'),
    (namespaces['html'], 'event-source'),
    (namespaces['html'], 'fieldset'),
    (namespaces['html'], 'figcaption'),
    (namespaces['html'], 'figure'),
    (namespaces['html'], 'footer'),
    (namespaces['html'], 'font'),
    (namespaces['html'], 'form'),
    (namespaces['html'], 'header'),
    (namespaces['html'], 'h1'),
    (namespaces['html'], 'h2'),
    (namespaces['html'], 'h3'),
    (namespaces['html'], 'h4'),
    (namespaces['html'], 'h5'),
    (namespaces['html'], 'h6'),
    (namespaces['html'], 'hr'),
    (namespaces['html'], 'i'),
    (namespaces['html'], 'img'),
    (namespaces['html'], 'input'),
    (namespaces['html'], 'ins'),
    (namespaces['html'], 'keygen'),
    (namespaces['html'], 'kbd'),
    (namespaces['html'], 'label'),
    (namespaces['html'], 'legend'),
    (namespaces['html'], 'li'),
    (namespaces['html'], 'm'),
    (namespaces['html'], 'map'),
    (namespaces['html'], 'menu'),
    (namespaces['html'], 'meter'),
    (namespaces['html'], 'multicol'),
    (namespaces['html'], 'nav'),
    (namespaces['html'], 'nextid'),
    (namespaces['html'], 'ol'),
    (namespaces['html'], 'output'),
    (namespaces['html'], 'optgroup'),
    (namespaces['html'], 'option'),
    (namespaces['html'], 'p'),
    (namespaces['html'], 'pre'),
    (namespaces['html'], 'progress'),
    (namespaces['html'], 'q'),
    (namespaces['html'], 's'),
    (namespaces['html'], 'samp'),
    (namespaces['html'], 'section'),
    (namespaces['html'], 'select'),
    (namespaces['html'], 'small'),
    (namespaces['html'], 'sound'),
    (namespaces['html'], 'source'),
    (namespaces['html'], 'spacer'),
    (namespaces['html'], 'span'),
    (namespaces['html'], 'strike'),
    (namespaces['html'], 'strong'),
    (namespaces['html'], 'sub'),
    (namespaces['html'], 'sup'),
    (namespaces['html'], 'table'),
    (namespaces['html'], 'tbody'),
    (namespaces['html'], 'td'),
    (namespaces['html'], 'textarea'),
    (namespaces['html'], 'time'),
    (namespaces['html'], 'tfoot'),
    (namespaces['html'], 'th'),
    (namespaces['html'], 'thead'),
    (namespaces['html'], 'tr'),
    (namespaces['html'], 'tt'),
    (namespaces['html'], 'u'),
    (namespaces['html'], 'ul'),
    (namespaces['html'], 'var'),
    (namespaces['html'], 'video'),
    (namespaces['mathml'], 'maction'),
    (namespaces['mathml'], 'math'),
    (namespaces['mathml'], 'merror'),
    (namespaces['mathml'], 'mfrac'),
    (namespaces['mathml'], 'mi'),
    (namespaces['mathml'], 'mmultiscripts'),
    (namespaces['mathml'], 'mn'),
    (namespaces['mathml'], 'mo'),
    (namespaces['mathml'], 'mover'),
    (namespaces['mathml'], 'mpadded'),
    (namespaces['mathml'], 'mphantom'),
    (namespaces['mathml'], 'mprescripts'),
    (namespaces['mathml'], 'mroot'),
    (namespaces['mathml'], 'mrow'),
    (namespaces['mathml'], 'mspace'),
    (namespaces['mathml'], 'msqrt'),
    (namespaces['mathml'], 'mstyle'),
    (namespaces['mathml'], 'msub'),
    (namespaces['mathml'], 'msubsup'),
    (namespaces['mathml'], 'msup'),
    (namespaces['mathml'], 'mtable'),
    (namespaces['mathml'], 'mtd'),
    (namespaces['mathml'], 'mtext'),
    (namespaces['mathml'], 'mtr'),
    (namespaces['mathml'], 'munder'),
    (namespaces['mathml'], 'munderover'),
    (namespaces['mathml'], 'none'),
    (namespaces['svg'], 'a'),
    (namespaces['svg'], 'animate'),
    (namespaces['svg'], 'animateColor'),
    (namespaces['svg'], 'animateMotion'),
    (namespaces['svg'], 'animateTransform'),
    (namespaces['svg'], 'clipPath'),
    (namespaces['svg'], 'circle'),
    (namespaces['svg'], 'defs'),
    (namespaces['svg'], 'desc'),
    (namespaces['svg'], 'ellipse'),
    (namespaces['svg'], 'font-face'),
    (namespaces['svg'], 'font-face-name'),
    (namespaces['svg'], 'font-face-src'),
    (namespaces['svg'], 'g'),
    (namespaces['svg'], 'glyph'),
    (namespaces['svg'], 'hkern'),
    (namespaces['svg'], 'linearGradient'),
    (namespaces['svg'], 'line'),
    (namespaces['svg'], 'marker'),
    (namespaces['svg'], 'metadata'),
    (namespaces['svg'], 'missing-glyph'),
    (namespaces['svg'], 'mpath'),
    (namespaces['svg'], 'path'),
    (namespaces['svg'], 'polygon'),
    (namespaces['svg'], 'polyline'),
    (namespaces['svg'], 'radialGradient'),
    (namespaces['svg'], 'rect'),
    (namespaces['svg'], 'set'),
    (namespaces['svg'], 'stop'),
    (namespaces['svg'], 'svg'),
    (namespaces['svg'], 'switch'),
    (namespaces['svg'], 'text'),
    (namespaces['svg'], 'title'),
    (namespaces['svg'], 'tspan'),
    (namespaces['svg'], 'use'),
))
| 177 | |||
#: Default whitelist of attributes the sanitizer keeps.  Keys are
#: ``(namespace, local-name)`` pairs; ``None`` means "no namespace".
#: Entries are grouped by the spec they come from (HTML / MathML / SVG);
#: a name repeated across groups is intentional documentation of
#: provenance — the frozenset de-duplicates them anyway.  Accidental
#: adjacent duplicates within a group have been removed.
allowed_attributes = frozenset((
    # HTML attributes
    (None, 'abbr'),
    (None, 'accept'),
    (None, 'accept-charset'),
    (None, 'accesskey'),
    (None, 'action'),
    (None, 'align'),
    (None, 'alt'),
    (None, 'autocomplete'),
    (None, 'autofocus'),
    (None, 'axis'),
    (None, 'background'),
    (None, 'balance'),
    (None, 'bgcolor'),
    (None, 'bgproperties'),
    (None, 'border'),
    (None, 'bordercolor'),
    (None, 'bordercolordark'),
    (None, 'bordercolorlight'),
    (None, 'bottompadding'),
    (None, 'cellpadding'),
    (None, 'cellspacing'),
    (None, 'ch'),
    (None, 'challenge'),
    (None, 'char'),
    (None, 'charoff'),
    (None, 'choff'),
    (None, 'charset'),
    (None, 'checked'),
    (None, 'cite'),
    (None, 'class'),
    (None, 'clear'),
    (None, 'color'),
    (None, 'cols'),
    (None, 'colspan'),
    (None, 'compact'),
    (None, 'contenteditable'),
    (None, 'controls'),
    (None, 'coords'),
    (None, 'data'),
    (None, 'datafld'),
    (None, 'datapagesize'),
    (None, 'datasrc'),
    (None, 'datetime'),
    (None, 'default'),
    (None, 'delay'),
    (None, 'dir'),
    (None, 'disabled'),
    (None, 'draggable'),
    (None, 'dynsrc'),
    (None, 'enctype'),
    (None, 'end'),
    (None, 'face'),
    (None, 'for'),
    (None, 'form'),
    (None, 'frame'),
    (None, 'galleryimg'),
    (None, 'gutter'),
    (None, 'headers'),
    (None, 'height'),
    (None, 'hidefocus'),
    (None, 'hidden'),
    (None, 'high'),
    (None, 'href'),
    (None, 'hreflang'),
    (None, 'hspace'),
    (None, 'icon'),
    (None, 'id'),
    (None, 'inputmode'),
    (None, 'ismap'),
    (None, 'keytype'),
    (None, 'label'),
    (None, 'leftspacing'),
    (None, 'lang'),
    (None, 'list'),
    (None, 'longdesc'),
    (None, 'loop'),
    (None, 'loopcount'),
    (None, 'loopend'),
    (None, 'loopstart'),
    (None, 'low'),
    (None, 'lowsrc'),
    (None, 'max'),
    (None, 'maxlength'),
    (None, 'media'),
    (None, 'method'),
    (None, 'min'),
    (None, 'multiple'),
    (None, 'name'),
    (None, 'nohref'),
    (None, 'noshade'),
    (None, 'nowrap'),
    (None, 'open'),
    (None, 'optimum'),
    (None, 'pattern'),
    (None, 'ping'),
    (None, 'point-size'),
    (None, 'poster'),
    (None, 'pqg'),
    (None, 'preload'),
    (None, 'prompt'),
    (None, 'radiogroup'),
    (None, 'readonly'),
    (None, 'rel'),
    (None, 'repeat-max'),
    (None, 'repeat-min'),
    (None, 'replace'),
    (None, 'required'),
    (None, 'rev'),
    (None, 'rightspacing'),
    (None, 'rows'),
    (None, 'rowspan'),
    (None, 'rules'),
    (None, 'scope'),
    (None, 'selected'),
    (None, 'shape'),
    (None, 'size'),
    (None, 'span'),
    (None, 'src'),
    (None, 'start'),
    (None, 'step'),
    (None, 'style'),
    (None, 'summary'),
    (None, 'suppress'),
    (None, 'tabindex'),
    (None, 'target'),
    (None, 'template'),
    (None, 'title'),
    (None, 'toppadding'),
    (None, 'type'),
    (None, 'unselectable'),
    (None, 'usemap'),
    (None, 'urn'),
    (None, 'valign'),
    (None, 'value'),
    (None, 'variable'),
    (None, 'volume'),
    (None, 'vspace'),
    (None, 'vrml'),
    (None, 'width'),
    (None, 'wrap'),
    (namespaces['xml'], 'lang'),
    # MathML attributes
    (None, 'actiontype'),
    (None, 'align'),
    (None, 'columnalign'),
    (None, 'columnlines'),
    (None, 'columnspacing'),
    (None, 'columnspan'),
    (None, 'depth'),
    (None, 'display'),
    (None, 'displaystyle'),
    (None, 'equalcolumns'),
    (None, 'equalrows'),
    (None, 'fence'),
    (None, 'fontstyle'),
    (None, 'fontweight'),
    (None, 'frame'),
    (None, 'height'),
    (None, 'linethickness'),
    (None, 'lspace'),
    (None, 'mathbackground'),
    (None, 'mathcolor'),
    (None, 'mathvariant'),
    (None, 'maxsize'),
    (None, 'minsize'),
    (None, 'other'),
    (None, 'rowalign'),
    (None, 'rowlines'),
    (None, 'rowspacing'),
    (None, 'rowspan'),
    (None, 'rspace'),
    (None, 'scriptlevel'),
    (None, 'selection'),
    (None, 'separator'),
    (None, 'stretchy'),
    (None, 'width'),
    (namespaces['xlink'], 'href'),
    (namespaces['xlink'], 'show'),
    (namespaces['xlink'], 'type'),
    # SVG attributes
    (None, 'accent-height'),
    (None, 'accumulate'),
    (None, 'additive'),
    (None, 'alphabetic'),
    (None, 'arabic-form'),
    (None, 'ascent'),
    (None, 'attributeName'),
    (None, 'attributeType'),
    (None, 'baseProfile'),
    (None, 'bbox'),
    (None, 'begin'),
    (None, 'by'),
    (None, 'calcMode'),
    (None, 'cap-height'),
    (None, 'class'),
    (None, 'clip-path'),
    (None, 'color'),
    (None, 'color-rendering'),
    (None, 'content'),
    (None, 'cx'),
    (None, 'cy'),
    (None, 'd'),
    (None, 'dx'),
    (None, 'dy'),
    (None, 'descent'),
    (None, 'display'),
    (None, 'dur'),
    (None, 'end'),
    (None, 'fill'),
    (None, 'fill-opacity'),
    (None, 'fill-rule'),
    (None, 'font-family'),
    (None, 'font-size'),
    (None, 'font-stretch'),
    (None, 'font-style'),
    (None, 'font-variant'),
    (None, 'font-weight'),
    (None, 'from'),
    (None, 'fx'),
    (None, 'fy'),
    (None, 'g1'),
    (None, 'g2'),
    (None, 'glyph-name'),
    (None, 'gradientUnits'),
    (None, 'hanging'),
    (None, 'height'),
    (None, 'horiz-adv-x'),
    (None, 'horiz-origin-x'),
    (None, 'id'),
    (None, 'ideographic'),
    (None, 'k'),
    (None, 'keyPoints'),
    (None, 'keySplines'),
    (None, 'keyTimes'),
    (None, 'lang'),
    (None, 'marker-end'),
    (None, 'marker-mid'),
    (None, 'marker-start'),
    (None, 'markerHeight'),
    (None, 'markerUnits'),
    (None, 'markerWidth'),
    (None, 'mathematical'),
    (None, 'max'),
    (None, 'min'),
    (None, 'name'),
    (None, 'offset'),
    (None, 'opacity'),
    (None, 'orient'),
    (None, 'origin'),
    (None, 'overline-position'),
    (None, 'overline-thickness'),
    (None, 'panose-1'),
    (None, 'path'),
    (None, 'pathLength'),
    (None, 'points'),
    (None, 'preserveAspectRatio'),
    (None, 'r'),
    (None, 'refX'),
    (None, 'refY'),
    (None, 'repeatCount'),
    (None, 'repeatDur'),
    (None, 'requiredExtensions'),
    (None, 'requiredFeatures'),
    (None, 'restart'),
    (None, 'rotate'),
    (None, 'rx'),
    (None, 'ry'),
    (None, 'slope'),
    (None, 'stemh'),
    (None, 'stemv'),
    (None, 'stop-color'),
    (None, 'stop-opacity'),
    (None, 'strikethrough-position'),
    (None, 'strikethrough-thickness'),
    (None, 'stroke'),
    (None, 'stroke-dasharray'),
    (None, 'stroke-dashoffset'),
    (None, 'stroke-linecap'),
    (None, 'stroke-linejoin'),
    (None, 'stroke-miterlimit'),
    (None, 'stroke-opacity'),
    (None, 'stroke-width'),
    (None, 'systemLanguage'),
    (None, 'target'),
    (None, 'text-anchor'),
    (None, 'to'),
    (None, 'transform'),
    (None, 'type'),
    (None, 'u1'),
    (None, 'u2'),
    (None, 'underline-position'),
    (None, 'underline-thickness'),
    (None, 'unicode'),
    (None, 'unicode-range'),
    (None, 'units-per-em'),
    (None, 'values'),
    (None, 'version'),
    (None, 'viewBox'),
    (None, 'visibility'),
    (None, 'width'),
    (None, 'widths'),
    (None, 'x'),
    (None, 'x-height'),
    (None, 'x1'),
    (None, 'x2'),
    (namespaces['xlink'], 'actuate'),
    (namespaces['xlink'], 'arcrole'),
    (namespaces['xlink'], 'href'),
    (namespaces['xlink'], 'role'),
    (namespaces['xlink'], 'show'),
    (namespaces['xlink'], 'title'),
    (namespaces['xlink'], 'type'),
    (namespaces['xml'], 'base'),
    (namespaces['xml'], 'lang'),
    (namespaces['xml'], 'space'),
    (None, 'y'),
    (None, 'y1'),
    (None, 'y2'),
    (None, 'zoomAndPan'),
))
| 506 | |||
#: ``(namespace, name)`` pairs whose value is a URI; such values must carry a
#: scheme from ``allowed_protocols`` (or no scheme) to survive sanitization.
attr_val_is_uri = frozenset((
    (None, 'action'),
    (None, 'background'),
    (None, 'cite'),
    (None, 'datasrc'),
    (None, 'dynsrc'),
    (None, 'href'),
    (None, 'longdesc'),
    (None, 'lowsrc'),
    (None, 'ping'),
    (None, 'poster'),
    (None, 'src'),
    (namespaces['xlink'], 'href'),
    (namespaces['xml'], 'base'),
))
| 522 | |||
#: SVG presentation attributes whose value may contain a ``url(...)``
#: reference; non-local references are blanked out by the sanitizer.
svg_attr_val_allows_ref = frozenset((
    (None, 'clip-path'),
    (None, 'color-profile'),
    (None, 'cursor'),
    (None, 'fill'),
    (None, 'filter'),
    (None, 'marker'),
    (None, 'marker-end'),
    (None, 'marker-mid'),
    (None, 'marker-start'),
    (None, 'mask'),
    (None, 'stroke'),
))
| 536 | |||
#: SVG element names that may keep an ``xlink:href`` only when it is a local
#: (fragment-only) reference; non-local hrefs on these elements are dropped.
svg_allow_local_href = frozenset((
    (None, 'altGlyph'),
    (None, 'animate'),
    (None, 'animateColor'),
    (None, 'animateMotion'),
    (None, 'animateTransform'),
    (None, 'cursor'),
    (None, 'feImage'),
    (None, 'filter'),
    (None, 'linearGradient'),
    (None, 'pattern'),
    (None, 'radialGradient'),
    (None, 'set'),
    (None, 'textpath'),
    (None, 'tref'),
    (None, 'use'),
))
| 554 | |||
#: CSS property names allowed inside a ``style`` attribute; everything else
#: is stripped by ``Filter.sanitize_css``.
allowed_css_properties = frozenset("""
    azimuth background-color border-bottom-color border-collapse border-color
    border-left-color border-right-color border-top-color clear color cursor
    direction display elevation float font font-family font-size font-style
    font-variant font-weight height letter-spacing line-height overflow pause
    pause-after pause-before pitch pitch-range richness speak speak-header
    speak-numeral speak-punctuation speech-rate stress text-align
    text-decoration text-indent unicode-bidi vertical-align voice-family
    volume white-space width
""".split())
| 603 | |||
#: Keywords permitted as values of background/border/margin/padding
#: shorthand properties in ``Filter.sanitize_css``.
allowed_css_keywords = frozenset("""
    auto aqua black block blue bold both bottom brown center collapse dashed
    dotted fuchsia gray green !important italic left lime maroon medium none
    navy normal nowrap olive pointer purple red right solid silver teal top
    transparent underline white yellow
""".split())
| 645 | |||
#: SVG styling properties that may appear in a ``style`` attribute.
allowed_svg_properties = frozenset("""
    fill fill-opacity fill-rule
    stroke stroke-width stroke-linecap stroke-linejoin stroke-opacity
""".split())
| 656 | |||
#: URI schemes accepted for attributes listed in ``attr_val_is_uri``.
#: Notably absent (and therefore stripped): ``javascript``, ``vbscript``.
allowed_protocols = frozenset("""
    ed2k ftp http https irc mailto news gopher nntp telnet webcal xmpp
    callto feed urn aim rsync tag ssh sftp rtsp afs data
""".split())
| 682 | |||
#: Content types permitted inside ``data:`` URIs that survive sanitization.
allowed_content_types = frozenset("""
    image/png image/jpeg image/gif image/webp image/bmp text/plain
""".split())
| 691 | |||
| 692 | |||
#: Parses the head of a ``data:`` URI path into a ``content_type`` group plus
#: optional ``;charset=...`` / ``;base64`` modifiers, up to the mandatory
#: comma.  Used by ``Filter.allowed_token`` to vet data URIs against
#: ``allowed_content_types``.  The ``#`` lines inside the pattern are regex
#: comments (``re.VERBOSE``), not Python comments — do not edit them casually.
data_content_type = re.compile(r'''
                                ^
                                # Match a content type <application>/<type>
                                (?P<content_type>[-a-zA-Z0-9.]+/[-a-zA-Z0-9.]+)
                                # Match any character set and encoding
                                (?:(?:;charset=(?:[-a-zA-Z0-9]+)(?:;(?:base64))?)
                                 |(?:;(?:base64))?(?:;charset=(?:[-a-zA-Z0-9]+))?)
                                # Assume the rest is data
                                ,.*
                                $
                                ''',
                               re.VERBOSE)
| 705 | |||
| 706 | |||
class Filter(base.Filter):
    """Sanitizes token stream of XHTML+MathML+SVG and of inline style attributes"""
    def __init__(self,
                 source,
                 allowed_elements=allowed_elements,
                 allowed_attributes=allowed_attributes,
                 allowed_css_properties=allowed_css_properties,
                 allowed_css_keywords=allowed_css_keywords,
                 allowed_svg_properties=allowed_svg_properties,
                 allowed_protocols=allowed_protocols,
                 allowed_content_types=allowed_content_types,
                 attr_val_is_uri=attr_val_is_uri,
                 svg_attr_val_allows_ref=svg_attr_val_allows_ref,
                 svg_allow_local_href=svg_allow_local_href):
        """Creates a Filter

        :arg allowed_elements: set of elements to allow--everything else will
            be escaped

        :arg allowed_attributes: set of attributes to allow in
            elements--everything else will be stripped

        :arg allowed_css_properties: set of CSS properties to allow--everything
            else will be stripped

        :arg allowed_css_keywords: set of CSS keywords to allow--everything
            else will be stripped

        :arg allowed_svg_properties: set of SVG properties to allow--everything
            else will be removed

        :arg allowed_protocols: set of allowed protocols for URIs

        :arg allowed_content_types: set of allowed content types for ``data`` URIs.

        :arg attr_val_is_uri: set of attributes that have URI values--values
            that have a scheme not listed in ``allowed_protocols`` are removed

        :arg svg_attr_val_allows_ref: set of SVG attributes that can have
            references

        :arg svg_allow_local_href: set of SVG elements that can have local
            hrefs--these are removed

        """
        super(Filter, self).__init__(source)
        # Each of these simply shadows the module-level default so a caller
        # can narrow (or widen) the policy per Filter instance.
        self.allowed_elements = allowed_elements
        self.allowed_attributes = allowed_attributes
        self.allowed_css_properties = allowed_css_properties
        self.allowed_css_keywords = allowed_css_keywords
        self.allowed_svg_properties = allowed_svg_properties
        self.allowed_protocols = allowed_protocols
        self.allowed_content_types = allowed_content_types
        self.attr_val_is_uri = attr_val_is_uri
        self.svg_attr_val_allows_ref = svg_attr_val_allows_ref
        self.svg_allow_local_href = svg_allow_local_href

    def __iter__(self):
        """Yield each upstream token after sanitization.

        Tokens for which ``sanitize_token`` returns a falsy value (e.g.
        comments, which it swallows) are dropped from the stream.
        """
        for token in base.Filter.__iter__(self):
            token = self.sanitize_token(token)
            if token:
                yield token

    # Sanitize the +html+, escaping all elements not in ALLOWED_ELEMENTS, and
    # stripping out all attributes not in ALLOWED_ATTRIBUTES. Style attributes
    # are parsed, and a restricted set, specified by ALLOWED_CSS_PROPERTIES and
    # ALLOWED_CSS_KEYWORDS, are allowed through. attributes in ATTR_VAL_IS_URI
    # are scanned, and only URI schemes specified in ALLOWED_PROTOCOLS are
    # allowed.
    #
    #   sanitize_html('<script> do_nasty_stuff() </script>')
    #    => &lt;script> do_nasty_stuff()&lt;/script>
    #   sanitize_html('<a href="javascript: sucker();">Click here for $100</a>')
    #    => <a>Click here for $100</a>
    def sanitize_token(self, token):
        """Dispatch one token: allow, escape, drop, or pass it through.

        Tag tokens go to ``allowed_token`` / ``disallowed_token`` depending on
        the element whitelist; comments are dropped (returns ``None``); all
        other token types pass through unchanged.
        """

        # accommodate filters which use token_type differently
        token_type = token["type"]
        if token_type in ("StartTag", "EndTag", "EmptyTag"):
            name = token["name"]
            namespace = token["namespace"]
            # An un-namespaced tag is also accepted when the HTML-namespaced
            # form of the same name is whitelisted.
            if ((namespace, name) in self.allowed_elements or
                    (namespace is None and
                     (namespaces["html"], name) in self.allowed_elements)):
                return self.allowed_token(token)
            else:
                return self.disallowed_token(token)
        elif token_type == "Comment":
            # Comments are silently dropped (no return value).
            pass
        else:
            return token

    def allowed_token(self, token):
        """Scrub the attributes of a whitelisted tag token and return it."""
        if "data" in token:
            attrs = token["data"]
            attr_names = set(attrs.keys())

            # Remove forbidden attributes
            for to_remove in (attr_names - self.allowed_attributes):
                del token["data"][to_remove]
                attr_names.remove(to_remove)

            # Remove attributes with disallowed URL values
            for attr in (attr_names & self.attr_val_is_uri):
                assert attr in attrs
                # I don't have a clue where this regexp comes from or why it matches those
                # characters, nor why we call unescape. I just know it's always been here.
                # Should you be worried by this comment in a sanitizer? Yes. On the other hand, all
                # this will do is remove *more* than it otherwise would.
                val_unescaped = re.sub("[`\x00-\x20\x7f-\xa0\\s]+", '',
                                       unescape(attrs[attr])).lower()
                # remove replacement characters from unescaped characters
                val_unescaped = val_unescaped.replace("\ufffd", "")
                try:
                    uri = urlparse.urlparse(val_unescaped)
                except ValueError:
                    # Unparseable value: treat as unsafe and drop it.
                    uri = None
                    del attrs[attr]
                if uri and uri.scheme:
                    if uri.scheme not in self.allowed_protocols:
                        del attrs[attr]
                    # NOTE(review): if 'data' is NOT in allowed_protocols, the
                    # branch above already deleted attrs[attr] and the deletes
                    # below would raise KeyError — confirm with a custom
                    # allowed_protocols before relying on that configuration.
                    if uri.scheme == 'data':
                        m = data_content_type.match(uri.path)
                        if not m:
                            del attrs[attr]
                        elif m.group('content_type') not in self.allowed_content_types:
                            del attrs[attr]

            # Blank out non-local url(...) references in SVG attributes that
            # allow references.
            for attr in self.svg_attr_val_allows_ref:
                if attr in attrs:
                    attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)',
                                         ' ',
                                         unescape(attrs[attr]))
            # Elements in svg_allow_local_href may only keep an xlink:href
            # that is a local fragment reference (starts with '#').
            if (token["name"] in self.svg_allow_local_href and
                    (namespaces['xlink'], 'href') in attrs and re.search(r'^\s*[^#\s].*',
                                                                         attrs[(namespaces['xlink'], 'href')])):
                del attrs[(namespaces['xlink'], 'href')]
            if (None, 'style') in attrs:
                attrs[(None, 'style')] = self.sanitize_css(attrs[(None, 'style')])
            token["data"] = attrs
        return token

    def disallowed_token(self, token):
        """Turn a non-whitelisted tag token into a plain Characters token.

        The tag is re-serialized as text (attribute values escaped) so it is
        rendered literally rather than interpreted as markup.
        """
        token_type = token["type"]
        if token_type == "EndTag":
            token["data"] = "</%s>" % token["name"]
        elif token["data"]:
            assert token_type in ("StartTag", "EmptyTag")
            attrs = []
            for (ns, name), v in token["data"].items():
                attrs.append(' %s="%s"' % (name if ns is None else "%s:%s" % (prefixes[ns], name), escape(v)))
            token["data"] = "<%s%s>" % (token["name"], ''.join(attrs))
        else:
            token["data"] = "<%s>" % token["name"]
        if token.get("selfClosing"):
            # Re-insert the "/" of a self-closing tag before the ">".
            token["data"] = token["data"][:-1] + "/>"

        token["type"] = "Characters"

        del token["name"]
        return token

    def sanitize_css(self, style):
        """Return *style* reduced to whitelisted CSS, or '' if it looks unsafe.

        Strategy: strip url(...) tokens, reject the whole value if it fails
        two shape checks ("gauntlet"), then keep only property:value pairs
        whose property (or, for box shorthands, every value keyword) is
        whitelisted.
        """
        # disallow urls
        style = re.compile(r'url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ', style)

        # gauntlet
        if not re.match(r"""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style):
            return ''
        if not re.match(r"^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style):
            return ''

        clean = []
        for prop, value in re.findall(r"([-\w]+)\s*:\s*([^:;]*)", style):
            if not value:
                continue
            if prop.lower() in self.allowed_css_properties:
                clean.append(prop + ': ' + value + ';')
            elif prop.split('-')[0].lower() in ['background', 'border', 'margin',
                                                'padding']:
                # Shorthand box properties: every whitespace-separated keyword
                # must be a whitelisted keyword, a hex color, rgb(...), or a
                # number-with-unit; otherwise the whole declaration is dropped.
                for keyword in value.split():
                    if keyword not in self.allowed_css_keywords and \
                            not re.match(r"^(#[0-9a-fA-F]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword):  # noqa
                        break
                else:
                    clean.append(prop + ': ' + value + ';')
            elif prop.lower() in self.allowed_svg_properties:
                clean.append(prop + ': ' + value + ';')

        return ' '.join(clean)
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/whitespace.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/whitespace.py new file mode 100644 index 0000000..24bb0de --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/whitespace.py | |||
| @@ -0,0 +1,38 @@ | |||
| 1 | from __future__ import absolute_import, division, unicode_literals | ||
| 2 | |||
| 3 | import re | ||
| 4 | |||
| 5 | from . import base | ||
| 6 | from ..constants import rcdataElements, spaceCharacters | ||
# Flatten the imported collection of space characters into a single string so
# it can be embedded in the regex character class below.
spaceCharacters = "".join(spaceCharacters)

# Matches one or more consecutive HTML space characters.
SPACES_REGEX = re.compile("[%s]+" % spaceCharacters)
| 10 | |||
| 11 | |||
class Filter(base.Filter):
    """Collapses whitespace except in pre, textarea, and script elements"""
    # Elements whose textual content keeps its whitespace verbatim: pre,
    # textarea, plus the RCDATA elements from the constants module.
    spacePreserveElements = frozenset(["pre", "textarea"] + list(rcdataElements))

    def __iter__(self):
        # Depth counter of currently-open whitespace-preserving elements;
        # collapsing only happens while it is zero.
        preserve = 0
        for token in base.Filter.__iter__(self):
            # Renamed from `type`, which shadowed the builtin.
            token_type = token["type"]
            if token_type == "StartTag" \
                    and (preserve or token["name"] in self.spacePreserveElements):
                preserve += 1

            elif token_type == "EndTag" and preserve:
                preserve -= 1

            elif not preserve and token_type == "SpaceCharacters" and token["data"]:
                # Test on token["data"] above to not introduce spaces where there were not
                token["data"] = " "

            elif not preserve and token_type == "Characters":
                token["data"] = collapse_spaces(token["data"])

            yield token
| 35 | |||
| 36 | |||
def collapse_spaces(text):
    """Replace every run of space characters in *text* with a single space."""
    return SPACES_REGEX.sub(" ", text)
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/html5parser.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/html5parser.py new file mode 100644 index 0000000..b185971 --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/html5parser.py | |||
| @@ -0,0 +1,2791 @@ | |||
| 1 | from __future__ import absolute_import, division, unicode_literals | ||
| 2 | from pip._vendor.six import with_metaclass, viewkeys | ||
| 3 | |||
| 4 | import types | ||
| 5 | from collections import OrderedDict | ||
| 6 | |||
| 7 | from . import _inputstream | ||
| 8 | from . import _tokenizer | ||
| 9 | |||
| 10 | from . import treebuilders | ||
| 11 | from .treebuilders.base import Marker | ||
| 12 | |||
| 13 | from . import _utils | ||
| 14 | from .constants import ( | ||
| 15 | spaceCharacters, asciiUpper2Lower, | ||
| 16 | specialElements, headingElements, cdataElements, rcdataElements, | ||
| 17 | tokenTypes, tagTokenTypes, | ||
| 18 | namespaces, | ||
| 19 | htmlIntegrationPointElements, mathmlTextIntegrationPointElements, | ||
| 20 | adjustForeignAttributes as adjustForeignAttributesMap, | ||
| 21 | adjustMathMLAttributes, adjustSVGAttributes, | ||
| 22 | E, | ||
| 23 | _ReparseException | ||
| 24 | ) | ||
| 25 | |||
| 26 | |||
def parse(doc, treebuilder="etree", namespaceHTMLElements=True, **kwargs):
    """Parse an HTML document (string or file-like object) into a tree.

    :arg doc: the document to parse as a string or file-like object

    :arg treebuilder: name of the treebuilder to use when parsing

    :arg namespaceHTMLElements: whether or not to namespace HTML elements

    :returns: the parsed tree, as produced by the chosen treebuilder

    """
    builder_class = treebuilders.getTreeBuilder(treebuilder)
    parser = HTMLParser(builder_class, namespaceHTMLElements=namespaceHTMLElements)
    return parser.parse(doc, **kwargs)
| 48 | |||
| 49 | |||
def parseFragment(doc, container="div", treebuilder="etree", namespaceHTMLElements=True, **kwargs):
    """Parse an HTML fragment (string or file-like object) into a tree.

    :arg doc: the fragment to parse as a string or file-like object

    :arg container: the container context to parse the fragment in

    :arg treebuilder: name of the treebuilder to use when parsing

    :arg namespaceHTMLElements: whether or not to namespace HTML elements

    :returns: the parsed tree, as produced by the chosen treebuilder

    """
    builder_class = treebuilders.getTreeBuilder(treebuilder)
    parser = HTMLParser(builder_class, namespaceHTMLElements=namespaceHTMLElements)
    return parser.parseFragment(doc, container=container, **kwargs)
| 73 | |||
| 74 | |||
def method_decorator_metaclass(function):
    """Build a metaclass that applies *function* to every plain function
    attribute of the classes it creates (non-function attributes are kept
    as-is)."""
    class Decorated(type):
        def __new__(meta, classname, bases, classDict):
            wrapped = {
                name: (function(attr) if isinstance(attr, types.FunctionType)
                       else attr)
                for name, attr in classDict.items()
            }
            return type.__new__(meta, classname, bases, wrapped)
    return Decorated
| 85 | |||
| 86 | |||
class HTMLParser(object):
    """HTML parser

    Generates a tree structure from a stream of (possibly malformed) HTML.

    """

    def __init__(self, tree=None, strict=False, namespaceHTMLElements=True, debug=False):
        """
        :arg tree: a treebuilder class controlling the type of tree that will be
            returned. Built in treebuilders can be accessed through
            html5lib.treebuilders.getTreeBuilder(treeType)

        :arg strict: raise an exception when a parse error is encountered

        :arg namespaceHTMLElements: whether or not to namespace HTML elements

        :arg debug: whether or not to enable debug mode which logs things

        Example:

        >>> from html5lib.html5parser import HTMLParser
        >>> parser = HTMLParser()                     # generates parser with etree builder
        >>> parser = HTMLParser('lxml', strict=True)  # generates parser with lxml builder which is strict

        """

        # Raise an exception on the first error encountered
        self.strict = strict

        if tree is None:
            tree = treebuilders.getTreeBuilder("etree")
        self.tree = tree(namespaceHTMLElements)
        self.errors = []

        # One phase object per insertion mode, keyed by name.  getPhases()
        # is memoized, so the classes are shared, but each parser instance
        # gets its own phase objects bound to its own tree.
        self.phases = dict([(name, cls(self, self.tree)) for name, cls in
                            getPhases(debug).items()])

    def _parse(self, stream, innerHTML=False, container="div", scripting=False, **kwargs):
        # Shared driver behind parse() and parseFragment(): build the
        # tokenizer, reset all state, and run the main loop.  If the input
        # stream signals a restart (e.g. a <meta charset> changed the
        # detected encoding -- see InHeadPhase.startTagMeta), reset and
        # run the whole loop once more from the beginning.

        self.innerHTMLMode = innerHTML
        self.container = container
        self.scripting = scripting
        self.tokenizer = _tokenizer.HTMLTokenizer(stream, parser=self, **kwargs)
        self.reset()

        try:
            self.mainLoop()
        except _ReparseException:
            self.reset()
            self.mainLoop()

    def reset(self):
        # Restore the parser (and its tree) to the initial state so the
        # same instance can be reused or restarted after _ReparseException.
        self.tree.reset()
        self.firstStartTag = False
        self.errors = []
        self.log = []  # only used with debug mode
        # "quirks" / "limited quirks" / "no quirks"
        self.compatMode = "no quirks"

        if self.innerHTMLMode:
            # Fragment parsing: behave as if we were inside `container`.
            self.innerHTML = self.container.lower()

            # Pick the tokenizer start state for the container element.
            # Note the cross-naming: cdataElements map to the RCDATA state
            # and rcdataElements map to the RAWTEXT state.
            if self.innerHTML in cdataElements:
                self.tokenizer.state = self.tokenizer.rcdataState
            elif self.innerHTML in rcdataElements:
                self.tokenizer.state = self.tokenizer.rawtextState
            elif self.innerHTML == 'plaintext':
                self.tokenizer.state = self.tokenizer.plaintextState
            else:
                # state already is data state
                # self.tokenizer.state = self.tokenizer.dataState
                pass
            self.phase = self.phases["beforeHtml"]
            self.phase.insertHtmlElement()
            self.resetInsertionMode()
        else:
            self.innerHTML = False  # pylint:disable=redefined-variable-type
            self.phase = self.phases["initial"]

        self.lastPhase = None

        self.beforeRCDataPhase = None

        self.framesetOK = True

    @property
    def documentEncoding(self):
        """Name of the character encoding that was used to decode the input stream, or
        :obj:`None` if that is not determined yet

        """
        # The tokenizer only exists once _parse() has been called.
        if not hasattr(self, 'tokenizer'):
            return None
        return self.tokenizer.stream.charEncoding[0].name

    def isHTMLIntegrationPoint(self, element):
        # An HTML integration point is a foreign element whose children are
        # parsed as HTML.  MathML annotation-xml only qualifies when its
        # encoding attribute names an HTML MIME type (compared
        # case-insensitively via the asciiUpper2Lower translation table).
        if (element.name == "annotation-xml" and
                element.namespace == namespaces["mathml"]):
            return ("encoding" in element.attributes and
                    element.attributes["encoding"].translate(
                        asciiUpper2Lower) in
                    ("text/html", "application/xhtml+xml"))
        else:
            return (element.namespace, element.name) in htmlIntegrationPointElements

    def isMathMLTextIntegrationPoint(self, element):
        # Membership test against the constant table from .constants.
        return (element.namespace, element.name) in mathmlTextIntegrationPointElements

    def mainLoop(self):
        # Hoist the token-type constants to locals: this loop runs once per
        # token and the dict lookups would otherwise dominate.
        CharactersToken = tokenTypes["Characters"]
        SpaceCharactersToken = tokenTypes["SpaceCharacters"]
        StartTagToken = tokenTypes["StartTag"]
        EndTagToken = tokenTypes["EndTag"]
        CommentToken = tokenTypes["Comment"]
        DoctypeToken = tokenTypes["Doctype"]
        ParseErrorToken = tokenTypes["ParseError"]

        for token in self.normalizedTokens():
            prev_token = None
            new_token = token
            # A phase handler may return the token to request that it be
            # reprocessed (typically after switching self.phase); keep
            # going until the token is consumed (None returned).
            while new_token is not None:
                prev_token = new_token
                currentNode = self.tree.openElements[-1] if self.tree.openElements else None
                currentNodeNamespace = currentNode.namespace if currentNode else None
                currentNodeName = currentNode.name if currentNode else None

                # NOTE: `type` deliberately shadows the builtin (local only).
                type = new_token["type"]

                if type == ParseErrorToken:
                    self.parseError(new_token["data"], new_token.get("datavars", {}))
                    new_token = None
                else:
                    # Tree-construction dispatcher: use the current phase
                    # unless the current node is a foreign (MathML/SVG)
                    # element that is not an integration point for this
                    # token, in which case "in foreign content" applies.
                    if (len(self.tree.openElements) == 0 or
                        currentNodeNamespace == self.tree.defaultNamespace or
                        (self.isMathMLTextIntegrationPoint(currentNode) and
                         ((type == StartTagToken and
                           token["name"] not in frozenset(["mglyph", "malignmark"])) or
                          type in (CharactersToken, SpaceCharactersToken))) or
                        (currentNodeNamespace == namespaces["mathml"] and
                         currentNodeName == "annotation-xml" and
                         type == StartTagToken and
                         token["name"] == "svg") or
                        (self.isHTMLIntegrationPoint(currentNode) and
                         type in (StartTagToken, CharactersToken, SpaceCharactersToken))):
                        phase = self.phase
                    else:
                        phase = self.phases["inForeignContent"]

                    if type == CharactersToken:
                        new_token = phase.processCharacters(new_token)
                    elif type == SpaceCharactersToken:
                        new_token = phase.processSpaceCharacters(new_token)
                    elif type == StartTagToken:
                        new_token = phase.processStartTag(new_token)
                    elif type == EndTagToken:
                        new_token = phase.processEndTag(new_token)
                    elif type == CommentToken:
                        new_token = phase.processComment(new_token)
                    elif type == DoctypeToken:
                        new_token = phase.processDoctype(new_token)

                # A "/>" on a non-void element that no handler acknowledged
                # is a parse error per the spec.
                if (type == StartTagToken and prev_token["selfClosing"] and
                        not prev_token["selfClosingAcknowledged"]):
                    self.parseError("non-void-element-with-trailing-solidus",
                                    {"name": prev_token["name"]})

        # When the loop finishes it's EOF
        reprocess = True
        phases = []
        while reprocess:
            phases.append(self.phase)
            reprocess = self.phase.processEOF()
            if reprocess:
                # Guard against an infinite EOF reprocessing cycle: no
                # phase may be asked to handle EOF twice.
                assert self.phase not in phases

    def normalizedTokens(self):
        # Generator: every raw tokenizer token, normalized (see
        # normalizeToken) before tree construction sees it.
        for token in self.tokenizer:
            yield self.normalizeToken(token)

    def parse(self, stream, *args, **kwargs):
        """Parse a HTML document into a well-formed tree

        :arg stream: a file-like object or string containing the HTML to be parsed

            The optional encoding parameter must be a string that indicates
            the encoding.  If specified, that encoding will be used,
            regardless of any BOM or later declaration (such as in a meta
            element).

        :arg scripting: treat noscript elements as if JavaScript was turned on

        :returns: parsed tree

        Example:

        >>> from html5lib.html5parser import HTMLParser
        >>> parser = HTMLParser()
        >>> parser.parse('<html><body><p>This is a doc</p></body></html>')
        <Element u'{http://www.w3.org/1999/xhtml}html' at 0x7feac4909db0>

        """
        # innerHTML=False; container is irrelevant outside fragment mode,
        # so None is passed.
        self._parse(stream, False, None, *args, **kwargs)
        return self.tree.getDocument()

    def parseFragment(self, stream, *args, **kwargs):
        """Parse a HTML fragment into a well-formed tree fragment

        :arg container: name of the element we're setting the innerHTML
            property if set to None, default to 'div'

        :arg stream: a file-like object or string containing the HTML to be parsed

            The optional encoding parameter must be a string that indicates
            the encoding.  If specified, that encoding will be used,
            regardless of any BOM or later declaration (such as in a meta
            element)

        :arg scripting: treat noscript elements as if JavaScript was turned on

        :returns: parsed tree

        Example:

        >>> from html5lib.html5libparser import HTMLParser
        >>> parser = HTMLParser()
        >>> parser.parseFragment('<b>this is a fragment</b>')
        <Element u'DOCUMENT_FRAGMENT' at 0x7feac484b090>

        """
        self._parse(stream, True, *args, **kwargs)
        return self.tree.getFragment()

    def parseError(self, errorcode="XXX-undefined-error", datavars=None):
        # Record (position, code, data) for every error; in strict mode the
        # first error aborts parsing by raising ParseError.
        # XXX The idea is to make errorcode mandatory.
        if datavars is None:
            datavars = {}
        self.errors.append((self.tokenizer.stream.position(), errorcode, datavars))
        if self.strict:
            raise ParseError(E[errorcode] % datavars)

    def normalizeToken(self, token):
        # HTML5 specific normalizations to the token stream
        if token["type"] == tokenTypes["StartTag"]:
            # raw is the tokenizer's sequence of (name, value) attribute
            # pairs; converting to OrderedDict keeps only the *last*
            # occurrence of a duplicated name ...
            raw = token["data"]
            token["data"] = OrderedDict(raw)
            if len(raw) > len(token["data"]):
                # we had some duplicated attribute, fix so first wins
                # (re-applying the pairs in reverse makes the earliest
                # occurrence the final value).
                token["data"].update(raw[::-1])

        return token

    def adjustMathMLAttributes(self, token):
        # The argument resolves to the module-level adjustMathMLAttributes
        # mapping (the method name only shadows it as a class attribute).
        adjust_attributes(token, adjustMathMLAttributes)

    def adjustSVGAttributes(self, token):
        # Same shadowing pattern as adjustMathMLAttributes above.
        adjust_attributes(token, adjustSVGAttributes)

    def adjustForeignAttributes(self, token):
        adjust_attributes(token, adjustForeignAttributesMap)

    def reparseTokenNormal(self, token):
        # pylint:disable=unused-argument
        # NOTE(review): HTMLParser instances never set `self.parser` in
        # this file, and phase objects are not callable, so invoking this
        # method would raise.  It appears to be dead code -- confirm
        # before relying on it.
        self.parser.phase()

    def resetInsertionMode(self):
        # Walk the open-elements stack from the current node upwards and
        # pick the phase (insertion mode) appropriate to the first
        # recognized element, per the spec's "reset the insertion mode
        # appropriately" algorithm.  Only reached in innerHTML mode here.
        # The name of this method is mostly historical. (It's also used in the
        # specification.)
        last = False
        newModes = {
            "select": "inSelect",
            "td": "inCell",
            "th": "inCell",
            "tr": "inRow",
            "tbody": "inTableBody",
            "thead": "inTableBody",
            "tfoot": "inTableBody",
            "caption": "inCaption",
            "colgroup": "inColumnGroup",
            "table": "inTable",
            "head": "inBody",
            "body": "inBody",
            "frameset": "inFrameset",
            "html": "beforeHead"
        }
        for node in self.tree.openElements[::-1]:
            nodeName = node.name
            new_phase = None
            if node == self.tree.openElements[0]:
                # Reached the root: substitute the fragment's container.
                assert self.innerHTML
                last = True
                nodeName = self.innerHTML
            # Check for conditions that should only happen in the innerHTML
            # case
            if nodeName in ("select", "colgroup", "head", "html"):
                assert self.innerHTML

            if not last and node.namespace != self.tree.defaultNamespace:
                continue

            if nodeName in newModes:
                new_phase = self.phases[newModes[nodeName]]
                break
            elif last:
                new_phase = self.phases["inBody"]
                break

        self.phase = new_phase

    def parseRCDataRawtext(self, token, contentType):
        # Generic RCDATA/RAWTEXT Parsing algorithm
        assert contentType in ("RAWTEXT", "RCDATA")

        self.tree.insertElement(token)

        # Switch the tokenizer into the requested raw-text state; the
        # "text" phase will restore originalPhase when the element closes.
        if contentType == "RAWTEXT":
            self.tokenizer.state = self.tokenizer.rawtextState
        else:
            self.tokenizer.state = self.tokenizer.rcdataState

        self.originalPhase = self.phase

        self.phase = self.phases["text"]
| 410 | |||
| 411 | |||
| 412 | @_utils.memoize | ||
| 413 | def getPhases(debug): | ||
| 414 | def log(function): | ||
| 415 | """Logger that records which phase processes each token""" | ||
| 416 | type_names = dict((value, key) for key, value in | ||
| 417 | tokenTypes.items()) | ||
| 418 | |||
| 419 | def wrapped(self, *args, **kwargs): | ||
| 420 | if function.__name__.startswith("process") and len(args) > 0: | ||
| 421 | token = args[0] | ||
| 422 | try: | ||
| 423 | info = {"type": type_names[token['type']]} | ||
| 424 | except: | ||
| 425 | raise | ||
| 426 | if token['type'] in tagTokenTypes: | ||
| 427 | info["name"] = token['name'] | ||
| 428 | |||
| 429 | self.parser.log.append((self.parser.tokenizer.state.__name__, | ||
| 430 | self.parser.phase.__class__.__name__, | ||
| 431 | self.__class__.__name__, | ||
| 432 | function.__name__, | ||
| 433 | info)) | ||
| 434 | return function(self, *args, **kwargs) | ||
| 435 | else: | ||
| 436 | return function(self, *args, **kwargs) | ||
| 437 | return wrapped | ||
| 438 | |||
| 439 | def getMetaclass(use_metaclass, metaclass_func): | ||
| 440 | if use_metaclass: | ||
| 441 | return method_decorator_metaclass(metaclass_func) | ||
| 442 | else: | ||
| 443 | return type | ||
| 444 | |||
| 445 | # pylint:disable=unused-argument | ||
| 446 | class Phase(with_metaclass(getMetaclass(debug, log))): | ||
| 447 | """Base class for helper object that implements each phase of processing | ||
| 448 | """ | ||
| 449 | |||
| 450 | def __init__(self, parser, tree): | ||
| 451 | self.parser = parser | ||
| 452 | self.tree = tree | ||
| 453 | |||
| 454 | def processEOF(self): | ||
| 455 | raise NotImplementedError | ||
| 456 | |||
| 457 | def processComment(self, token): | ||
| 458 | # For most phases the following is correct. Where it's not it will be | ||
| 459 | # overridden. | ||
| 460 | self.tree.insertComment(token, self.tree.openElements[-1]) | ||
| 461 | |||
| 462 | def processDoctype(self, token): | ||
| 463 | self.parser.parseError("unexpected-doctype") | ||
| 464 | |||
| 465 | def processCharacters(self, token): | ||
| 466 | self.tree.insertText(token["data"]) | ||
| 467 | |||
| 468 | def processSpaceCharacters(self, token): | ||
| 469 | self.tree.insertText(token["data"]) | ||
| 470 | |||
| 471 | def processStartTag(self, token): | ||
| 472 | return self.startTagHandler[token["name"]](token) | ||
| 473 | |||
| 474 | def startTagHtml(self, token): | ||
| 475 | if not self.parser.firstStartTag and token["name"] == "html": | ||
| 476 | self.parser.parseError("non-html-root") | ||
| 477 | # XXX Need a check here to see if the first start tag token emitted is | ||
| 478 | # this token... If it's not, invoke self.parser.parseError(). | ||
| 479 | for attr, value in token["data"].items(): | ||
| 480 | if attr not in self.tree.openElements[0].attributes: | ||
| 481 | self.tree.openElements[0].attributes[attr] = value | ||
| 482 | self.parser.firstStartTag = False | ||
| 483 | |||
| 484 | def processEndTag(self, token): | ||
| 485 | return self.endTagHandler[token["name"]](token) | ||
| 486 | |||
    class InitialPhase(Phase):
        """The "initial" insertion mode: everything before the doctype.

        Determines the document's compatibility mode ("quirks",
        "limited quirks" or "no quirks") from the doctype token, then
        hands off to the "beforeHtml" phase.
        """

        def processSpaceCharacters(self, token):
            # Whitespace before the doctype is dropped.
            pass

        def processComment(self, token):
            # Comments before the doctype attach to the document itself.
            self.tree.insertComment(token, self.tree.document)

        def processDoctype(self, token):
            name = token["name"]
            publicId = token["publicId"]
            systemId = token["systemId"]
            correct = token["correct"]

            # Anything but a bare "<!DOCTYPE html>" (optionally with the
            # "about:legacy-compat" system id) is reported.  Note `and`
            # binds tighter than `or` in this condition.
            if (name != "html" or publicId is not None or
                    systemId is not None and systemId != "about:legacy-compat"):
                self.parser.parseError("unknown-doctype")

            if publicId is None:
                publicId = ""

            self.tree.insertDoctype(token)

            # Lowercase the public id once so all list comparisons below
            # are case-insensitive.
            if publicId != "":
                publicId = publicId.translate(asciiUpper2Lower)

            # Full quirks mode: malformed doctype, or one of the legacy
            # public identifiers below (prefix match), or the HTML 4.01
            # frameset/transitional ids *without* a system id, or the IBM
            # system id.  Relies on `and` binding tighter than `or`.
            if (not correct or token["name"] != "html" or
                    publicId.startswith(
                        ("+//silmaril//dtd html pro v0r11 19970101//",
                         "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
                         "-//as//dtd html 3.0 aswedit + extensions//",
                         "-//ietf//dtd html 2.0 level 1//",
                         "-//ietf//dtd html 2.0 level 2//",
                         "-//ietf//dtd html 2.0 strict level 1//",
                         "-//ietf//dtd html 2.0 strict level 2//",
                         "-//ietf//dtd html 2.0 strict//",
                         "-//ietf//dtd html 2.0//",
                         "-//ietf//dtd html 2.1e//",
                         "-//ietf//dtd html 3.0//",
                         "-//ietf//dtd html 3.2 final//",
                         "-//ietf//dtd html 3.2//",
                         "-//ietf//dtd html 3//",
                         "-//ietf//dtd html level 0//",
                         "-//ietf//dtd html level 1//",
                         "-//ietf//dtd html level 2//",
                         "-//ietf//dtd html level 3//",
                         "-//ietf//dtd html strict level 0//",
                         "-//ietf//dtd html strict level 1//",
                         "-//ietf//dtd html strict level 2//",
                         "-//ietf//dtd html strict level 3//",
                         "-//ietf//dtd html strict//",
                         "-//ietf//dtd html//",
                         "-//metrius//dtd metrius presentational//",
                         "-//microsoft//dtd internet explorer 2.0 html strict//",
                         "-//microsoft//dtd internet explorer 2.0 html//",
                         "-//microsoft//dtd internet explorer 2.0 tables//",
                         "-//microsoft//dtd internet explorer 3.0 html strict//",
                         "-//microsoft//dtd internet explorer 3.0 html//",
                         "-//microsoft//dtd internet explorer 3.0 tables//",
                         "-//netscape comm. corp.//dtd html//",
                         "-//netscape comm. corp.//dtd strict html//",
                         "-//o'reilly and associates//dtd html 2.0//",
                         "-//o'reilly and associates//dtd html extended 1.0//",
                         "-//o'reilly and associates//dtd html extended relaxed 1.0//",
                         "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
                         "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
                         "-//spyglass//dtd html 2.0 extended//",
                         "-//sq//dtd html 2.0 hotmetal + extensions//",
                         "-//sun microsystems corp.//dtd hotjava html//",
                         "-//sun microsystems corp.//dtd hotjava strict html//",
                         "-//w3c//dtd html 3 1995-03-24//",
                         "-//w3c//dtd html 3.2 draft//",
                         "-//w3c//dtd html 3.2 final//",
                         "-//w3c//dtd html 3.2//",
                         "-//w3c//dtd html 3.2s draft//",
                         "-//w3c//dtd html 4.0 frameset//",
                         "-//w3c//dtd html 4.0 transitional//",
                         "-//w3c//dtd html experimental 19960712//",
                         "-//w3c//dtd html experimental 970421//",
                         "-//w3c//dtd w3 html//",
                         "-//w3o//dtd w3 html 3.0//",
                         "-//webtechs//dtd mozilla html 2.0//",
                         "-//webtechs//dtd mozilla html//")) or
                    publicId in ("-//w3o//dtd w3 html strict 3.0//en//",
                                 "-/w3c/dtd html 4.0 transitional/en",
                                 "html") or
                    publicId.startswith(
                        ("-//w3c//dtd html 4.01 frameset//",
                         "-//w3c//dtd html 4.01 transitional//")) and
                    systemId is None or
                    systemId and systemId.lower() == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"):
                self.parser.compatMode = "quirks"
            # Limited quirks: XHTML 1.0 frameset/transitional, or HTML 4.01
            # frameset/transitional *with* a system id.
            elif (publicId.startswith(
                    ("-//w3c//dtd xhtml 1.0 frameset//",
                     "-//w3c//dtd xhtml 1.0 transitional//")) or
                  publicId.startswith(
                      ("-//w3c//dtd html 4.01 frameset//",
                       "-//w3c//dtd html 4.01 transitional//")) and
                  systemId is not None):
                self.parser.compatMode = "limited quirks"

            self.parser.phase = self.parser.phases["beforeHtml"]

        def anythingElse(self):
            # No doctype at all: quirks mode per the spec.
            self.parser.compatMode = "quirks"
            self.parser.phase = self.parser.phases["beforeHtml"]

        def processCharacters(self, token):
            self.parser.parseError("expected-doctype-but-got-chars")
            self.anythingElse()
            return token

        def processStartTag(self, token):
            self.parser.parseError("expected-doctype-but-got-start-tag",
                                   {"name": token["name"]})
            self.anythingElse()
            return token

        def processEndTag(self, token):
            self.parser.parseError("expected-doctype-but-got-end-tag",
                                   {"name": token["name"]})
            self.anythingElse()
            return token

        def processEOF(self):
            self.parser.parseError("expected-doctype-but-got-eof")
            self.anythingElse()
            return True
| 614 | |||
| 615 | class BeforeHtmlPhase(Phase): | ||
| 616 | # helper methods | ||
| 617 | def insertHtmlElement(self): | ||
| 618 | self.tree.insertRoot(impliedTagToken("html", "StartTag")) | ||
| 619 | self.parser.phase = self.parser.phases["beforeHead"] | ||
| 620 | |||
| 621 | # other | ||
| 622 | def processEOF(self): | ||
| 623 | self.insertHtmlElement() | ||
| 624 | return True | ||
| 625 | |||
| 626 | def processComment(self, token): | ||
| 627 | self.tree.insertComment(token, self.tree.document) | ||
| 628 | |||
| 629 | def processSpaceCharacters(self, token): | ||
| 630 | pass | ||
| 631 | |||
| 632 | def processCharacters(self, token): | ||
| 633 | self.insertHtmlElement() | ||
| 634 | return token | ||
| 635 | |||
| 636 | def processStartTag(self, token): | ||
| 637 | if token["name"] == "html": | ||
| 638 | self.parser.firstStartTag = True | ||
| 639 | self.insertHtmlElement() | ||
| 640 | return token | ||
| 641 | |||
| 642 | def processEndTag(self, token): | ||
| 643 | if token["name"] not in ("head", "body", "html", "br"): | ||
| 644 | self.parser.parseError("unexpected-end-tag-before-html", | ||
| 645 | {"name": token["name"]}) | ||
| 646 | else: | ||
| 647 | self.insertHtmlElement() | ||
| 648 | return token | ||
| 649 | |||
| 650 | class BeforeHeadPhase(Phase): | ||
| 651 | def __init__(self, parser, tree): | ||
| 652 | Phase.__init__(self, parser, tree) | ||
| 653 | |||
| 654 | self.startTagHandler = _utils.MethodDispatcher([ | ||
| 655 | ("html", self.startTagHtml), | ||
| 656 | ("head", self.startTagHead) | ||
| 657 | ]) | ||
| 658 | self.startTagHandler.default = self.startTagOther | ||
| 659 | |||
| 660 | self.endTagHandler = _utils.MethodDispatcher([ | ||
| 661 | (("head", "body", "html", "br"), self.endTagImplyHead) | ||
| 662 | ]) | ||
| 663 | self.endTagHandler.default = self.endTagOther | ||
| 664 | |||
| 665 | def processEOF(self): | ||
| 666 | self.startTagHead(impliedTagToken("head", "StartTag")) | ||
| 667 | return True | ||
| 668 | |||
| 669 | def processSpaceCharacters(self, token): | ||
| 670 | pass | ||
| 671 | |||
| 672 | def processCharacters(self, token): | ||
| 673 | self.startTagHead(impliedTagToken("head", "StartTag")) | ||
| 674 | return token | ||
| 675 | |||
| 676 | def startTagHtml(self, token): | ||
| 677 | return self.parser.phases["inBody"].processStartTag(token) | ||
| 678 | |||
| 679 | def startTagHead(self, token): | ||
| 680 | self.tree.insertElement(token) | ||
| 681 | self.tree.headPointer = self.tree.openElements[-1] | ||
| 682 | self.parser.phase = self.parser.phases["inHead"] | ||
| 683 | |||
| 684 | def startTagOther(self, token): | ||
| 685 | self.startTagHead(impliedTagToken("head", "StartTag")) | ||
| 686 | return token | ||
| 687 | |||
| 688 | def endTagImplyHead(self, token): | ||
| 689 | self.startTagHead(impliedTagToken("head", "StartTag")) | ||
| 690 | return token | ||
| 691 | |||
| 692 | def endTagOther(self, token): | ||
| 693 | self.parser.parseError("end-tag-after-implied-root", | ||
| 694 | {"name": token["name"]}) | ||
| 695 | |||
    class InHeadPhase(Phase):
        """The "in head" insertion mode: content of the <head> element."""

        def __init__(self, parser, tree):
            Phase.__init__(self, parser, tree)

            self.startTagHandler = _utils.MethodDispatcher([
                ("html", self.startTagHtml),
                ("title", self.startTagTitle),
                (("noframes", "style"), self.startTagNoFramesStyle),
                ("noscript", self.startTagNoscript),
                ("script", self.startTagScript),
                (("base", "basefont", "bgsound", "command", "link"),
                 self.startTagBaseLinkCommand),
                ("meta", self.startTagMeta),
                ("head", self.startTagHead)
            ])
            self.startTagHandler.default = self.startTagOther

            self.endTagHandler = _utils.MethodDispatcher([
                ("head", self.endTagHead),
                (("br", "html", "body"), self.endTagHtmlBodyBr)
            ])
            self.endTagHandler.default = self.endTagOther

        # the real thing
        def processEOF(self):
            # EOF in head: act as if </head> was seen, then reprocess.
            self.anythingElse()
            return True

        def processCharacters(self, token):
            # Non-space text closes the head implicitly and is reprocessed.
            self.anythingElse()
            return token

        def startTagHtml(self, token):
            return self.parser.phases["inBody"].processStartTag(token)

        def startTagHead(self, token):
            self.parser.parseError("two-heads-are-not-better-than-one")

        def startTagBaseLinkCommand(self, token):
            # Void elements: insert, pop immediately, and acknowledge any
            # trailing "/>" so mainLoop does not flag it.
            self.tree.insertElement(token)
            self.tree.openElements.pop()
            token["selfClosingAcknowledged"] = True

        def startTagMeta(self, token):
            # <meta> is void: insert and pop immediately.
            self.tree.insertElement(token)
            self.tree.openElements.pop()
            token["selfClosingAcknowledged"] = True

            attributes = token["data"]
            # While the stream's encoding is only tentatively known, a
            # charset / http-equiv declaration here may change it;
            # changeEncoding() may then restart parsing -- see the
            # _ReparseException handler in HTMLParser._parse.
            if self.parser.tokenizer.stream.charEncoding[1] == "tentative":
                if "charset" in attributes:
                    self.parser.tokenizer.stream.changeEncoding(attributes["charset"])
                elif ("content" in attributes and
                      "http-equiv" in attributes and
                      attributes["http-equiv"].lower() == "content-type"):
                    # Encoding it as UTF-8 here is a hack, as really we should pass
                    # the abstract Unicode string, and just use the
                    # ContentAttrParser on that, but using UTF-8 allows all chars
                    # to be encoded and as a ASCII-superset works.
                    data = _inputstream.EncodingBytes(attributes["content"].encode("utf-8"))
                    parser = _inputstream.ContentAttrParser(data)
                    codec = parser.parse()
                    self.parser.tokenizer.stream.changeEncoding(codec)

        def startTagTitle(self, token):
            # <title> content is tokenized as RCDATA.
            self.parser.parseRCDataRawtext(token, "RCDATA")

        def startTagNoFramesStyle(self, token):
            # Need to decide whether to implement the scripting-disabled case
            self.parser.parseRCDataRawtext(token, "RAWTEXT")

        def startTagNoscript(self, token):
            # With scripting on, <noscript> content is raw text; otherwise
            # it is parsed in its own "inHeadNoscript" mode.
            if self.parser.scripting:
                self.parser.parseRCDataRawtext(token, "RAWTEXT")
            else:
                self.tree.insertElement(token)
                self.parser.phase = self.parser.phases["inHeadNoscript"]

        def startTagScript(self, token):
            # Like parseRCDataRawtext but with the dedicated script state.
            self.tree.insertElement(token)
            self.parser.tokenizer.state = self.parser.tokenizer.scriptDataState
            self.parser.originalPhase = self.parser.phase
            self.parser.phase = self.parser.phases["text"]

        def startTagOther(self, token):
            self.anythingElse()
            return token

        def endTagHead(self, token):
            node = self.parser.tree.openElements.pop()
            assert node.name == "head", "Expected head got %s" % node.name
            self.parser.phase = self.parser.phases["afterHead"]

        def endTagHtmlBodyBr(self, token):
            self.anythingElse()
            return token

        def endTagOther(self, token):
            self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

        def anythingElse(self):
            # Implicitly close the head and reprocess in "afterHead".
            self.endTagHead(impliedTagToken("head"))
| 798 | |||
| 799 | class InHeadNoscriptPhase(Phase): | ||
| 800 | def __init__(self, parser, tree): | ||
| 801 | Phase.__init__(self, parser, tree) | ||
| 802 | |||
| 803 | self.startTagHandler = _utils.MethodDispatcher([ | ||
| 804 | ("html", self.startTagHtml), | ||
| 805 | (("basefont", "bgsound", "link", "meta", "noframes", "style"), self.startTagBaseLinkCommand), | ||
| 806 | (("head", "noscript"), self.startTagHeadNoscript), | ||
| 807 | ]) | ||
| 808 | self.startTagHandler.default = self.startTagOther | ||
| 809 | |||
| 810 | self.endTagHandler = _utils.MethodDispatcher([ | ||
| 811 | ("noscript", self.endTagNoscript), | ||
| 812 | ("br", self.endTagBr), | ||
| 813 | ]) | ||
| 814 | self.endTagHandler.default = self.endTagOther | ||
| 815 | |||
| 816 | def processEOF(self): | ||
| 817 | self.parser.parseError("eof-in-head-noscript") | ||
| 818 | self.anythingElse() | ||
| 819 | return True | ||
| 820 | |||
| 821 | def processComment(self, token): | ||
| 822 | return self.parser.phases["inHead"].processComment(token) | ||
| 823 | |||
| 824 | def processCharacters(self, token): | ||
| 825 | self.parser.parseError("char-in-head-noscript") | ||
| 826 | self.anythingElse() | ||
| 827 | return token | ||
| 828 | |||
| 829 | def processSpaceCharacters(self, token): | ||
| 830 | return self.parser.phases["inHead"].processSpaceCharacters(token) | ||
| 831 | |||
| 832 | def startTagHtml(self, token): | ||
| 833 | return self.parser.phases["inBody"].processStartTag(token) | ||
| 834 | |||
| 835 | def startTagBaseLinkCommand(self, token): | ||
| 836 | return self.parser.phases["inHead"].processStartTag(token) | ||
| 837 | |||
| 838 | def startTagHeadNoscript(self, token): | ||
| 839 | self.parser.parseError("unexpected-start-tag", {"name": token["name"]}) | ||
| 840 | |||
| 841 | def startTagOther(self, token): | ||
| 842 | self.parser.parseError("unexpected-inhead-noscript-tag", {"name": token["name"]}) | ||
| 843 | self.anythingElse() | ||
| 844 | return token | ||
| 845 | |||
| 846 | def endTagNoscript(self, token): | ||
| 847 | node = self.parser.tree.openElements.pop() | ||
| 848 | assert node.name == "noscript", "Expected noscript got %s" % node.name | ||
| 849 | self.parser.phase = self.parser.phases["inHead"] | ||
| 850 | |||
| 851 | def endTagBr(self, token): | ||
| 852 | self.parser.parseError("unexpected-inhead-noscript-tag", {"name": token["name"]}) | ||
| 853 | self.anythingElse() | ||
| 854 | return token | ||
| 855 | |||
| 856 | def endTagOther(self, token): | ||
| 857 | self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) | ||
| 858 | |||
| 859 | def anythingElse(self): | ||
| 860 | # Caller must raise parse error first! | ||
| 861 | self.endTagNoscript(impliedTagToken("noscript")) | ||
| 862 | |||
| 863 | class AfterHeadPhase(Phase): | ||
        def __init__(self, parser, tree):
            Phase.__init__(self, parser, tree)

            # Dispatch tables: head-only tags are routed to
            # startTagFromHead; everything unknown falls through to the
            # "anything else" handlers.
            self.startTagHandler = _utils.MethodDispatcher([
                ("html", self.startTagHtml),
                ("body", self.startTagBody),
                ("frameset", self.startTagFrameset),
                (("base", "basefont", "bgsound", "link", "meta", "noframes", "script",
                  "style", "title"),
                 self.startTagFromHead),
                ("head", self.startTagHead)
            ])
            self.startTagHandler.default = self.startTagOther
            self.endTagHandler = _utils.MethodDispatcher([(("body", "html", "br"),
                                                           self.endTagHtmlBodyBr)])
            self.endTagHandler.default = self.endTagOther
| 880 | |||
| 881 | def processEOF(self): | ||
| 882 | self.anythingElse() | ||
| 883 | return True | ||
| 884 | |||
| 885 | def processCharacters(self, token): | ||
| 886 | self.anythingElse() | ||
| 887 | return token | ||
| 888 | |||
| 889 | def startTagHtml(self, token): | ||
| 890 | return self.parser.phases["inBody"].processStartTag(token) | ||
| 891 | |||
| 892 | def startTagBody(self, token): | ||
| 893 | self.parser.framesetOK = False | ||
| 894 | self.tree.insertElement(token) | ||
| 895 | self.parser.phase = self.parser.phases["inBody"] | ||
| 896 | |||
| 897 | def startTagFrameset(self, token): | ||
| 898 | self.tree.insertElement(token) | ||
| 899 | self.parser.phase = self.parser.phases["inFrameset"] | ||
| 900 | |||
| 901 | def startTagFromHead(self, token): | ||
| 902 | self.parser.parseError("unexpected-start-tag-out-of-my-head", | ||
| 903 | {"name": token["name"]}) | ||
| 904 | self.tree.openElements.append(self.tree.headPointer) | ||
| 905 | self.parser.phases["inHead"].processStartTag(token) | ||
| 906 | for node in self.tree.openElements[::-1]: | ||
| 907 | if node.name == "head": | ||
| 908 | self.tree.openElements.remove(node) | ||
| 909 | break | ||
| 910 | |||
| 911 | def startTagHead(self, token): | ||
| 912 | self.parser.parseError("unexpected-start-tag", {"name": token["name"]}) | ||
| 913 | |||
| 914 | def startTagOther(self, token): | ||
| 915 | self.anythingElse() | ||
| 916 | return token | ||
| 917 | |||
| 918 | def endTagHtmlBodyBr(self, token): | ||
| 919 | self.anythingElse() | ||
| 920 | return token | ||
| 921 | |||
| 922 | def endTagOther(self, token): | ||
| 923 | self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) | ||
| 924 | |||
| 925 | def anythingElse(self): | ||
| 926 | self.tree.insertElement(impliedTagToken("body", "StartTag")) | ||
| 927 | self.parser.phase = self.parser.phases["inBody"] | ||
| 928 | self.parser.framesetOK = True | ||
| 929 | |||
| 930 | class InBodyPhase(Phase): | ||
| 931 | # http://www.whatwg.org/specs/web-apps/current-work/#parsing-main-inbody | ||
| 932 | # the really-really-really-very crazy mode | ||
        def __init__(self, parser, tree):
            """Register the start/end tag dispatch tables for "in body"."""
            Phase.__init__(self, parser, tree)

            # Set this to the default handler
            self.processSpaceCharacters = self.processSpaceCharactersNonPre

            # Start-tag dispatch: names not listed fall through to
            # startTagOther (the dispatcher's default).
            self.startTagHandler = _utils.MethodDispatcher([
                ("html", self.startTagHtml),
                (("base", "basefont", "bgsound", "command", "link", "meta",
                  "script", "style", "title"),
                 self.startTagProcessInHead),
                ("body", self.startTagBody),
                ("frameset", self.startTagFrameset),
                (("address", "article", "aside", "blockquote", "center", "details",
                  "dir", "div", "dl", "fieldset", "figcaption", "figure",
                  "footer", "header", "hgroup", "main", "menu", "nav", "ol", "p",
                  "section", "summary", "ul"),
                 self.startTagCloseP),
                (headingElements, self.startTagHeading),
                (("pre", "listing"), self.startTagPreListing),
                ("form", self.startTagForm),
                (("li", "dd", "dt"), self.startTagListItem),
                ("plaintext", self.startTagPlaintext),
                ("a", self.startTagA),
                (("b", "big", "code", "em", "font", "i", "s", "small", "strike",
                  "strong", "tt", "u"), self.startTagFormatting),
                ("nobr", self.startTagNobr),
                ("button", self.startTagButton),
                (("applet", "marquee", "object"), self.startTagAppletMarqueeObject),
                ("xmp", self.startTagXmp),
                ("table", self.startTagTable),
                (("area", "br", "embed", "img", "keygen", "wbr"),
                 self.startTagVoidFormatting),
                (("param", "source", "track"), self.startTagParamSource),
                ("input", self.startTagInput),
                ("hr", self.startTagHr),
                ("image", self.startTagImage),
                ("isindex", self.startTagIsIndex),
                ("textarea", self.startTagTextarea),
                ("iframe", self.startTagIFrame),
                ("noscript", self.startTagNoscript),
                (("noembed", "noframes"), self.startTagRawtext),
                ("select", self.startTagSelect),
                (("rp", "rt"), self.startTagRpRt),
                (("option", "optgroup"), self.startTagOpt),
                (("math"), self.startTagMath),
                (("svg"), self.startTagSvg),
                (("caption", "col", "colgroup", "frame", "head",
                  "tbody", "td", "tfoot", "th", "thead",
                  "tr"), self.startTagMisplaced)
            ])
            self.startTagHandler.default = self.startTagOther

            # End-tag dispatch: names not listed fall through to endTagOther.
            self.endTagHandler = _utils.MethodDispatcher([
                ("body", self.endTagBody),
                ("html", self.endTagHtml),
                (("address", "article", "aside", "blockquote", "button", "center",
                  "details", "dialog", "dir", "div", "dl", "fieldset", "figcaption", "figure",
                  "footer", "header", "hgroup", "listing", "main", "menu", "nav", "ol", "pre",
                  "section", "summary", "ul"), self.endTagBlock),
                ("form", self.endTagForm),
                ("p", self.endTagP),
                (("dd", "dt", "li"), self.endTagListItem),
                (headingElements, self.endTagHeading),
                (("a", "b", "big", "code", "em", "font", "i", "nobr", "s", "small",
                  "strike", "strong", "tt", "u"), self.endTagFormatting),
                (("applet", "marquee", "object"), self.endTagAppletMarqueeObject),
                ("br", self.endTagBr),
            ])
            self.endTagHandler.default = self.endTagOther
| 1003 | |||
| 1004 | def isMatchingFormattingElement(self, node1, node2): | ||
| 1005 | return (node1.name == node2.name and | ||
| 1006 | node1.namespace == node2.namespace and | ||
| 1007 | node1.attributes == node2.attributes) | ||
| 1008 | |||
| 1009 | # helper | ||
| 1010 | def addFormattingElement(self, token): | ||
| 1011 | self.tree.insertElement(token) | ||
| 1012 | element = self.tree.openElements[-1] | ||
| 1013 | |||
| 1014 | matchingElements = [] | ||
| 1015 | for node in self.tree.activeFormattingElements[::-1]: | ||
| 1016 | if node is Marker: | ||
| 1017 | break | ||
| 1018 | elif self.isMatchingFormattingElement(node, element): | ||
| 1019 | matchingElements.append(node) | ||
| 1020 | |||
| 1021 | assert len(matchingElements) <= 3 | ||
| 1022 | if len(matchingElements) == 3: | ||
| 1023 | self.tree.activeFormattingElements.remove(matchingElements[-1]) | ||
| 1024 | self.tree.activeFormattingElements.append(element) | ||
| 1025 | |||
| 1026 | # the real deal | ||
| 1027 | def processEOF(self): | ||
| 1028 | allowed_elements = frozenset(("dd", "dt", "li", "p", "tbody", "td", | ||
| 1029 | "tfoot", "th", "thead", "tr", "body", | ||
| 1030 | "html")) | ||
| 1031 | for node in self.tree.openElements[::-1]: | ||
| 1032 | if node.name not in allowed_elements: | ||
| 1033 | self.parser.parseError("expected-closing-tag-but-got-eof") | ||
| 1034 | break | ||
| 1035 | # Stop parsing | ||
| 1036 | |||
| 1037 | def processSpaceCharactersDropNewline(self, token): | ||
| 1038 | # Sometimes (start of <pre>, <listing>, and <textarea> blocks) we | ||
| 1039 | # want to drop leading newlines | ||
| 1040 | data = token["data"] | ||
| 1041 | self.processSpaceCharacters = self.processSpaceCharactersNonPre | ||
| 1042 | if (data.startswith("\n") and | ||
| 1043 | self.tree.openElements[-1].name in ("pre", "listing", "textarea") and | ||
| 1044 | not self.tree.openElements[-1].hasContent()): | ||
| 1045 | data = data[1:] | ||
| 1046 | if data: | ||
| 1047 | self.tree.reconstructActiveFormattingElements() | ||
| 1048 | self.tree.insertText(data) | ||
| 1049 | |||
| 1050 | def processCharacters(self, token): | ||
| 1051 | if token["data"] == "\u0000": | ||
| 1052 | # The tokenizer should always emit null on its own | ||
| 1053 | return | ||
| 1054 | self.tree.reconstructActiveFormattingElements() | ||
| 1055 | self.tree.insertText(token["data"]) | ||
| 1056 | # This must be bad for performance | ||
| 1057 | if (self.parser.framesetOK and | ||
| 1058 | any([char not in spaceCharacters | ||
| 1059 | for char in token["data"]])): | ||
| 1060 | self.parser.framesetOK = False | ||
| 1061 | |||
| 1062 | def processSpaceCharactersNonPre(self, token): | ||
| 1063 | self.tree.reconstructActiveFormattingElements() | ||
| 1064 | self.tree.insertText(token["data"]) | ||
| 1065 | |||
| 1066 | def startTagProcessInHead(self, token): | ||
| 1067 | return self.parser.phases["inHead"].processStartTag(token) | ||
| 1068 | |||
| 1069 | def startTagBody(self, token): | ||
| 1070 | self.parser.parseError("unexpected-start-tag", {"name": "body"}) | ||
| 1071 | if (len(self.tree.openElements) == 1 or | ||
| 1072 | self.tree.openElements[1].name != "body"): | ||
| 1073 | assert self.parser.innerHTML | ||
| 1074 | else: | ||
| 1075 | self.parser.framesetOK = False | ||
| 1076 | for attr, value in token["data"].items(): | ||
| 1077 | if attr not in self.tree.openElements[1].attributes: | ||
| 1078 | self.tree.openElements[1].attributes[attr] = value | ||
| 1079 | |||
| 1080 | def startTagFrameset(self, token): | ||
| 1081 | self.parser.parseError("unexpected-start-tag", {"name": "frameset"}) | ||
| 1082 | if (len(self.tree.openElements) == 1 or self.tree.openElements[1].name != "body"): | ||
| 1083 | assert self.parser.innerHTML | ||
| 1084 | elif not self.parser.framesetOK: | ||
| 1085 | pass | ||
| 1086 | else: | ||
| 1087 | if self.tree.openElements[1].parent: | ||
| 1088 | self.tree.openElements[1].parent.removeChild(self.tree.openElements[1]) | ||
| 1089 | while self.tree.openElements[-1].name != "html": | ||
| 1090 | self.tree.openElements.pop() | ||
| 1091 | self.tree.insertElement(token) | ||
| 1092 | self.parser.phase = self.parser.phases["inFrameset"] | ||
| 1093 | |||
| 1094 | def startTagCloseP(self, token): | ||
| 1095 | if self.tree.elementInScope("p", variant="button"): | ||
| 1096 | self.endTagP(impliedTagToken("p")) | ||
| 1097 | self.tree.insertElement(token) | ||
| 1098 | |||
| 1099 | def startTagPreListing(self, token): | ||
| 1100 | if self.tree.elementInScope("p", variant="button"): | ||
| 1101 | self.endTagP(impliedTagToken("p")) | ||
| 1102 | self.tree.insertElement(token) | ||
| 1103 | self.parser.framesetOK = False | ||
| 1104 | self.processSpaceCharacters = self.processSpaceCharactersDropNewline | ||
| 1105 | |||
| 1106 | def startTagForm(self, token): | ||
| 1107 | if self.tree.formPointer: | ||
| 1108 | self.parser.parseError("unexpected-start-tag", {"name": "form"}) | ||
| 1109 | else: | ||
| 1110 | if self.tree.elementInScope("p", variant="button"): | ||
| 1111 | self.endTagP(impliedTagToken("p")) | ||
| 1112 | self.tree.insertElement(token) | ||
| 1113 | self.tree.formPointer = self.tree.openElements[-1] | ||
| 1114 | |||
        def startTagListItem(self, token):
            """Handle <li>, <dd> and <dt> per the spec's "in body" steps.

            An open item of the same group is implicitly closed before the
            new one is inserted; the scan stops early at special elements.
            """
            self.parser.framesetOK = False

            # Which open items this tag implicitly closes.
            stopNamesMap = {"li": ["li"],
                            "dt": ["dt", "dd"],
                            "dd": ["dt", "dd"]}
            stopNames = stopNamesMap[token["name"]]
            for node in reversed(self.tree.openElements):
                if node.name in stopNames:
                    # Close the matching open item, then stop scanning.
                    self.parser.phase.processEndTag(
                        impliedTagToken(node.name, "EndTag"))
                    break
                if (node.nameTuple in specialElements and
                        node.name not in ("address", "div", "p")):
                    # Hit a special element first: nothing to close.
                    break

            if self.tree.elementInScope("p", variant="button"):
                self.parser.phase.processEndTag(
                    impliedTagToken("p", "EndTag"))

            self.tree.insertElement(token)
| 1136 | |||
| 1137 | def startTagPlaintext(self, token): | ||
| 1138 | if self.tree.elementInScope("p", variant="button"): | ||
| 1139 | self.endTagP(impliedTagToken("p")) | ||
| 1140 | self.tree.insertElement(token) | ||
| 1141 | self.parser.tokenizer.state = self.parser.tokenizer.plaintextState | ||
| 1142 | |||
| 1143 | def startTagHeading(self, token): | ||
| 1144 | if self.tree.elementInScope("p", variant="button"): | ||
| 1145 | self.endTagP(impliedTagToken("p")) | ||
| 1146 | if self.tree.openElements[-1].name in headingElements: | ||
| 1147 | self.parser.parseError("unexpected-start-tag", {"name": token["name"]}) | ||
| 1148 | self.tree.openElements.pop() | ||
| 1149 | self.tree.insertElement(token) | ||
| 1150 | |||
| 1151 | def startTagA(self, token): | ||
| 1152 | afeAElement = self.tree.elementInActiveFormattingElements("a") | ||
| 1153 | if afeAElement: | ||
| 1154 | self.parser.parseError("unexpected-start-tag-implies-end-tag", | ||
| 1155 | {"startName": "a", "endName": "a"}) | ||
| 1156 | self.endTagFormatting(impliedTagToken("a")) | ||
| 1157 | if afeAElement in self.tree.openElements: | ||
| 1158 | self.tree.openElements.remove(afeAElement) | ||
| 1159 | if afeAElement in self.tree.activeFormattingElements: | ||
| 1160 | self.tree.activeFormattingElements.remove(afeAElement) | ||
| 1161 | self.tree.reconstructActiveFormattingElements() | ||
| 1162 | self.addFormattingElement(token) | ||
| 1163 | |||
| 1164 | def startTagFormatting(self, token): | ||
| 1165 | self.tree.reconstructActiveFormattingElements() | ||
| 1166 | self.addFormattingElement(token) | ||
| 1167 | |||
| 1168 | def startTagNobr(self, token): | ||
| 1169 | self.tree.reconstructActiveFormattingElements() | ||
| 1170 | if self.tree.elementInScope("nobr"): | ||
| 1171 | self.parser.parseError("unexpected-start-tag-implies-end-tag", | ||
| 1172 | {"startName": "nobr", "endName": "nobr"}) | ||
| 1173 | self.processEndTag(impliedTagToken("nobr")) | ||
| 1174 | # XXX Need tests that trigger the following | ||
| 1175 | self.tree.reconstructActiveFormattingElements() | ||
| 1176 | self.addFormattingElement(token) | ||
| 1177 | |||
| 1178 | def startTagButton(self, token): | ||
| 1179 | if self.tree.elementInScope("button"): | ||
| 1180 | self.parser.parseError("unexpected-start-tag-implies-end-tag", | ||
| 1181 | {"startName": "button", "endName": "button"}) | ||
| 1182 | self.processEndTag(impliedTagToken("button")) | ||
| 1183 | return token | ||
| 1184 | else: | ||
| 1185 | self.tree.reconstructActiveFormattingElements() | ||
| 1186 | self.tree.insertElement(token) | ||
| 1187 | self.parser.framesetOK = False | ||
| 1188 | |||
| 1189 | def startTagAppletMarqueeObject(self, token): | ||
| 1190 | self.tree.reconstructActiveFormattingElements() | ||
| 1191 | self.tree.insertElement(token) | ||
| 1192 | self.tree.activeFormattingElements.append(Marker) | ||
| 1193 | self.parser.framesetOK = False | ||
| 1194 | |||
| 1195 | def startTagXmp(self, token): | ||
| 1196 | if self.tree.elementInScope("p", variant="button"): | ||
| 1197 | self.endTagP(impliedTagToken("p")) | ||
| 1198 | self.tree.reconstructActiveFormattingElements() | ||
| 1199 | self.parser.framesetOK = False | ||
| 1200 | self.parser.parseRCDataRawtext(token, "RAWTEXT") | ||
| 1201 | |||
| 1202 | def startTagTable(self, token): | ||
| 1203 | if self.parser.compatMode != "quirks": | ||
| 1204 | if self.tree.elementInScope("p", variant="button"): | ||
| 1205 | self.processEndTag(impliedTagToken("p")) | ||
| 1206 | self.tree.insertElement(token) | ||
| 1207 | self.parser.framesetOK = False | ||
| 1208 | self.parser.phase = self.parser.phases["inTable"] | ||
| 1209 | |||
| 1210 | def startTagVoidFormatting(self, token): | ||
| 1211 | self.tree.reconstructActiveFormattingElements() | ||
| 1212 | self.tree.insertElement(token) | ||
| 1213 | self.tree.openElements.pop() | ||
| 1214 | token["selfClosingAcknowledged"] = True | ||
| 1215 | self.parser.framesetOK = False | ||
| 1216 | |||
| 1217 | def startTagInput(self, token): | ||
| 1218 | framesetOK = self.parser.framesetOK | ||
| 1219 | self.startTagVoidFormatting(token) | ||
| 1220 | if ("type" in token["data"] and | ||
| 1221 | token["data"]["type"].translate(asciiUpper2Lower) == "hidden"): | ||
| 1222 | # input type=hidden doesn't change framesetOK | ||
| 1223 | self.parser.framesetOK = framesetOK | ||
| 1224 | |||
| 1225 | def startTagParamSource(self, token): | ||
| 1226 | self.tree.insertElement(token) | ||
| 1227 | self.tree.openElements.pop() | ||
| 1228 | token["selfClosingAcknowledged"] = True | ||
| 1229 | |||
| 1230 | def startTagHr(self, token): | ||
| 1231 | if self.tree.elementInScope("p", variant="button"): | ||
| 1232 | self.endTagP(impliedTagToken("p")) | ||
| 1233 | self.tree.insertElement(token) | ||
| 1234 | self.tree.openElements.pop() | ||
| 1235 | token["selfClosingAcknowledged"] = True | ||
| 1236 | self.parser.framesetOK = False | ||
| 1237 | |||
| 1238 | def startTagImage(self, token): | ||
| 1239 | # No really... | ||
| 1240 | self.parser.parseError("unexpected-start-tag-treated-as", | ||
| 1241 | {"originalName": "image", "newName": "img"}) | ||
| 1242 | self.processStartTag(impliedTagToken("img", "StartTag", | ||
| 1243 | attributes=token["data"], | ||
| 1244 | selfClosing=token["selfClosing"])) | ||
| 1245 | |||
        def startTagIsIndex(self, token):
            """Expand the legacy <isindex> into form/hr/label/input/hr markup.

            The synthetic tokens are fed back through the normal start/end
            tag processing, so their order here matters.
            """
            self.parser.parseError("deprecated-tag", {"name": "isindex"})
            if self.tree.formPointer:
                # Ignored entirely while another form is already open.
                return
            form_attrs = {}
            if "action" in token["data"]:
                # Only the action attribute is carried over to the form.
                form_attrs["action"] = token["data"]["action"]
            self.processStartTag(impliedTagToken("form", "StartTag",
                                                 attributes=form_attrs))
            self.processStartTag(impliedTagToken("hr", "StartTag"))
            self.processStartTag(impliedTagToken("label", "StartTag"))
            # XXX Localization ...
            if "prompt" in token["data"]:
                prompt = token["data"]["prompt"]
            else:
                prompt = "This is a searchable index. Enter search keywords: "
            self.processCharacters(
                {"type": tokenTypes["Characters"], "data": prompt})
            # Remaining attributes (minus action/prompt) go on the input.
            attributes = token["data"].copy()
            if "action" in attributes:
                del attributes["action"]
            if "prompt" in attributes:
                del attributes["prompt"]
            attributes["name"] = "isindex"
            self.processStartTag(impliedTagToken("input", "StartTag",
                                                 attributes=attributes,
                                                 selfClosing=token["selfClosing"]))
            self.processEndTag(impliedTagToken("label"))
            self.processStartTag(impliedTagToken("hr", "StartTag"))
            self.processEndTag(impliedTagToken("form"))
| 1276 | |||
| 1277 | def startTagTextarea(self, token): | ||
| 1278 | self.tree.insertElement(token) | ||
| 1279 | self.parser.tokenizer.state = self.parser.tokenizer.rcdataState | ||
| 1280 | self.processSpaceCharacters = self.processSpaceCharactersDropNewline | ||
| 1281 | self.parser.framesetOK = False | ||
| 1282 | |||
| 1283 | def startTagIFrame(self, token): | ||
| 1284 | self.parser.framesetOK = False | ||
| 1285 | self.startTagRawtext(token) | ||
| 1286 | |||
| 1287 | def startTagNoscript(self, token): | ||
| 1288 | if self.parser.scripting: | ||
| 1289 | self.startTagRawtext(token) | ||
| 1290 | else: | ||
| 1291 | self.startTagOther(token) | ||
| 1292 | |||
| 1293 | def startTagRawtext(self, token): | ||
| 1294 | """iframe, noembed noframes, noscript(if scripting enabled)""" | ||
| 1295 | self.parser.parseRCDataRawtext(token, "RAWTEXT") | ||
| 1296 | |||
| 1297 | def startTagOpt(self, token): | ||
| 1298 | if self.tree.openElements[-1].name == "option": | ||
| 1299 | self.parser.phase.processEndTag(impliedTagToken("option")) | ||
| 1300 | self.tree.reconstructActiveFormattingElements() | ||
| 1301 | self.parser.tree.insertElement(token) | ||
| 1302 | |||
| 1303 | def startTagSelect(self, token): | ||
| 1304 | self.tree.reconstructActiveFormattingElements() | ||
| 1305 | self.tree.insertElement(token) | ||
| 1306 | self.parser.framesetOK = False | ||
| 1307 | if self.parser.phase in (self.parser.phases["inTable"], | ||
| 1308 | self.parser.phases["inCaption"], | ||
| 1309 | self.parser.phases["inColumnGroup"], | ||
| 1310 | self.parser.phases["inTableBody"], | ||
| 1311 | self.parser.phases["inRow"], | ||
| 1312 | self.parser.phases["inCell"]): | ||
| 1313 | self.parser.phase = self.parser.phases["inSelectInTable"] | ||
| 1314 | else: | ||
| 1315 | self.parser.phase = self.parser.phases["inSelect"] | ||
| 1316 | |||
| 1317 | def startTagRpRt(self, token): | ||
| 1318 | if self.tree.elementInScope("ruby"): | ||
| 1319 | self.tree.generateImpliedEndTags() | ||
| 1320 | if self.tree.openElements[-1].name != "ruby": | ||
| 1321 | self.parser.parseError() | ||
| 1322 | self.tree.insertElement(token) | ||
| 1323 | |||
| 1324 | def startTagMath(self, token): | ||
| 1325 | self.tree.reconstructActiveFormattingElements() | ||
| 1326 | self.parser.adjustMathMLAttributes(token) | ||
| 1327 | self.parser.adjustForeignAttributes(token) | ||
| 1328 | token["namespace"] = namespaces["mathml"] | ||
| 1329 | self.tree.insertElement(token) | ||
| 1330 | # Need to get the parse error right for the case where the token | ||
| 1331 | # has a namespace not equal to the xmlns attribute | ||
| 1332 | if token["selfClosing"]: | ||
| 1333 | self.tree.openElements.pop() | ||
| 1334 | token["selfClosingAcknowledged"] = True | ||
| 1335 | |||
| 1336 | def startTagSvg(self, token): | ||
| 1337 | self.tree.reconstructActiveFormattingElements() | ||
| 1338 | self.parser.adjustSVGAttributes(token) | ||
| 1339 | self.parser.adjustForeignAttributes(token) | ||
| 1340 | token["namespace"] = namespaces["svg"] | ||
| 1341 | self.tree.insertElement(token) | ||
| 1342 | # Need to get the parse error right for the case where the token | ||
| 1343 | # has a namespace not equal to the xmlns attribute | ||
| 1344 | if token["selfClosing"]: | ||
| 1345 | self.tree.openElements.pop() | ||
| 1346 | token["selfClosingAcknowledged"] = True | ||
| 1347 | |||
| 1348 | def startTagMisplaced(self, token): | ||
| 1349 | """ Elements that should be children of other elements that have a | ||
| 1350 | different insertion mode; here they are ignored | ||
| 1351 | "caption", "col", "colgroup", "frame", "frameset", "head", | ||
| 1352 | "option", "optgroup", "tbody", "td", "tfoot", "th", "thead", | ||
| 1353 | "tr", "noscript" | ||
| 1354 | """ | ||
| 1355 | self.parser.parseError("unexpected-start-tag-ignored", {"name": token["name"]}) | ||
| 1356 | |||
| 1357 | def startTagOther(self, token): | ||
| 1358 | self.tree.reconstructActiveFormattingElements() | ||
| 1359 | self.tree.insertElement(token) | ||
| 1360 | |||
| 1361 | def endTagP(self, token): | ||
| 1362 | if not self.tree.elementInScope("p", variant="button"): | ||
| 1363 | self.startTagCloseP(impliedTagToken("p", "StartTag")) | ||
| 1364 | self.parser.parseError("unexpected-end-tag", {"name": "p"}) | ||
| 1365 | self.endTagP(impliedTagToken("p", "EndTag")) | ||
| 1366 | else: | ||
| 1367 | self.tree.generateImpliedEndTags("p") | ||
| 1368 | if self.tree.openElements[-1].name != "p": | ||
| 1369 | self.parser.parseError("unexpected-end-tag", {"name": "p"}) | ||
| 1370 | node = self.tree.openElements.pop() | ||
| 1371 | while node.name != "p": | ||
| 1372 | node = self.tree.openElements.pop() | ||
| 1373 | |||
| 1374 | def endTagBody(self, token): | ||
| 1375 | if not self.tree.elementInScope("body"): | ||
| 1376 | self.parser.parseError() | ||
| 1377 | return | ||
| 1378 | elif self.tree.openElements[-1].name != "body": | ||
| 1379 | for node in self.tree.openElements[2:]: | ||
| 1380 | if node.name not in frozenset(("dd", "dt", "li", "optgroup", | ||
| 1381 | "option", "p", "rp", "rt", | ||
| 1382 | "tbody", "td", "tfoot", | ||
| 1383 | "th", "thead", "tr", "body", | ||
| 1384 | "html")): | ||
| 1385 | # Not sure this is the correct name for the parse error | ||
| 1386 | self.parser.parseError( | ||
| 1387 | "expected-one-end-tag-but-got-another", | ||
| 1388 | {"gotName": "body", "expectedName": node.name}) | ||
| 1389 | break | ||
| 1390 | self.parser.phase = self.parser.phases["afterBody"] | ||
| 1391 | |||
| 1392 | def endTagHtml(self, token): | ||
| 1393 | # We repeat the test for the body end tag token being ignored here | ||
| 1394 | if self.tree.elementInScope("body"): | ||
| 1395 | self.endTagBody(impliedTagToken("body")) | ||
| 1396 | return token | ||
| 1397 | |||
| 1398 | def endTagBlock(self, token): | ||
| 1399 | # Put us back in the right whitespace handling mode | ||
| 1400 | if token["name"] == "pre": | ||
| 1401 | self.processSpaceCharacters = self.processSpaceCharactersNonPre | ||
| 1402 | inScope = self.tree.elementInScope(token["name"]) | ||
| 1403 | if inScope: | ||
| 1404 | self.tree.generateImpliedEndTags() | ||
| 1405 | if self.tree.openElements[-1].name != token["name"]: | ||
| 1406 | self.parser.parseError("end-tag-too-early", {"name": token["name"]}) | ||
| 1407 | if inScope: | ||
| 1408 | node = self.tree.openElements.pop() | ||
| 1409 | while node.name != token["name"]: | ||
| 1410 | node = self.tree.openElements.pop() | ||
| 1411 | |||
| 1412 | def endTagForm(self, token): | ||
| 1413 | node = self.tree.formPointer | ||
| 1414 | self.tree.formPointer = None | ||
| 1415 | if node is None or not self.tree.elementInScope(node): | ||
| 1416 | self.parser.parseError("unexpected-end-tag", | ||
| 1417 | {"name": "form"}) | ||
| 1418 | else: | ||
| 1419 | self.tree.generateImpliedEndTags() | ||
| 1420 | if self.tree.openElements[-1] != node: | ||
| 1421 | self.parser.parseError("end-tag-too-early-ignored", | ||
| 1422 | {"name": "form"}) | ||
| 1423 | self.tree.openElements.remove(node) | ||
| 1424 | |||
| 1425 | def endTagListItem(self, token): | ||
| 1426 | if token["name"] == "li": | ||
| 1427 | variant = "list" | ||
| 1428 | else: | ||
| 1429 | variant = None | ||
| 1430 | if not self.tree.elementInScope(token["name"], variant=variant): | ||
| 1431 | self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) | ||
| 1432 | else: | ||
| 1433 | self.tree.generateImpliedEndTags(exclude=token["name"]) | ||
| 1434 | if self.tree.openElements[-1].name != token["name"]: | ||
| 1435 | self.parser.parseError( | ||
| 1436 | "end-tag-too-early", | ||
| 1437 | {"name": token["name"]}) | ||
| 1438 | node = self.tree.openElements.pop() | ||
| 1439 | while node.name != token["name"]: | ||
| 1440 | node = self.tree.openElements.pop() | ||
| 1441 | |||
| 1442 | def endTagHeading(self, token): | ||
| 1443 | for item in headingElements: | ||
| 1444 | if self.tree.elementInScope(item): | ||
| 1445 | self.tree.generateImpliedEndTags() | ||
| 1446 | break | ||
| 1447 | if self.tree.openElements[-1].name != token["name"]: | ||
| 1448 | self.parser.parseError("end-tag-too-early", {"name": token["name"]}) | ||
| 1449 | |||
| 1450 | for item in headingElements: | ||
| 1451 | if self.tree.elementInScope(item): | ||
| 1452 | item = self.tree.openElements.pop() | ||
| 1453 | while item.name not in headingElements: | ||
| 1454 | item = self.tree.openElements.pop() | ||
| 1455 | break | ||
| 1456 | |||
| 1457 | def endTagFormatting(self, token): | ||
| 1458 | """The much-feared adoption agency algorithm""" | ||
| 1459 | # http://svn.whatwg.org/webapps/complete.html#adoptionAgency revision 7867 | ||
| 1460 | # XXX Better parseError messages appreciated. | ||
| 1461 | |||
| 1462 | # Step 1 | ||
| 1463 | outerLoopCounter = 0 | ||
| 1464 | |||
| 1465 | # Step 2 | ||
| 1466 | while outerLoopCounter < 8: | ||
| 1467 | |||
| 1468 | # Step 3 | ||
| 1469 | outerLoopCounter += 1 | ||
| 1470 | |||
| 1471 | # Step 4: | ||
| 1472 | |||
| 1473 | # Let the formatting element be the last element in | ||
| 1474 | # the list of active formatting elements that: | ||
| 1475 | # - is between the end of the list and the last scope | ||
| 1476 | # marker in the list, if any, or the start of the list | ||
| 1477 | # otherwise, and | ||
| 1478 | # - has the same tag name as the token. | ||
| 1479 | formattingElement = self.tree.elementInActiveFormattingElements( | ||
| 1480 | token["name"]) | ||
| 1481 | if (not formattingElement or | ||
| 1482 | (formattingElement in self.tree.openElements and | ||
| 1483 | not self.tree.elementInScope(formattingElement.name))): | ||
| 1484 | # If there is no such node, then abort these steps | ||
| 1485 | # and instead act as described in the "any other | ||
| 1486 | # end tag" entry below. | ||
| 1487 | self.endTagOther(token) | ||
| 1488 | return | ||
| 1489 | |||
| 1490 | # Otherwise, if there is such a node, but that node is | ||
| 1491 | # not in the stack of open elements, then this is a | ||
| 1492 | # parse error; remove the element from the list, and | ||
| 1493 | # abort these steps. | ||
| 1494 | elif formattingElement not in self.tree.openElements: | ||
| 1495 | self.parser.parseError("adoption-agency-1.2", {"name": token["name"]}) | ||
| 1496 | self.tree.activeFormattingElements.remove(formattingElement) | ||
| 1497 | return | ||
| 1498 | |||
| 1499 | # Otherwise, if there is such a node, and that node is | ||
| 1500 | # also in the stack of open elements, but the element | ||
| 1501 | # is not in scope, then this is a parse error; ignore | ||
| 1502 | # the token, and abort these steps. | ||
| 1503 | elif not self.tree.elementInScope(formattingElement.name): | ||
| 1504 | self.parser.parseError("adoption-agency-4.4", {"name": token["name"]}) | ||
| 1505 | return | ||
| 1506 | |||
| 1507 | # Otherwise, there is a formatting element and that | ||
| 1508 | # element is in the stack and is in scope. If the | ||
| 1509 | # element is not the current node, this is a parse | ||
| 1510 | # error. In any case, proceed with the algorithm as | ||
| 1511 | # written in the following steps. | ||
| 1512 | else: | ||
| 1513 | if formattingElement != self.tree.openElements[-1]: | ||
| 1514 | self.parser.parseError("adoption-agency-1.3", {"name": token["name"]}) | ||
| 1515 | |||
| 1516 | # Step 5: | ||
| 1517 | |||
| 1518 | # Let the furthest block be the topmost node in the | ||
| 1519 | # stack of open elements that is lower in the stack | ||
| 1520 | # than the formatting element, and is an element in | ||
| 1521 | # the special category. There might not be one. | ||
| 1522 | afeIndex = self.tree.openElements.index(formattingElement) | ||
| 1523 | furthestBlock = None | ||
| 1524 | for element in self.tree.openElements[afeIndex:]: | ||
| 1525 | if element.nameTuple in specialElements: | ||
| 1526 | furthestBlock = element | ||
| 1527 | break | ||
| 1528 | |||
| 1529 | # Step 6: | ||
| 1530 | |||
| 1531 | # If there is no furthest block, then the UA must | ||
| 1532 | # first pop all the nodes from the bottom of the stack | ||
| 1533 | # of open elements, from the current node up to and | ||
| 1534 | # including the formatting element, then remove the | ||
| 1535 | # formatting element from the list of active | ||
| 1536 | # formatting elements, and finally abort these steps. | ||
| 1537 | if furthestBlock is None: | ||
| 1538 | element = self.tree.openElements.pop() | ||
| 1539 | while element != formattingElement: | ||
| 1540 | element = self.tree.openElements.pop() | ||
| 1541 | self.tree.activeFormattingElements.remove(element) | ||
| 1542 | return | ||
| 1543 | |||
| 1544 | # Step 7 | ||
| 1545 | commonAncestor = self.tree.openElements[afeIndex - 1] | ||
| 1546 | |||
| 1547 | # Step 8: | ||
| 1548 | # The bookmark is supposed to help us identify where to reinsert | ||
| 1549 | # nodes in step 15. We have to ensure that we reinsert nodes after | ||
| 1550 | # the node before the active formatting element. Note the bookmark | ||
| 1551 | # can move in step 9.7 | ||
| 1552 | bookmark = self.tree.activeFormattingElements.index(formattingElement) | ||
| 1553 | |||
| 1554 | # Step 9 | ||
| 1555 | lastNode = node = furthestBlock | ||
| 1556 | innerLoopCounter = 0 | ||
| 1557 | |||
| 1558 | index = self.tree.openElements.index(node) | ||
| 1559 | while innerLoopCounter < 3: | ||
| 1560 | innerLoopCounter += 1 | ||
| 1561 | # Node is element before node in open elements | ||
| 1562 | index -= 1 | ||
| 1563 | node = self.tree.openElements[index] | ||
| 1564 | if node not in self.tree.activeFormattingElements: | ||
| 1565 | self.tree.openElements.remove(node) | ||
| 1566 | continue | ||
| 1567 | # Step 9.6 | ||
| 1568 | if node == formattingElement: | ||
| 1569 | break | ||
| 1570 | # Step 9.7 | ||
| 1571 | if lastNode == furthestBlock: | ||
| 1572 | bookmark = self.tree.activeFormattingElements.index(node) + 1 | ||
| 1573 | # Step 9.8 | ||
| 1574 | clone = node.cloneNode() | ||
| 1575 | # Replace node with clone | ||
| 1576 | self.tree.activeFormattingElements[ | ||
| 1577 | self.tree.activeFormattingElements.index(node)] = clone | ||
| 1578 | self.tree.openElements[ | ||
| 1579 | self.tree.openElements.index(node)] = clone | ||
| 1580 | node = clone | ||
| 1581 | # Step 9.9 | ||
| 1582 | # Remove lastNode from its parents, if any | ||
| 1583 | if lastNode.parent: | ||
| 1584 | lastNode.parent.removeChild(lastNode) | ||
| 1585 | node.appendChild(lastNode) | ||
| 1586 | # Step 9.10 | ||
| 1587 | lastNode = node | ||
| 1588 | |||
| 1589 | # Step 10 | ||
| 1590 | # Foster parent lastNode if commonAncestor is a | ||
| 1591 | # table, tbody, tfoot, thead, or tr we need to foster | ||
| 1592 | # parent the lastNode | ||
| 1593 | if lastNode.parent: | ||
| 1594 | lastNode.parent.removeChild(lastNode) | ||
| 1595 | |||
| 1596 | if commonAncestor.name in frozenset(("table", "tbody", "tfoot", "thead", "tr")): | ||
| 1597 | parent, insertBefore = self.tree.getTableMisnestedNodePosition() | ||
| 1598 | parent.insertBefore(lastNode, insertBefore) | ||
| 1599 | else: | ||
| 1600 | commonAncestor.appendChild(lastNode) | ||
| 1601 | |||
| 1602 | # Step 11 | ||
| 1603 | clone = formattingElement.cloneNode() | ||
| 1604 | |||
| 1605 | # Step 12 | ||
| 1606 | furthestBlock.reparentChildren(clone) | ||
| 1607 | |||
| 1608 | # Step 13 | ||
| 1609 | furthestBlock.appendChild(clone) | ||
| 1610 | |||
| 1611 | # Step 14 | ||
| 1612 | self.tree.activeFormattingElements.remove(formattingElement) | ||
| 1613 | self.tree.activeFormattingElements.insert(bookmark, clone) | ||
| 1614 | |||
| 1615 | # Step 15 | ||
| 1616 | self.tree.openElements.remove(formattingElement) | ||
| 1617 | self.tree.openElements.insert( | ||
| 1618 | self.tree.openElements.index(furthestBlock) + 1, clone) | ||
| 1619 | |||
| 1620 | def endTagAppletMarqueeObject(self, token): | ||
| 1621 | if self.tree.elementInScope(token["name"]): | ||
| 1622 | self.tree.generateImpliedEndTags() | ||
| 1623 | if self.tree.openElements[-1].name != token["name"]: | ||
| 1624 | self.parser.parseError("end-tag-too-early", {"name": token["name"]}) | ||
| 1625 | |||
| 1626 | if self.tree.elementInScope(token["name"]): | ||
| 1627 | element = self.tree.openElements.pop() | ||
| 1628 | while element.name != token["name"]: | ||
| 1629 | element = self.tree.openElements.pop() | ||
| 1630 | self.tree.clearActiveFormattingElements() | ||
| 1631 | |||
| 1632 | def endTagBr(self, token): | ||
| 1633 | self.parser.parseError("unexpected-end-tag-treated-as", | ||
| 1634 | {"originalName": "br", "newName": "br element"}) | ||
| 1635 | self.tree.reconstructActiveFormattingElements() | ||
| 1636 | self.tree.insertElement(impliedTagToken("br", "StartTag")) | ||
| 1637 | self.tree.openElements.pop() | ||
| 1638 | |||
| 1639 | def endTagOther(self, token): | ||
| 1640 | for node in self.tree.openElements[::-1]: | ||
| 1641 | if node.name == token["name"]: | ||
| 1642 | self.tree.generateImpliedEndTags(exclude=token["name"]) | ||
| 1643 | if self.tree.openElements[-1].name != token["name"]: | ||
| 1644 | self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) | ||
| 1645 | while self.tree.openElements.pop() != node: | ||
| 1646 | pass | ||
| 1647 | break | ||
| 1648 | else: | ||
| 1649 | if node.nameTuple in specialElements: | ||
| 1650 | self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) | ||
| 1651 | break | ||
| 1652 | |||
| 1653 | class TextPhase(Phase): | ||
| 1654 | def __init__(self, parser, tree): | ||
| 1655 | Phase.__init__(self, parser, tree) | ||
| 1656 | self.startTagHandler = _utils.MethodDispatcher([]) | ||
| 1657 | self.startTagHandler.default = self.startTagOther | ||
| 1658 | self.endTagHandler = _utils.MethodDispatcher([ | ||
| 1659 | ("script", self.endTagScript)]) | ||
| 1660 | self.endTagHandler.default = self.endTagOther | ||
| 1661 | |||
| 1662 | def processCharacters(self, token): | ||
| 1663 | self.tree.insertText(token["data"]) | ||
| 1664 | |||
| 1665 | def processEOF(self): | ||
| 1666 | self.parser.parseError("expected-named-closing-tag-but-got-eof", | ||
| 1667 | {"name": self.tree.openElements[-1].name}) | ||
| 1668 | self.tree.openElements.pop() | ||
| 1669 | self.parser.phase = self.parser.originalPhase | ||
| 1670 | return True | ||
| 1671 | |||
| 1672 | def startTagOther(self, token): | ||
| 1673 | assert False, "Tried to process start tag %s in RCDATA/RAWTEXT mode" % token['name'] | ||
| 1674 | |||
| 1675 | def endTagScript(self, token): | ||
| 1676 | node = self.tree.openElements.pop() | ||
| 1677 | assert node.name == "script" | ||
| 1678 | self.parser.phase = self.parser.originalPhase | ||
| 1679 | # The rest of this method is all stuff that only happens if | ||
| 1680 | # document.write works | ||
| 1681 | |||
| 1682 | def endTagOther(self, token): | ||
| 1683 | self.tree.openElements.pop() | ||
| 1684 | self.parser.phase = self.parser.originalPhase | ||
| 1685 | |||
    class InTablePhase(Phase):
        # http://www.whatwg.org/specs/web-apps/current-work/#in-table
        """The "in table" insertion mode of the HTML5 tree construction stage."""

        def __init__(self, parser, tree):
            Phase.__init__(self, parser, tree)
            # Tag-name -> handler dispatch; unlisted names fall through to
            # the *Other default handlers.
            self.startTagHandler = _utils.MethodDispatcher([
                ("html", self.startTagHtml),
                ("caption", self.startTagCaption),
                ("colgroup", self.startTagColgroup),
                ("col", self.startTagCol),
                (("tbody", "tfoot", "thead"), self.startTagRowGroup),
                (("td", "th", "tr"), self.startTagImplyTbody),
                ("table", self.startTagTable),
                (("style", "script"), self.startTagStyleScript),
                ("input", self.startTagInput),
                ("form", self.startTagForm)
            ])
            self.startTagHandler.default = self.startTagOther

            self.endTagHandler = _utils.MethodDispatcher([
                ("table", self.endTagTable),
                (("body", "caption", "col", "colgroup", "html", "tbody", "td",
                  "tfoot", "th", "thead", "tr"), self.endTagIgnore)
            ])
            self.endTagHandler.default = self.endTagOther

        # helper methods
        def clearStackToTableContext(self):
            # "clear the stack back to a table context"
            # Pops until <table> or <html> becomes the current node.
            while self.tree.openElements[-1].name not in ("table", "html"):
                # self.parser.parseError("unexpected-implied-end-tag-in-table",
                #  {"name": self.tree.openElements[-1].name})
                self.tree.openElements.pop()
            # When the current node is <html> it's an innerHTML case

        # processing methods
        def processEOF(self):
            # EOF with an open table is an error except in the fragment case.
            if self.tree.openElements[-1].name != "html":
                self.parser.parseError("eof-in-table")
            else:
                assert self.parser.innerHTML
            # Stop parsing

        def processSpaceCharacters(self, token):
            # Whitespace is buffered by the "in table text" phase, which
            # remembers this phase so it can return here afterwards.
            originalPhase = self.parser.phase
            self.parser.phase = self.parser.phases["inTableText"]
            self.parser.phase.originalPhase = originalPhase
            self.parser.phase.processSpaceCharacters(token)

        def processCharacters(self, token):
            # Non-space characters go through the same buffering phase.
            originalPhase = self.parser.phase
            self.parser.phase = self.parser.phases["inTableText"]
            self.parser.phase.originalPhase = originalPhase
            self.parser.phase.processCharacters(token)

        def insertText(self, token):
            # If we get here there must be at least one non-whitespace character
            # Do the table magic!
            # insertFromTable makes the tree builder foster-parent the text.
            self.tree.insertFromTable = True
            self.parser.phases["inBody"].processCharacters(token)
            self.tree.insertFromTable = False

        def startTagCaption(self, token):
            # <caption>: push a formatting marker and switch phase.
            self.clearStackToTableContext()
            self.tree.activeFormattingElements.append(Marker)
            self.tree.insertElement(token)
            self.parser.phase = self.parser.phases["inCaption"]

        def startTagColgroup(self, token):
            self.clearStackToTableContext()
            self.tree.insertElement(token)
            self.parser.phase = self.parser.phases["inColumnGroup"]

        def startTagCol(self, token):
            # <col> implies an enclosing <colgroup>; reprocess the token there.
            self.startTagColgroup(impliedTagToken("colgroup", "StartTag"))
            return token

        def startTagRowGroup(self, token):
            self.clearStackToTableContext()
            self.tree.insertElement(token)
            self.parser.phase = self.parser.phases["inTableBody"]

        def startTagImplyTbody(self, token):
            # <td>/<th>/<tr> directly in a table imply a <tbody> first.
            self.startTagRowGroup(impliedTagToken("tbody", "StartTag"))
            return token

        def startTagTable(self, token):
            # Nested <table> implicitly closes the current one, then the new
            # table token is reprocessed (except in the fragment case).
            self.parser.parseError("unexpected-start-tag-implies-end-tag",
                                   {"startName": "table", "endName": "table"})
            self.parser.phase.processEndTag(impliedTagToken("table"))
            if not self.parser.innerHTML:
                return token

        def startTagStyleScript(self, token):
            # <style>/<script> are handled exactly as in the "in head" phase.
            return self.parser.phases["inHead"].processStartTag(token)

        def startTagInput(self, token):
            # Only hidden inputs are allowed directly in a table; they are
            # inserted and immediately popped. Anything else gets the
            # foster-parenting treatment via startTagOther.
            if ("type" in token["data"] and
                    token["data"]["type"].translate(asciiUpper2Lower) == "hidden"):
                self.parser.parseError("unexpected-hidden-input-in-table")
                self.tree.insertElement(token)
                # XXX associate with form
                self.tree.openElements.pop()
            else:
                self.startTagOther(token)

        def startTagForm(self, token):
            # A <form> in a table is an error; it is only honoured (inserted
            # and popped) when no form element is already open.
            self.parser.parseError("unexpected-form-in-table")
            if self.tree.formPointer is None:
                self.tree.insertElement(token)
                self.tree.formPointer = self.tree.openElements[-1]
                self.tree.openElements.pop()

        def startTagOther(self, token):
            self.parser.parseError("unexpected-start-tag-implies-table-voodoo", {"name": token["name"]})
            # Do the table magic!
            self.tree.insertFromTable = True
            self.parser.phases["inBody"].processStartTag(token)
            self.tree.insertFromTable = False

        def endTagTable(self, token):
            # </table>: pop back to and including the table, then recompute
            # the insertion mode from the remaining stack.
            if self.tree.elementInScope("table", variant="table"):
                self.tree.generateImpliedEndTags()
                if self.tree.openElements[-1].name != "table":
                    self.parser.parseError("end-tag-too-early-named",
                                           {"gotName": "table",
                                            "expectedName": self.tree.openElements[-1].name})
                while self.tree.openElements[-1].name != "table":
                    self.tree.openElements.pop()
                self.tree.openElements.pop()
                self.parser.resetInsertionMode()
            else:
                # innerHTML case
                assert self.parser.innerHTML
                self.parser.parseError()

        def endTagIgnore(self, token):
            self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

        def endTagOther(self, token):
            self.parser.parseError("unexpected-end-tag-implies-table-voodoo", {"name": token["name"]})
            # Do the table magic!
            self.tree.insertFromTable = True
            self.parser.phases["inBody"].processEndTag(token)
            self.tree.insertFromTable = False
| 1830 | |||
| 1831 | class InTableTextPhase(Phase): | ||
| 1832 | def __init__(self, parser, tree): | ||
| 1833 | Phase.__init__(self, parser, tree) | ||
| 1834 | self.originalPhase = None | ||
| 1835 | self.characterTokens = [] | ||
| 1836 | |||
| 1837 | def flushCharacters(self): | ||
| 1838 | data = "".join([item["data"] for item in self.characterTokens]) | ||
| 1839 | if any([item not in spaceCharacters for item in data]): | ||
| 1840 | token = {"type": tokenTypes["Characters"], "data": data} | ||
| 1841 | self.parser.phases["inTable"].insertText(token) | ||
| 1842 | elif data: | ||
| 1843 | self.tree.insertText(data) | ||
| 1844 | self.characterTokens = [] | ||
| 1845 | |||
| 1846 | def processComment(self, token): | ||
| 1847 | self.flushCharacters() | ||
| 1848 | self.parser.phase = self.originalPhase | ||
| 1849 | return token | ||
| 1850 | |||
| 1851 | def processEOF(self): | ||
| 1852 | self.flushCharacters() | ||
| 1853 | self.parser.phase = self.originalPhase | ||
| 1854 | return True | ||
| 1855 | |||
| 1856 | def processCharacters(self, token): | ||
| 1857 | if token["data"] == "\u0000": | ||
| 1858 | return | ||
| 1859 | self.characterTokens.append(token) | ||
| 1860 | |||
| 1861 | def processSpaceCharacters(self, token): | ||
| 1862 | # pretty sure we should never reach here | ||
| 1863 | self.characterTokens.append(token) | ||
| 1864 | # assert False | ||
| 1865 | |||
| 1866 | def processStartTag(self, token): | ||
| 1867 | self.flushCharacters() | ||
| 1868 | self.parser.phase = self.originalPhase | ||
| 1869 | return token | ||
| 1870 | |||
| 1871 | def processEndTag(self, token): | ||
| 1872 | self.flushCharacters() | ||
| 1873 | self.parser.phase = self.originalPhase | ||
| 1874 | return token | ||
| 1875 | |||
    class InCaptionPhase(Phase):
        # http://www.whatwg.org/specs/web-apps/current-work/#in-caption
        """The "in caption" insertion mode."""

        def __init__(self, parser, tree):
            Phase.__init__(self, parser, tree)

            self.startTagHandler = _utils.MethodDispatcher([
                ("html", self.startTagHtml),
                (("caption", "col", "colgroup", "tbody", "td", "tfoot", "th",
                  "thead", "tr"), self.startTagTableElement)
            ])
            self.startTagHandler.default = self.startTagOther

            self.endTagHandler = _utils.MethodDispatcher([
                ("caption", self.endTagCaption),
                ("table", self.endTagTable),
                (("body", "col", "colgroup", "html", "tbody", "td", "tfoot", "th",
                  "thead", "tr"), self.endTagIgnore)
            ])
            self.endTagHandler.default = self.endTagOther

        def ignoreEndTagCaption(self):
            # An implied </caption> is a no-op when no caption is in
            # table scope (the fragment case).
            return not self.tree.elementInScope("caption", variant="table")

        def processEOF(self):
            self.parser.phases["inBody"].processEOF()

        def processCharacters(self, token):
            return self.parser.phases["inBody"].processCharacters(token)

        def startTagTableElement(self, token):
            # A table-structure start tag implicitly ends the caption, then
            # the token is reprocessed — unless the implied end tag was ignored.
            self.parser.parseError()
            # XXX Have to duplicate logic here to find out if the tag is ignored
            ignoreEndTag = self.ignoreEndTagCaption()
            self.parser.phase.processEndTag(impliedTagToken("caption"))
            if not ignoreEndTag:
                return token

        def startTagOther(self, token):
            return self.parser.phases["inBody"].processStartTag(token)

        def endTagCaption(self, token):
            if not self.ignoreEndTagCaption():
                # AT this code is quite similar to endTagTable in "InTable"
                self.tree.generateImpliedEndTags()
                if self.tree.openElements[-1].name != "caption":
                    self.parser.parseError("expected-one-end-tag-but-got-another",
                                           {"gotName": "caption",
                                            "expectedName": self.tree.openElements[-1].name})
                # Pop back to and including the caption, drop formatting
                # elements pushed since the caption's marker, return to table.
                while self.tree.openElements[-1].name != "caption":
                    self.tree.openElements.pop()
                self.tree.openElements.pop()
                self.tree.clearActiveFormattingElements()
                self.parser.phase = self.parser.phases["inTable"]
            else:
                # innerHTML case
                assert self.parser.innerHTML
                self.parser.parseError()

        def endTagTable(self, token):
            # </table> inside a caption implies </caption> first, then the
            # </table> token is reprocessed.
            self.parser.parseError()
            ignoreEndTag = self.ignoreEndTagCaption()
            self.parser.phase.processEndTag(impliedTagToken("caption"))
            if not ignoreEndTag:
                return token

        def endTagIgnore(self, token):
            self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

        def endTagOther(self, token):
            return self.parser.phases["inBody"].processEndTag(token)
| 1946 | |||
| 1947 | class InColumnGroupPhase(Phase): | ||
| 1948 | # http://www.whatwg.org/specs/web-apps/current-work/#in-column | ||
| 1949 | |||
| 1950 | def __init__(self, parser, tree): | ||
| 1951 | Phase.__init__(self, parser, tree) | ||
| 1952 | |||
| 1953 | self.startTagHandler = _utils.MethodDispatcher([ | ||
| 1954 | ("html", self.startTagHtml), | ||
| 1955 | ("col", self.startTagCol) | ||
| 1956 | ]) | ||
| 1957 | self.startTagHandler.default = self.startTagOther | ||
| 1958 | |||
| 1959 | self.endTagHandler = _utils.MethodDispatcher([ | ||
| 1960 | ("colgroup", self.endTagColgroup), | ||
| 1961 | ("col", self.endTagCol) | ||
| 1962 | ]) | ||
| 1963 | self.endTagHandler.default = self.endTagOther | ||
| 1964 | |||
| 1965 | def ignoreEndTagColgroup(self): | ||
| 1966 | return self.tree.openElements[-1].name == "html" | ||
| 1967 | |||
| 1968 | def processEOF(self): | ||
| 1969 | if self.tree.openElements[-1].name == "html": | ||
| 1970 | assert self.parser.innerHTML | ||
| 1971 | return | ||
| 1972 | else: | ||
| 1973 | ignoreEndTag = self.ignoreEndTagColgroup() | ||
| 1974 | self.endTagColgroup(impliedTagToken("colgroup")) | ||
| 1975 | if not ignoreEndTag: | ||
| 1976 | return True | ||
| 1977 | |||
| 1978 | def processCharacters(self, token): | ||
| 1979 | ignoreEndTag = self.ignoreEndTagColgroup() | ||
| 1980 | self.endTagColgroup(impliedTagToken("colgroup")) | ||
| 1981 | if not ignoreEndTag: | ||
| 1982 | return token | ||
| 1983 | |||
| 1984 | def startTagCol(self, token): | ||
| 1985 | self.tree.insertElement(token) | ||
| 1986 | self.tree.openElements.pop() | ||
| 1987 | token["selfClosingAcknowledged"] = True | ||
| 1988 | |||
| 1989 | def startTagOther(self, token): | ||
| 1990 | ignoreEndTag = self.ignoreEndTagColgroup() | ||
| 1991 | self.endTagColgroup(impliedTagToken("colgroup")) | ||
| 1992 | if not ignoreEndTag: | ||
| 1993 | return token | ||
| 1994 | |||
| 1995 | def endTagColgroup(self, token): | ||
| 1996 | if self.ignoreEndTagColgroup(): | ||
| 1997 | # innerHTML case | ||
| 1998 | assert self.parser.innerHTML | ||
| 1999 | self.parser.parseError() | ||
| 2000 | else: | ||
| 2001 | self.tree.openElements.pop() | ||
| 2002 | self.parser.phase = self.parser.phases["inTable"] | ||
| 2003 | |||
| 2004 | def endTagCol(self, token): | ||
| 2005 | self.parser.parseError("no-end-tag", {"name": "col"}) | ||
| 2006 | |||
| 2007 | def endTagOther(self, token): | ||
| 2008 | ignoreEndTag = self.ignoreEndTagColgroup() | ||
| 2009 | self.endTagColgroup(impliedTagToken("colgroup")) | ||
| 2010 | if not ignoreEndTag: | ||
| 2011 | return token | ||
| 2012 | |||
    class InTableBodyPhase(Phase):
        # http://www.whatwg.org/specs/web-apps/current-work/#in-table0
        """The "in table body" insertion mode (tbody/thead/tfoot content)."""

        def __init__(self, parser, tree):
            Phase.__init__(self, parser, tree)
            self.startTagHandler = _utils.MethodDispatcher([
                ("html", self.startTagHtml),
                ("tr", self.startTagTr),
                (("td", "th"), self.startTagTableCell),
                (("caption", "col", "colgroup", "tbody", "tfoot", "thead"),
                 self.startTagTableOther)
            ])
            self.startTagHandler.default = self.startTagOther

            self.endTagHandler = _utils.MethodDispatcher([
                (("tbody", "tfoot", "thead"), self.endTagTableRowGroup),
                ("table", self.endTagTable),
                (("body", "caption", "col", "colgroup", "html", "td", "th",
                  "tr"), self.endTagIgnore)
            ])
            self.endTagHandler.default = self.endTagOther

        # helper methods
        def clearStackToTableBodyContext(self):
            # Pop until a row group (or <html>, the fragment case) is current.
            while self.tree.openElements[-1].name not in ("tbody", "tfoot",
                                                          "thead", "html"):
                # self.parser.parseError("unexpected-implied-end-tag-in-table",
                #  {"name": self.tree.openElements[-1].name})
                self.tree.openElements.pop()
            if self.tree.openElements[-1].name == "html":
                assert self.parser.innerHTML

        # the rest
        def processEOF(self):
            self.parser.phases["inTable"].processEOF()

        def processSpaceCharacters(self, token):
            return self.parser.phases["inTable"].processSpaceCharacters(token)

        def processCharacters(self, token):
            return self.parser.phases["inTable"].processCharacters(token)

        def startTagTr(self, token):
            self.clearStackToTableBodyContext()
            self.tree.insertElement(token)
            self.parser.phase = self.parser.phases["inRow"]

        def startTagTableCell(self, token):
            # A cell directly in a row group implies a <tr> first; the cell
            # token is then reprocessed in the "in row" phase.
            self.parser.parseError("unexpected-cell-in-table-body",
                                   {"name": token["name"]})
            self.startTagTr(impliedTagToken("tr", "StartTag"))
            return token

        def startTagTableOther(self, token):
            # XXX AT Any ideas on how to share this with endTagTable?
            # Table-structure tags close the current row group, then the
            # token is reprocessed.
            if (self.tree.elementInScope("tbody", variant="table") or
                    self.tree.elementInScope("thead", variant="table") or
                    self.tree.elementInScope("tfoot", variant="table")):
                self.clearStackToTableBodyContext()
                self.endTagTableRowGroup(
                    impliedTagToken(self.tree.openElements[-1].name))
                return token
            else:
                # innerHTML case
                assert self.parser.innerHTML
                self.parser.parseError()

        def startTagOther(self, token):
            return self.parser.phases["inTable"].processStartTag(token)

        def endTagTableRowGroup(self, token):
            if self.tree.elementInScope(token["name"], variant="table"):
                self.clearStackToTableBodyContext()
                self.tree.openElements.pop()
                self.parser.phase = self.parser.phases["inTable"]
            else:
                self.parser.parseError("unexpected-end-tag-in-table-body",
                                       {"name": token["name"]})

        def endTagTable(self, token):
            # </table> closes the open row group first, then is reprocessed.
            if (self.tree.elementInScope("tbody", variant="table") or
                    self.tree.elementInScope("thead", variant="table") or
                    self.tree.elementInScope("tfoot", variant="table")):
                self.clearStackToTableBodyContext()
                self.endTagTableRowGroup(
                    impliedTagToken(self.tree.openElements[-1].name))
                return token
            else:
                # innerHTML case
                assert self.parser.innerHTML
                self.parser.parseError()

        def endTagIgnore(self, token):
            self.parser.parseError("unexpected-end-tag-in-table-body",
                                   {"name": token["name"]})

        def endTagOther(self, token):
            return self.parser.phases["inTable"].processEndTag(token)
| 2110 | |||
    class InRowPhase(Phase):
        # http://www.whatwg.org/specs/web-apps/current-work/#in-row
        """The "in row" insertion mode (<tr> content)."""

        def __init__(self, parser, tree):
            Phase.__init__(self, parser, tree)
            self.startTagHandler = _utils.MethodDispatcher([
                ("html", self.startTagHtml),
                (("td", "th"), self.startTagTableCell),
                (("caption", "col", "colgroup", "tbody", "tfoot", "thead",
                  "tr"), self.startTagTableOther)
            ])
            self.startTagHandler.default = self.startTagOther

            self.endTagHandler = _utils.MethodDispatcher([
                ("tr", self.endTagTr),
                ("table", self.endTagTable),
                (("tbody", "tfoot", "thead"), self.endTagTableRowGroup),
                (("body", "caption", "col", "colgroup", "html", "td", "th"),
                 self.endTagIgnore)
            ])
            self.endTagHandler.default = self.endTagOther

        # helper methods (XXX unify this with other table helper methods)
        def clearStackToTableRowContext(self):
            # Pop until <tr> (or <html>, the fragment case) is current,
            # reporting each implicitly closed element.
            while self.tree.openElements[-1].name not in ("tr", "html"):
                self.parser.parseError("unexpected-implied-end-tag-in-table-row",
                                       {"name": self.tree.openElements[-1].name})
                self.tree.openElements.pop()

        def ignoreEndTagTr(self):
            # An implied </tr> is a no-op when no tr is in table scope.
            return not self.tree.elementInScope("tr", variant="table")

        # the rest
        def processEOF(self):
            self.parser.phases["inTable"].processEOF()

        def processSpaceCharacters(self, token):
            return self.parser.phases["inTable"].processSpaceCharacters(token)

        def processCharacters(self, token):
            return self.parser.phases["inTable"].processCharacters(token)

        def startTagTableCell(self, token):
            # <td>/<th>: open the cell and push a formatting marker.
            self.clearStackToTableRowContext()
            self.tree.insertElement(token)
            self.parser.phase = self.parser.phases["inCell"]
            self.tree.activeFormattingElements.append(Marker)

        def startTagTableOther(self, token):
            # Table-structure tags implicitly close the row, then reprocess.
            ignoreEndTag = self.ignoreEndTagTr()
            self.endTagTr(impliedTagToken("tr"))
            # XXX how are we sure it's always ignored in the innerHTML case?
            if not ignoreEndTag:
                return token

        def startTagOther(self, token):
            return self.parser.phases["inTable"].processStartTag(token)

        def endTagTr(self, token):
            if not self.ignoreEndTagTr():
                self.clearStackToTableRowContext()
                self.tree.openElements.pop()
                self.parser.phase = self.parser.phases["inTableBody"]
            else:
                # innerHTML case
                assert self.parser.innerHTML
                self.parser.parseError()

        def endTagTable(self, token):
            ignoreEndTag = self.ignoreEndTagTr()
            self.endTagTr(impliedTagToken("tr"))
            # Reprocess the current tag if the tr end tag was not ignored
            # XXX how are we sure it's always ignored in the innerHTML case?
            if not ignoreEndTag:
                return token

        def endTagTableRowGroup(self, token):
            # Row-group end tags close the row first, then get reprocessed.
            if self.tree.elementInScope(token["name"], variant="table"):
                self.endTagTr(impliedTagToken("tr"))
                return token
            else:
                self.parser.parseError()

        def endTagIgnore(self, token):
            self.parser.parseError("unexpected-end-tag-in-table-row",
                                   {"name": token["name"]})

        def endTagOther(self, token):
            return self.parser.phases["inTable"].processEndTag(token)
| 2199 | |||
    class InCellPhase(Phase):
        # http://www.whatwg.org/specs/web-apps/current-work/#in-cell
        """The "in cell" insertion mode (<td>/<th> content)."""

        def __init__(self, parser, tree):
            Phase.__init__(self, parser, tree)
            self.startTagHandler = _utils.MethodDispatcher([
                ("html", self.startTagHtml),
                (("caption", "col", "colgroup", "tbody", "td", "tfoot", "th",
                  "thead", "tr"), self.startTagTableOther)
            ])
            self.startTagHandler.default = self.startTagOther

            self.endTagHandler = _utils.MethodDispatcher([
                (("td", "th"), self.endTagTableCell),
                (("body", "caption", "col", "colgroup", "html"), self.endTagIgnore),
                (("table", "tbody", "tfoot", "thead", "tr"), self.endTagImply)
            ])
            self.endTagHandler.default = self.endTagOther

        # helper
        def closeCell(self):
            # Close whichever cell type (td or th) is currently in table scope.
            if self.tree.elementInScope("td", variant="table"):
                self.endTagTableCell(impliedTagToken("td"))
            elif self.tree.elementInScope("th", variant="table"):
                self.endTagTableCell(impliedTagToken("th"))

        # the rest
        def processEOF(self):
            self.parser.phases["inBody"].processEOF()

        def processCharacters(self, token):
            return self.parser.phases["inBody"].processCharacters(token)

        def startTagTableOther(self, token):
            # Table-structure tags implicitly close the open cell, then the
            # token is reprocessed.
            if (self.tree.elementInScope("td", variant="table") or
                    self.tree.elementInScope("th", variant="table")):
                self.closeCell()
                return token
            else:
                # innerHTML case
                assert self.parser.innerHTML
                self.parser.parseError()

        def startTagOther(self, token):
            return self.parser.phases["inBody"].processStartTag(token)

        def endTagTableCell(self, token):
            if self.tree.elementInScope(token["name"], variant="table"):
                self.tree.generateImpliedEndTags(token["name"])
                if self.tree.openElements[-1].name != token["name"]:
                    self.parser.parseError("unexpected-cell-end-tag",
                                           {"name": token["name"]})
                    # Pop until the named cell itself has been removed.
                    while True:
                        node = self.tree.openElements.pop()
                        if node.name == token["name"]:
                            break
                else:
                    self.tree.openElements.pop()
                self.tree.clearActiveFormattingElements()
                self.parser.phase = self.parser.phases["inRow"]
            else:
                self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

        def endTagIgnore(self, token):
            self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

        def endTagImply(self, token):
            # Structure end tags close the cell first, then get reprocessed.
            if self.tree.elementInScope(token["name"], variant="table"):
                self.closeCell()
                return token
            else:
                # sometimes innerHTML case
                self.parser.parseError()

        def endTagOther(self, token):
            return self.parser.phases["inBody"].processEndTag(token)
| 2275 | |||
| 2276 | class InSelectPhase(Phase): | ||
| 2277 | def __init__(self, parser, tree): | ||
| 2278 | Phase.__init__(self, parser, tree) | ||
| 2279 | |||
| 2280 | self.startTagHandler = _utils.MethodDispatcher([ | ||
| 2281 | ("html", self.startTagHtml), | ||
| 2282 | ("option", self.startTagOption), | ||
| 2283 | ("optgroup", self.startTagOptgroup), | ||
| 2284 | ("select", self.startTagSelect), | ||
| 2285 | (("input", "keygen", "textarea"), self.startTagInput), | ||
| 2286 | ("script", self.startTagScript) | ||
| 2287 | ]) | ||
| 2288 | self.startTagHandler.default = self.startTagOther | ||
| 2289 | |||
| 2290 | self.endTagHandler = _utils.MethodDispatcher([ | ||
| 2291 | ("option", self.endTagOption), | ||
| 2292 | ("optgroup", self.endTagOptgroup), | ||
| 2293 | ("select", self.endTagSelect) | ||
| 2294 | ]) | ||
| 2295 | self.endTagHandler.default = self.endTagOther | ||
| 2296 | |||
| 2297 | # http://www.whatwg.org/specs/web-apps/current-work/#in-select | ||
| 2298 | def processEOF(self): | ||
| 2299 | if self.tree.openElements[-1].name != "html": | ||
| 2300 | self.parser.parseError("eof-in-select") | ||
| 2301 | else: | ||
| 2302 | assert self.parser.innerHTML | ||
| 2303 | |||
| 2304 | def processCharacters(self, token): | ||
| 2305 | if token["data"] == "\u0000": | ||
| 2306 | return | ||
| 2307 | self.tree.insertText(token["data"]) | ||
| 2308 | |||
| 2309 | def startTagOption(self, token): | ||
| 2310 | # We need to imply </option> if <option> is the current node. | ||
| 2311 | if self.tree.openElements[-1].name == "option": | ||
| 2312 | self.tree.openElements.pop() | ||
| 2313 | self.tree.insertElement(token) | ||
| 2314 | |||
| 2315 | def startTagOptgroup(self, token): | ||
| 2316 | if self.tree.openElements[-1].name == "option": | ||
| 2317 | self.tree.openElements.pop() | ||
| 2318 | if self.tree.openElements[-1].name == "optgroup": | ||
| 2319 | self.tree.openElements.pop() | ||
| 2320 | self.tree.insertElement(token) | ||
| 2321 | |||
| 2322 | def startTagSelect(self, token): | ||
| 2323 | self.parser.parseError("unexpected-select-in-select") | ||
| 2324 | self.endTagSelect(impliedTagToken("select")) | ||
| 2325 | |||
| 2326 | def startTagInput(self, token): | ||
| 2327 | self.parser.parseError("unexpected-input-in-select") | ||
| 2328 | if self.tree.elementInScope("select", variant="select"): | ||
| 2329 | self.endTagSelect(impliedTagToken("select")) | ||
| 2330 | return token | ||
| 2331 | else: | ||
| 2332 | assert self.parser.innerHTML | ||
| 2333 | |||
| 2334 | def startTagScript(self, token): | ||
| 2335 | return self.parser.phases["inHead"].processStartTag(token) | ||
| 2336 | |||
| 2337 | def startTagOther(self, token): | ||
| 2338 | self.parser.parseError("unexpected-start-tag-in-select", | ||
| 2339 | {"name": token["name"]}) | ||
| 2340 | |||
| 2341 | def endTagOption(self, token): | ||
| 2342 | if self.tree.openElements[-1].name == "option": | ||
| 2343 | self.tree.openElements.pop() | ||
| 2344 | else: | ||
| 2345 | self.parser.parseError("unexpected-end-tag-in-select", | ||
| 2346 | {"name": "option"}) | ||
| 2347 | |||
| 2348 | def endTagOptgroup(self, token): | ||
| 2349 | # </optgroup> implicitly closes <option> | ||
| 2350 | if (self.tree.openElements[-1].name == "option" and | ||
| 2351 | self.tree.openElements[-2].name == "optgroup"): | ||
| 2352 | self.tree.openElements.pop() | ||
| 2353 | # It also closes </optgroup> | ||
| 2354 | if self.tree.openElements[-1].name == "optgroup": | ||
| 2355 | self.tree.openElements.pop() | ||
| 2356 | # But nothing else | ||
| 2357 | else: | ||
| 2358 | self.parser.parseError("unexpected-end-tag-in-select", | ||
| 2359 | {"name": "optgroup"}) | ||
| 2360 | |||
| 2361 | def endTagSelect(self, token): | ||
| 2362 | if self.tree.elementInScope("select", variant="select"): | ||
| 2363 | node = self.tree.openElements.pop() | ||
| 2364 | while node.name != "select": | ||
| 2365 | node = self.tree.openElements.pop() | ||
| 2366 | self.parser.resetInsertionMode() | ||
| 2367 | else: | ||
| 2368 | # innerHTML case | ||
| 2369 | assert self.parser.innerHTML | ||
| 2370 | self.parser.parseError() | ||
| 2371 | |||
| 2372 | def endTagOther(self, token): | ||
| 2373 | self.parser.parseError("unexpected-end-tag-in-select", | ||
| 2374 | {"name": token["name"]}) | ||
| 2375 | |||
| 2376 | class InSelectInTablePhase(Phase): | ||
| 2377 | def __init__(self, parser, tree): | ||
| 2378 | Phase.__init__(self, parser, tree) | ||
| 2379 | |||
| 2380 | self.startTagHandler = _utils.MethodDispatcher([ | ||
| 2381 | (("caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"), | ||
| 2382 | self.startTagTable) | ||
| 2383 | ]) | ||
| 2384 | self.startTagHandler.default = self.startTagOther | ||
| 2385 | |||
| 2386 | self.endTagHandler = _utils.MethodDispatcher([ | ||
| 2387 | (("caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"), | ||
| 2388 | self.endTagTable) | ||
| 2389 | ]) | ||
| 2390 | self.endTagHandler.default = self.endTagOther | ||
| 2391 | |||
| 2392 | def processEOF(self): | ||
| 2393 | self.parser.phases["inSelect"].processEOF() | ||
| 2394 | |||
| 2395 | def processCharacters(self, token): | ||
| 2396 | return self.parser.phases["inSelect"].processCharacters(token) | ||
| 2397 | |||
| 2398 | def startTagTable(self, token): | ||
| 2399 | self.parser.parseError("unexpected-table-element-start-tag-in-select-in-table", {"name": token["name"]}) | ||
| 2400 | self.endTagOther(impliedTagToken("select")) | ||
| 2401 | return token | ||
| 2402 | |||
| 2403 | def startTagOther(self, token): | ||
| 2404 | return self.parser.phases["inSelect"].processStartTag(token) | ||
| 2405 | |||
| 2406 | def endTagTable(self, token): | ||
| 2407 | self.parser.parseError("unexpected-table-element-end-tag-in-select-in-table", {"name": token["name"]}) | ||
| 2408 | if self.tree.elementInScope(token["name"], variant="table"): | ||
| 2409 | self.endTagOther(impliedTagToken("select")) | ||
| 2410 | return token | ||
| 2411 | |||
| 2412 | def endTagOther(self, token): | ||
| 2413 | return self.parser.phases["inSelect"].processEndTag(token) | ||
| 2414 | |||
    class InForeignContentPhase(Phase):
        # Insertion mode for content inside SVG/MathML subtrees.
        # HTML elements in this set "break out" of foreign content and are
        # reprocessed as ordinary HTML.
        breakoutElements = frozenset(["b", "big", "blockquote", "body", "br",
                                      "center", "code", "dd", "div", "dl", "dt",
                                      "em", "embed", "h1", "h2", "h3",
                                      "h4", "h5", "h6", "head", "hr", "i", "img",
                                      "li", "listing", "menu", "meta", "nobr",
                                      "ol", "p", "pre", "ruby", "s", "small",
                                      "span", "strong", "strike", "sub", "sup",
                                      "table", "tt", "u", "ul", "var"])

        def __init__(self, parser, tree):
            Phase.__init__(self, parser, tree)

        def adjustSVGTagNames(self, token):
            # SVG tag names are case-sensitive; restore the canonical mixed
            # case that tokenization lower-cased away.
            replacements = {"altglyph": "altGlyph",
                            "altglyphdef": "altGlyphDef",
                            "altglyphitem": "altGlyphItem",
                            "animatecolor": "animateColor",
                            "animatemotion": "animateMotion",
                            "animatetransform": "animateTransform",
                            "clippath": "clipPath",
                            "feblend": "feBlend",
                            "fecolormatrix": "feColorMatrix",
                            "fecomponenttransfer": "feComponentTransfer",
                            "fecomposite": "feComposite",
                            "feconvolvematrix": "feConvolveMatrix",
                            "fediffuselighting": "feDiffuseLighting",
                            "fedisplacementmap": "feDisplacementMap",
                            "fedistantlight": "feDistantLight",
                            "feflood": "feFlood",
                            "fefunca": "feFuncA",
                            "fefuncb": "feFuncB",
                            "fefuncg": "feFuncG",
                            "fefuncr": "feFuncR",
                            "fegaussianblur": "feGaussianBlur",
                            "feimage": "feImage",
                            "femerge": "feMerge",
                            "femergenode": "feMergeNode",
                            "femorphology": "feMorphology",
                            "feoffset": "feOffset",
                            "fepointlight": "fePointLight",
                            "fespecularlighting": "feSpecularLighting",
                            "fespotlight": "feSpotLight",
                            "fetile": "feTile",
                            "feturbulence": "feTurbulence",
                            "foreignobject": "foreignObject",
                            "glyphref": "glyphRef",
                            "lineargradient": "linearGradient",
                            "radialgradient": "radialGradient",
                            "textpath": "textPath"}

            if token["name"] in replacements:
                token["name"] = replacements[token["name"]]

        def processCharacters(self, token):
            # U+0000 is replaced (not dropped) in foreign content; any
            # non-space character also disables frameset-ok.
            if token["data"] == "\u0000":
                token["data"] = "\uFFFD"
            elif (self.parser.framesetOK and
                  any(char not in spaceCharacters for char in token["data"])):
                self.parser.framesetOK = False
            Phase.processCharacters(self, token)

        def processStartTag(self, token):
            currentNode = self.tree.openElements[-1]
            if (token["name"] in self.breakoutElements or
                (token["name"] == "font" and
                 set(token["data"].keys()) & set(["color", "face", "size"]))):
                # Breakout tag: pop foreign elements until we reach HTML
                # content (or an integration point), then reprocess.
                self.parser.parseError("unexpected-html-element-in-foreign-content",
                                       {"name": token["name"]})
                while (self.tree.openElements[-1].namespace !=
                       self.tree.defaultNamespace and
                       not self.parser.isHTMLIntegrationPoint(self.tree.openElements[-1]) and
                       not self.parser.isMathMLTextIntegrationPoint(self.tree.openElements[-1])):
                    self.tree.openElements.pop()
                return token

            else:
                # Ordinary foreign element: fix up names/attributes for the
                # current namespace and insert it there.
                if currentNode.namespace == namespaces["mathml"]:
                    self.parser.adjustMathMLAttributes(token)
                elif currentNode.namespace == namespaces["svg"]:
                    self.adjustSVGTagNames(token)
                    self.parser.adjustSVGAttributes(token)
                self.parser.adjustForeignAttributes(token)
                token["namespace"] = currentNode.namespace
                self.tree.insertElement(token)
                if token["selfClosing"]:
                    self.tree.openElements.pop()
                    token["selfClosingAcknowledged"] = True

        def processEndTag(self, token):
            # Walk the open-elements stack from the top looking for a
            # case-insensitive name match; HTML-namespace nodes hand the
            # token back to the regular (HTML) phase instead.
            nodeIndex = len(self.tree.openElements) - 1
            node = self.tree.openElements[-1]
            if node.name.translate(asciiUpper2Lower) != token["name"]:
                self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

            while True:
                if node.name.translate(asciiUpper2Lower) == token["name"]:
                    # XXX this isn't in the spec but it seems necessary
                    if self.parser.phase == self.parser.phases["inTableText"]:
                        self.parser.phase.flushCharacters()
                        self.parser.phase = self.parser.phase.originalPhase
                    while self.tree.openElements.pop() != node:
                        assert self.tree.openElements
                    new_token = None
                    break
                nodeIndex -= 1

                node = self.tree.openElements[nodeIndex]
                if node.namespace != self.tree.defaultNamespace:
                    continue
                else:
                    # Reached an HTML-namespace node: defer to the current
                    # (HTML) phase's end-tag handling.
                    new_token = self.parser.phase.processEndTag(token)
                    break
            return new_token
| 2529 | |||
| 2530 | class AfterBodyPhase(Phase): | ||
| 2531 | def __init__(self, parser, tree): | ||
| 2532 | Phase.__init__(self, parser, tree) | ||
| 2533 | |||
| 2534 | self.startTagHandler = _utils.MethodDispatcher([ | ||
| 2535 | ("html", self.startTagHtml) | ||
| 2536 | ]) | ||
| 2537 | self.startTagHandler.default = self.startTagOther | ||
| 2538 | |||
| 2539 | self.endTagHandler = _utils.MethodDispatcher([("html", self.endTagHtml)]) | ||
| 2540 | self.endTagHandler.default = self.endTagOther | ||
| 2541 | |||
| 2542 | def processEOF(self): | ||
| 2543 | # Stop parsing | ||
| 2544 | pass | ||
| 2545 | |||
| 2546 | def processComment(self, token): | ||
| 2547 | # This is needed because data is to be appended to the <html> element | ||
| 2548 | # here and not to whatever is currently open. | ||
| 2549 | self.tree.insertComment(token, self.tree.openElements[0]) | ||
| 2550 | |||
| 2551 | def processCharacters(self, token): | ||
| 2552 | self.parser.parseError("unexpected-char-after-body") | ||
| 2553 | self.parser.phase = self.parser.phases["inBody"] | ||
| 2554 | return token | ||
| 2555 | |||
| 2556 | def startTagHtml(self, token): | ||
| 2557 | return self.parser.phases["inBody"].processStartTag(token) | ||
| 2558 | |||
| 2559 | def startTagOther(self, token): | ||
| 2560 | self.parser.parseError("unexpected-start-tag-after-body", | ||
| 2561 | {"name": token["name"]}) | ||
| 2562 | self.parser.phase = self.parser.phases["inBody"] | ||
| 2563 | return token | ||
| 2564 | |||
| 2565 | def endTagHtml(self, name): | ||
| 2566 | if self.parser.innerHTML: | ||
| 2567 | self.parser.parseError("unexpected-end-tag-after-body-innerhtml") | ||
| 2568 | else: | ||
| 2569 | self.parser.phase = self.parser.phases["afterAfterBody"] | ||
| 2570 | |||
| 2571 | def endTagOther(self, token): | ||
| 2572 | self.parser.parseError("unexpected-end-tag-after-body", | ||
| 2573 | {"name": token["name"]}) | ||
| 2574 | self.parser.phase = self.parser.phases["inBody"] | ||
| 2575 | return token | ||
| 2576 | |||
| 2577 | class InFramesetPhase(Phase): | ||
| 2578 | # http://www.whatwg.org/specs/web-apps/current-work/#in-frameset | ||
| 2579 | def __init__(self, parser, tree): | ||
| 2580 | Phase.__init__(self, parser, tree) | ||
| 2581 | |||
| 2582 | self.startTagHandler = _utils.MethodDispatcher([ | ||
| 2583 | ("html", self.startTagHtml), | ||
| 2584 | ("frameset", self.startTagFrameset), | ||
| 2585 | ("frame", self.startTagFrame), | ||
| 2586 | ("noframes", self.startTagNoframes) | ||
| 2587 | ]) | ||
| 2588 | self.startTagHandler.default = self.startTagOther | ||
| 2589 | |||
| 2590 | self.endTagHandler = _utils.MethodDispatcher([ | ||
| 2591 | ("frameset", self.endTagFrameset) | ||
| 2592 | ]) | ||
| 2593 | self.endTagHandler.default = self.endTagOther | ||
| 2594 | |||
| 2595 | def processEOF(self): | ||
| 2596 | if self.tree.openElements[-1].name != "html": | ||
| 2597 | self.parser.parseError("eof-in-frameset") | ||
| 2598 | else: | ||
| 2599 | assert self.parser.innerHTML | ||
| 2600 | |||
| 2601 | def processCharacters(self, token): | ||
| 2602 | self.parser.parseError("unexpected-char-in-frameset") | ||
| 2603 | |||
| 2604 | def startTagFrameset(self, token): | ||
| 2605 | self.tree.insertElement(token) | ||
| 2606 | |||
| 2607 | def startTagFrame(self, token): | ||
| 2608 | self.tree.insertElement(token) | ||
| 2609 | self.tree.openElements.pop() | ||
| 2610 | |||
| 2611 | def startTagNoframes(self, token): | ||
| 2612 | return self.parser.phases["inBody"].processStartTag(token) | ||
| 2613 | |||
| 2614 | def startTagOther(self, token): | ||
| 2615 | self.parser.parseError("unexpected-start-tag-in-frameset", | ||
| 2616 | {"name": token["name"]}) | ||
| 2617 | |||
| 2618 | def endTagFrameset(self, token): | ||
| 2619 | if self.tree.openElements[-1].name == "html": | ||
| 2620 | # innerHTML case | ||
| 2621 | self.parser.parseError("unexpected-frameset-in-frameset-innerhtml") | ||
| 2622 | else: | ||
| 2623 | self.tree.openElements.pop() | ||
| 2624 | if (not self.parser.innerHTML and | ||
| 2625 | self.tree.openElements[-1].name != "frameset"): | ||
| 2626 | # If we're not in innerHTML mode and the current node is not a | ||
| 2627 | # "frameset" element (anymore) then switch. | ||
| 2628 | self.parser.phase = self.parser.phases["afterFrameset"] | ||
| 2629 | |||
| 2630 | def endTagOther(self, token): | ||
| 2631 | self.parser.parseError("unexpected-end-tag-in-frameset", | ||
| 2632 | {"name": token["name"]}) | ||
| 2633 | |||
| 2634 | class AfterFramesetPhase(Phase): | ||
| 2635 | # http://www.whatwg.org/specs/web-apps/current-work/#after3 | ||
| 2636 | def __init__(self, parser, tree): | ||
| 2637 | Phase.__init__(self, parser, tree) | ||
| 2638 | |||
| 2639 | self.startTagHandler = _utils.MethodDispatcher([ | ||
| 2640 | ("html", self.startTagHtml), | ||
| 2641 | ("noframes", self.startTagNoframes) | ||
| 2642 | ]) | ||
| 2643 | self.startTagHandler.default = self.startTagOther | ||
| 2644 | |||
| 2645 | self.endTagHandler = _utils.MethodDispatcher([ | ||
| 2646 | ("html", self.endTagHtml) | ||
| 2647 | ]) | ||
| 2648 | self.endTagHandler.default = self.endTagOther | ||
| 2649 | |||
| 2650 | def processEOF(self): | ||
| 2651 | # Stop parsing | ||
| 2652 | pass | ||
| 2653 | |||
| 2654 | def processCharacters(self, token): | ||
| 2655 | self.parser.parseError("unexpected-char-after-frameset") | ||
| 2656 | |||
| 2657 | def startTagNoframes(self, token): | ||
| 2658 | return self.parser.phases["inHead"].processStartTag(token) | ||
| 2659 | |||
| 2660 | def startTagOther(self, token): | ||
| 2661 | self.parser.parseError("unexpected-start-tag-after-frameset", | ||
| 2662 | {"name": token["name"]}) | ||
| 2663 | |||
| 2664 | def endTagHtml(self, token): | ||
| 2665 | self.parser.phase = self.parser.phases["afterAfterFrameset"] | ||
| 2666 | |||
| 2667 | def endTagOther(self, token): | ||
| 2668 | self.parser.parseError("unexpected-end-tag-after-frameset", | ||
| 2669 | {"name": token["name"]}) | ||
| 2670 | |||
| 2671 | class AfterAfterBodyPhase(Phase): | ||
| 2672 | def __init__(self, parser, tree): | ||
| 2673 | Phase.__init__(self, parser, tree) | ||
| 2674 | |||
| 2675 | self.startTagHandler = _utils.MethodDispatcher([ | ||
| 2676 | ("html", self.startTagHtml) | ||
| 2677 | ]) | ||
| 2678 | self.startTagHandler.default = self.startTagOther | ||
| 2679 | |||
| 2680 | def processEOF(self): | ||
| 2681 | pass | ||
| 2682 | |||
| 2683 | def processComment(self, token): | ||
| 2684 | self.tree.insertComment(token, self.tree.document) | ||
| 2685 | |||
| 2686 | def processSpaceCharacters(self, token): | ||
| 2687 | return self.parser.phases["inBody"].processSpaceCharacters(token) | ||
| 2688 | |||
| 2689 | def processCharacters(self, token): | ||
| 2690 | self.parser.parseError("expected-eof-but-got-char") | ||
| 2691 | self.parser.phase = self.parser.phases["inBody"] | ||
| 2692 | return token | ||
| 2693 | |||
| 2694 | def startTagHtml(self, token): | ||
| 2695 | return self.parser.phases["inBody"].processStartTag(token) | ||
| 2696 | |||
| 2697 | def startTagOther(self, token): | ||
| 2698 | self.parser.parseError("expected-eof-but-got-start-tag", | ||
| 2699 | {"name": token["name"]}) | ||
| 2700 | self.parser.phase = self.parser.phases["inBody"] | ||
| 2701 | return token | ||
| 2702 | |||
| 2703 | def processEndTag(self, token): | ||
| 2704 | self.parser.parseError("expected-eof-but-got-end-tag", | ||
| 2705 | {"name": token["name"]}) | ||
| 2706 | self.parser.phase = self.parser.phases["inBody"] | ||
| 2707 | return token | ||
| 2708 | |||
| 2709 | class AfterAfterFramesetPhase(Phase): | ||
| 2710 | def __init__(self, parser, tree): | ||
| 2711 | Phase.__init__(self, parser, tree) | ||
| 2712 | |||
| 2713 | self.startTagHandler = _utils.MethodDispatcher([ | ||
| 2714 | ("html", self.startTagHtml), | ||
| 2715 | ("noframes", self.startTagNoFrames) | ||
| 2716 | ]) | ||
| 2717 | self.startTagHandler.default = self.startTagOther | ||
| 2718 | |||
| 2719 | def processEOF(self): | ||
| 2720 | pass | ||
| 2721 | |||
| 2722 | def processComment(self, token): | ||
| 2723 | self.tree.insertComment(token, self.tree.document) | ||
| 2724 | |||
| 2725 | def processSpaceCharacters(self, token): | ||
| 2726 | return self.parser.phases["inBody"].processSpaceCharacters(token) | ||
| 2727 | |||
| 2728 | def processCharacters(self, token): | ||
| 2729 | self.parser.parseError("expected-eof-but-got-char") | ||
| 2730 | |||
| 2731 | def startTagHtml(self, token): | ||
| 2732 | return self.parser.phases["inBody"].processStartTag(token) | ||
| 2733 | |||
| 2734 | def startTagNoFrames(self, token): | ||
| 2735 | return self.parser.phases["inHead"].processStartTag(token) | ||
| 2736 | |||
| 2737 | def startTagOther(self, token): | ||
| 2738 | self.parser.parseError("expected-eof-but-got-start-tag", | ||
| 2739 | {"name": token["name"]}) | ||
| 2740 | |||
| 2741 | def processEndTag(self, token): | ||
| 2742 | self.parser.parseError("expected-eof-but-got-end-tag", | ||
| 2743 | {"name": token["name"]}) | ||
| 2744 | # pylint:enable=unused-argument | ||
| 2745 | |||
    # Map each tree-construction insertion-mode name to its Phase class;
    # the parser looks these up through its ``phases`` mapping.
    return {
        "initial": InitialPhase,
        "beforeHtml": BeforeHtmlPhase,
        "beforeHead": BeforeHeadPhase,
        "inHead": InHeadPhase,
        "inHeadNoscript": InHeadNoscriptPhase,
        "afterHead": AfterHeadPhase,
        "inBody": InBodyPhase,
        "text": TextPhase,
        "inTable": InTablePhase,
        "inTableText": InTableTextPhase,
        "inCaption": InCaptionPhase,
        "inColumnGroup": InColumnGroupPhase,
        "inTableBody": InTableBodyPhase,
        "inRow": InRowPhase,
        "inCell": InCellPhase,
        "inSelect": InSelectPhase,
        "inSelectInTable": InSelectInTablePhase,
        "inForeignContent": InForeignContentPhase,
        "afterBody": AfterBodyPhase,
        "inFrameset": InFramesetPhase,
        "afterFrameset": AfterFramesetPhase,
        "afterAfterBody": AfterAfterBodyPhase,
        "afterAfterFrameset": AfterAfterFramesetPhase,
    }
| 2772 | |||
| 2773 | |||
def adjust_attributes(token, replacements):
    """Rename the token's attributes according to *replacements*, in place."""
    current_names = viewkeys(token['data'])
    if current_names & viewkeys(replacements):
        renamed = ((replacements.get(name, name), value)
                   for name, value in token['data'].items())
        token['data'] = OrderedDict(renamed)
| 2779 | |||
| 2780 | |||
def impliedTagToken(name, type="EndTag", attributes=None,
                    selfClosing=False):
    """Build a synthetic tag token of the given *type* for tag *name*."""
    return {"type": tokenTypes[type],
            "name": name,
            "data": {} if attributes is None else attributes,
            "selfClosing": selfClosing}
| 2787 | |||
| 2788 | |||
class ParseError(Exception):
    """Error in parsed document"""
    # (the redundant ``pass`` after the docstring was removed; the
    # docstring alone is a complete class body)
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/serializer.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/serializer.py new file mode 100644 index 0000000..641323e --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/serializer.py | |||
| @@ -0,0 +1,409 @@ | |||
| 1 | from __future__ import absolute_import, division, unicode_literals | ||
| 2 | from pip._vendor.six import text_type | ||
| 3 | |||
| 4 | import re | ||
| 5 | |||
| 6 | from codecs import register_error, xmlcharrefreplace_errors | ||
| 7 | |||
| 8 | from .constants import voidElements, booleanAttributes, spaceCharacters | ||
| 9 | from .constants import rcdataElements, entities, xmlEntities | ||
| 10 | from . import treewalkers, _utils | ||
| 11 | from xml.sax.saxutils import escape | ||
| 12 | |||
# Characters that force an attribute value to be quoted under the "spec"
# quoting policy: HTML whitespace plus " ' = < > `
_quoteAttributeSpecChars = "".join(spaceCharacters) + "\"'=<>`"
_quoteAttributeSpec = re.compile("[" + _quoteAttributeSpecChars + "]")
# The "legacy" policy additionally quotes on C0 control characters and a
# wide range of Unicode space characters.
_quoteAttributeLegacy = re.compile("[" + _quoteAttributeSpecChars +
                                   "\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n"
                                   "\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15"
                                   "\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
                                   "\x20\x2f\x60\xa0\u1680\u180e\u180f\u2000"
                                   "\u2001\u2002\u2003\u2004\u2005\u2006\u2007"
                                   "\u2008\u2009\u200a\u2028\u2029\u202f\u205f"
                                   "\u3000]")
| 23 | |||
| 24 | |||
# Maps a codepoint to a named entity that represents it, built from the
# full entity table in ``constants``.
_encode_entity_map = {}
# On narrow (UCS-2) builds astral characters are two code units long.
_is_ucs4 = len("\U0010FFFF") == 1
for k, v in list(entities.items()):
    # skip multi-character entities
    if ((_is_ucs4 and len(v) > 1) or
            (not _is_ucs4 and len(v) > 2)):
        continue
    if v != "&":
        if len(v) == 2:
            # surrogate pair on a narrow build: recover the real codepoint
            v = _utils.surrogatePairToCodepoint(v)
        else:
            v = ord(v)
        if v not in _encode_entity_map or k.islower():
            # prefer &lt; over &LT; and similarly for &amp;, &gt;, etc.
            _encode_entity_map[v] = k
| 40 | |||
| 41 | |||
def htmlentityreplace_errors(exc):
    """Codec error handler that replaces unencodable characters with HTML
    named entities where one exists, falling back to numeric character
    references.  Registered below under the name ``htmlentityreplace``."""
    if isinstance(exc, (UnicodeEncodeError, UnicodeTranslateError)):
        res = []
        codepoints = []
        skip = False
        # First pass: collect codepoints, merging surrogate pairs (narrow
        # builds) into single codepoints and skipping the low surrogate.
        for i, c in enumerate(exc.object[exc.start:exc.end]):
            if skip:
                skip = False
                continue
            index = i + exc.start
            if _utils.isSurrogatePair(exc.object[index:min([exc.end, index + 2])]):
                codepoint = _utils.surrogatePairToCodepoint(exc.object[index:index + 2])
                skip = True
            else:
                codepoint = ord(c)
            codepoints.append(codepoint)
        # Second pass: emit a named entity if the map has one, otherwise a
        # hexadecimal numeric character reference.
        for cp in codepoints:
            e = _encode_entity_map.get(cp)
            if e:
                res.append("&")
                res.append(e)
                if not e.endswith(";"):
                    res.append(";")
            else:
                res.append("&#x%s;" % (hex(cp)[2:]))
        return ("".join(res), exc.end)
    else:
        # Decode errors etc. are delegated to the stdlib handler.
        return xmlcharrefreplace_errors(exc)


register_error("htmlentityreplace", htmlentityreplace_errors)
| 73 | |||
| 74 | |||
def serialize(input, tree="etree", encoding=None, **serializer_opts):
    """Serialize *input* using the named treewalker.

    :arg input: the tree/token stream to serialize

    :arg tree: name of the treewalker able to walk *input*

    :arg encoding: output encoding (``None`` keeps text)

    :arg serializer_opts: options forwarded to the
        :py:class:`html5lib.serializer.HTMLSerializer` that gets created

    :returns: the serialized tree

    Example:

    >>> from html5lib.html5parser import parse
    >>> from html5lib.serializer import serialize
    >>> token_stream = parse('<html><body><p>Hi!</p></body></html>')
    >>> serialize(token_stream, omit_optional_tags=False)
    '<html><head></head><body><p>Hi!</p></body></html>'

    """
    # XXX: Should we cache this?
    walk = treewalkers.getTreeWalker(tree)
    serializer = HTMLSerializer(**serializer_opts)
    return serializer.render(walk(input), encoding)
| 102 | |||
| 103 | |||
class HTMLSerializer(object):
    """Serializes a stream of treewalker tokens to (X)HTML text.

    Each option below is a class-level default; ``__init__`` shadows it
    with an instance attribute when the same-named keyword is supplied.
    """

    # attribute quoting options
    quote_attr_values = "legacy"  # be secure by default
    quote_char = '"'
    use_best_quote_char = True

    # tag syntax options
    omit_optional_tags = True
    minimize_boolean_attributes = True
    use_trailing_solidus = False
    space_before_trailing_solidus = True

    # escaping options
    escape_lt_in_attrs = False
    escape_rcdata = False
    resolve_entities = True

    # miscellaneous options
    alphabetical_attributes = False
    inject_meta_charset = True
    strip_whitespace = False
    sanitize = False

    # the complete set of recognized constructor keywords
    options = ("quote_attr_values", "quote_char", "use_best_quote_char",
               "omit_optional_tags", "minimize_boolean_attributes",
               "use_trailing_solidus", "space_before_trailing_solidus",
               "escape_lt_in_attrs", "escape_rcdata", "resolve_entities",
               "alphabetical_attributes", "inject_meta_charset",
               "strip_whitespace", "sanitize")
| 134 | |||
| 135 | def __init__(self, **kwargs): | ||
| 136 | """Initialize HTMLSerializer | ||
| 137 | |||
| 138 | :arg inject_meta_charset: Whether or not to inject the meta charset. | ||
| 139 | |||
| 140 | Defaults to ``True``. | ||
| 141 | |||
| 142 | :arg quote_attr_values: Whether to quote attribute values that don't | ||
| 143 | require quoting per legacy browser behavior (``"legacy"``), when | ||
| 144 | required by the standard (``"spec"``), or always (``"always"``). | ||
| 145 | |||
| 146 | Defaults to ``"legacy"``. | ||
| 147 | |||
| 148 | :arg quote_char: Use given quote character for attribute quoting. | ||
| 149 | |||
| 150 | Defaults to ``"`` which will use double quotes unless attribute | ||
| 151 | value contains a double quote, in which case single quotes are | ||
| 152 | used. | ||
| 153 | |||
| 154 | :arg escape_lt_in_attrs: Whether or not to escape ``<`` in attribute | ||
| 155 | values. | ||
| 156 | |||
| 157 | Defaults to ``False``. | ||
| 158 | |||
| 159 | :arg escape_rcdata: Whether to escape characters that need to be | ||
| 160 | escaped within normal elements within rcdata elements such as | ||
| 161 | style. | ||
| 162 | |||
| 163 | Defaults to ``False``. | ||
| 164 | |||
| 165 | :arg resolve_entities: Whether to resolve named character entities that | ||
| 166 | appear in the source tree. The XML predefined entities < > | ||
| 167 | & " ' are unaffected by this setting. | ||
| 168 | |||
| 169 | Defaults to ``True``. | ||
| 170 | |||
| 171 | :arg strip_whitespace: Whether to remove semantically meaningless | ||
| 172 | whitespace. (This compresses all whitespace to a single space | ||
| 173 | except within ``pre``.) | ||
| 174 | |||
| 175 | Defaults to ``False``. | ||
| 176 | |||
| 177 | :arg minimize_boolean_attributes: Shortens boolean attributes to give | ||
| 178 | just the attribute value, for example:: | ||
| 179 | |||
| 180 | <input disabled="disabled"> | ||
| 181 | |||
| 182 | becomes:: | ||
| 183 | |||
| 184 | <input disabled> | ||
| 185 | |||
| 186 | Defaults to ``True``. | ||
| 187 | |||
| 188 | :arg use_trailing_solidus: Includes a close-tag slash at the end of the | ||
| 189 | start tag of void elements (empty elements whose end tag is | ||
| 190 | forbidden). E.g. ``<hr/>``. | ||
| 191 | |||
| 192 | Defaults to ``False``. | ||
| 193 | |||
| 194 | :arg space_before_trailing_solidus: Places a space immediately before | ||
| 195 | the closing slash in a tag using a trailing solidus. E.g. | ||
| 196 | ``<hr />``. Requires ``use_trailing_solidus=True``. | ||
| 197 | |||
| 198 | Defaults to ``True``. | ||
| 199 | |||
| 200 | :arg sanitize: Strip all unsafe or unknown constructs from output. | ||
| 201 | See :py:class:`html5lib.filters.sanitizer.Filter`. | ||
| 202 | |||
| 203 | Defaults to ``False``. | ||
| 204 | |||
| 205 | :arg omit_optional_tags: Omit start/end tags that are optional. | ||
| 206 | |||
| 207 | Defaults to ``True``. | ||
| 208 | |||
| 209 | :arg alphabetical_attributes: Reorder attributes to be in alphabetical order. | ||
| 210 | |||
| 211 | Defaults to ``False``. | ||
| 212 | |||
| 213 | """ | ||
| 214 | unexpected_args = frozenset(kwargs) - frozenset(self.options) | ||
| 215 | if len(unexpected_args) > 0: | ||
| 216 | raise TypeError("__init__() got an unexpected keyword argument '%s'" % next(iter(unexpected_args))) | ||
| 217 | if 'quote_char' in kwargs: | ||
| 218 | self.use_best_quote_char = False | ||
| 219 | for attr in self.options: | ||
| 220 | setattr(self, attr, kwargs.get(attr, getattr(self, attr))) | ||
| 221 | self.errors = [] | ||
| 222 | self.strict = False | ||
| 223 | |||
| 224 | def encode(self, string): | ||
| 225 | assert(isinstance(string, text_type)) | ||
| 226 | if self.encoding: | ||
| 227 | return string.encode(self.encoding, "htmlentityreplace") | ||
| 228 | else: | ||
| 229 | return string | ||
| 230 | |||
| 231 | def encodeStrict(self, string): | ||
| 232 | assert(isinstance(string, text_type)) | ||
| 233 | if self.encoding: | ||
| 234 | return string.encode(self.encoding, "strict") | ||
| 235 | else: | ||
| 236 | return string | ||
| 237 | |||
    def serialize(self, treewalker, encoding=None):
        """Serialize the token stream produced by *treewalker*.

        :arg treewalker: the treewalker generating the token stream

        :arg encoding: optional output encoding; when given, chunks are
            yielded as bytes (text content encoded with
            ``htmlentityreplace``), otherwise as text

        :returns: generator of serialized output chunks; recoverable
            problems are recorded via ``serializeError`` (which raises when
            ``self.strict`` is set)

        """
        # pylint:disable=too-many-nested-blocks
        self.encoding = encoding
        # True while inside a rawtext (rcdata) element whose content must
        # not be entity-escaped, e.g. <script>/<style>.
        in_cdata = False
        self.errors = []

        # Build the filter chain around the treewalker.  Order matters; see
        # the comments on the individual filters below.
        if encoding and self.inject_meta_charset:
            from .filters.inject_meta_charset import Filter
            treewalker = Filter(treewalker, encoding)
        # Alphabetical attributes is here under the assumption that none of
        # the later filters add or change order of attributes; it needs to be
        # before the sanitizer so escaped elements come out correctly
        if self.alphabetical_attributes:
            from .filters.alphabeticalattributes import Filter
            treewalker = Filter(treewalker)
        # WhitespaceFilter should be used before OptionalTagFilter
        # for maximum efficiency of this latter filter
        if self.strip_whitespace:
            from .filters.whitespace import Filter
            treewalker = Filter(treewalker)
        if self.sanitize:
            from .filters.sanitizer import Filter
            treewalker = Filter(treewalker)
        if self.omit_optional_tags:
            from .filters.optionaltags import Filter
            treewalker = Filter(treewalker)

        for token in treewalker:
            type = token["type"]
            if type == "Doctype":
                doctype = "<!DOCTYPE %s" % token["name"]

                if token["publicId"]:
                    doctype += ' PUBLIC "%s"' % token["publicId"]
                elif token["systemId"]:
                    # System identifier without a public one still needs the
                    # SYSTEM keyword.
                    doctype += " SYSTEM"
                if token["systemId"]:
                    # Pick a quote character that does not occur in the
                    # identifier; if both do, the output is ambiguous.
                    if token["systemId"].find('"') >= 0:
                        if token["systemId"].find("'") >= 0:
                            # NOTE(review): "identifer" typo is preserved; the
                            # message text is observable behavior.
                            self.serializeError("System identifer contains both single and double quote characters")
                        quote_char = "'"
                    else:
                        quote_char = '"'
                    doctype += " %s%s%s" % (quote_char, token["systemId"], quote_char)

                doctype += ">"
                yield self.encodeStrict(doctype)

            elif type in ("Characters", "SpaceCharacters"):
                # Space characters and rawtext content are emitted verbatim;
                # other text is entity-escaped.
                if type == "SpaceCharacters" or in_cdata:
                    if in_cdata and token["data"].find("</") >= 0:
                        self.serializeError("Unexpected </ in CDATA")
                    yield self.encode(token["data"])
                else:
                    yield self.encode(escape(token["data"]))

            elif type in ("StartTag", "EmptyTag"):
                name = token["name"]
                yield self.encodeStrict("<%s" % name)
                if name in rcdataElements and not self.escape_rcdata:
                    # Entering a rawtext element; its text children are not
                    # escaped (handled by the in_cdata flag above).
                    in_cdata = True
                elif in_cdata:
                    self.serializeError("Unexpected child element of a CDATA element")
                for (_, attr_name), attr_value in token["data"].items():
                    # TODO: Add namespace support here
                    k = attr_name
                    v = attr_value
                    yield self.encodeStrict(' ')

                    yield self.encodeStrict(k)
                    # Boolean attributes may be minimized to just the name.
                    if not self.minimize_boolean_attributes or \
                        (k not in booleanAttributes.get(name, tuple()) and
                         k not in booleanAttributes.get("", tuple())):
                        yield self.encodeStrict("=")
                        # Decide whether the value needs quoting; an empty
                        # value always does.
                        if self.quote_attr_values == "always" or len(v) == 0:
                            quote_attr = True
                        elif self.quote_attr_values == "spec":
                            quote_attr = _quoteAttributeSpec.search(v) is not None
                        elif self.quote_attr_values == "legacy":
                            quote_attr = _quoteAttributeLegacy.search(v) is not None
                        else:
                            raise ValueError("quote_attr_values must be one of: "
                                             "'always', 'spec', or 'legacy'")
                        v = v.replace("&", "&amp;")
                        if self.escape_lt_in_attrs:
                            v = v.replace("<", "&lt;")
                        if quote_attr:
                            quote_char = self.quote_char
                            if self.use_best_quote_char:
                                # Prefer the quote character that avoids
                                # escaping inside the value.
                                if "'" in v and '"' not in v:
                                    quote_char = '"'
                                elif '"' in v and "'" not in v:
                                    quote_char = "'"
                            if quote_char == "'":
                                v = v.replace("'", "&#39;")
                            else:
                                v = v.replace('"', "&quot;")
                            yield self.encodeStrict(quote_char)
                            yield self.encode(v)
                            yield self.encodeStrict(quote_char)
                        else:
                            yield self.encode(v)
                if name in voidElements and self.use_trailing_solidus:
                    if self.space_before_trailing_solidus:
                        yield self.encodeStrict(" /")
                    else:
                        yield self.encodeStrict("/")
                yield self.encode(">")

            elif type == "EndTag":
                name = token["name"]
                if name in rcdataElements:
                    # Leaving a rawtext element restores normal escaping.
                    in_cdata = False
                elif in_cdata:
                    self.serializeError("Unexpected child element of a CDATA element")
                yield self.encodeStrict("</%s>" % name)

            elif type == "Comment":
                data = token["data"]
                if data.find("--") >= 0:
                    # "--" is not allowed inside an HTML comment.
                    self.serializeError("Comment contains --")
                yield self.encodeStrict("<!--%s-->" % token["data"])

            elif type == "Entity":
                name = token["name"]
                key = name + ";"
                if key not in entities:
                    self.serializeError("Entity %s not recognized" % name)
                # XML predefined entities are never resolved to their text.
                if self.resolve_entities and key not in xmlEntities:
                    data = entities[key]
                else:
                    data = "&%s;" % name
                yield self.encodeStrict(data)

            else:
                # Any other token type is an error emitted by the walker.
                self.serializeError(token["data"])
| 374 | |||
| 375 | def render(self, treewalker, encoding=None): | ||
| 376 | """Serializes the stream from the treewalker into a string | ||
| 377 | |||
| 378 | :arg treewalker: the treewalker to serialize | ||
| 379 | |||
| 380 | :arg encoding: the string encoding to use | ||
| 381 | |||
| 382 | :returns: the serialized tree | ||
| 383 | |||
| 384 | Example: | ||
| 385 | |||
| 386 | >>> from html5lib import parse, getTreeWalker | ||
| 387 | >>> from html5lib.serializer import HTMLSerializer | ||
| 388 | >>> token_stream = parse('<html><body>Hi!</body></html>') | ||
| 389 | >>> walker = getTreeWalker('etree') | ||
| 390 | >>> serializer = HTMLSerializer(omit_optional_tags=False) | ||
| 391 | >>> serializer.render(walker(token_stream)) | ||
| 392 | '<html><head></head><body>Hi!</body></html>' | ||
| 393 | |||
| 394 | """ | ||
| 395 | if encoding: | ||
| 396 | return b"".join(list(self.serialize(treewalker, encoding))) | ||
| 397 | else: | ||
| 398 | return "".join(list(self.serialize(treewalker))) | ||
| 399 | |||
    def serializeError(self, data="XXX ERROR MESSAGE NEEDED"):
        """Record a serialization error message in ``self.errors``; in
        strict mode also raise :exc:`SerializeError`."""
        # XXX The idea is to make data mandatory.
        self.errors.append(data)
        if self.strict:
            raise SerializeError
| 405 | |||
| 406 | |||
# Raised by HTMLSerializer.serializeError when the serializer is in strict
# mode; otherwise errors are only collected in HTMLSerializer.errors.
class SerializeError(Exception):
    """Error in serialized tree"""
    pass
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treeadapters/__init__.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treeadapters/__init__.py new file mode 100644 index 0000000..8767fb0 --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treeadapters/__init__.py | |||
| @@ -0,0 +1,30 @@ | |||
| 1 | """Tree adapters let you convert from one tree structure to another | ||
| 2 | |||
| 3 | Example: | ||
| 4 | |||
| 5 | .. code-block:: python | ||
| 6 | |||
| 7 | from pip._vendor import html5lib | ||
| 8 | from pip._vendor.html5lib.treeadapters import genshi | ||
| 9 | |||
| 10 | doc = '<html><body>Hi!</body></html>' | ||
| 11 | treebuilder = html5lib.getTreeBuilder('etree') | ||
| 12 | parser = html5lib.HTMLParser(tree=treebuilder) | ||
| 13 | tree = parser.parse(doc) | ||
| 14 | TreeWalker = html5lib.getTreeWalker('etree') | ||
| 15 | |||
| 16 | genshi_tree = genshi.to_genshi(TreeWalker(tree)) | ||
| 17 | |||
| 18 | """ | ||
| 19 | from __future__ import absolute_import, division, unicode_literals | ||
| 20 | |||
| 21 | from . import sax | ||
| 22 | |||
| 23 | __all__ = ["sax"] | ||
| 24 | |||
| 25 | try: | ||
| 26 | from . import genshi # noqa | ||
| 27 | except ImportError: | ||
| 28 | pass | ||
| 29 | else: | ||
| 30 | __all__.append("genshi") | ||
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treeadapters/genshi.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treeadapters/genshi.py new file mode 100644 index 0000000..73c70c6 --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treeadapters/genshi.py | |||
| @@ -0,0 +1,54 @@ | |||
| 1 | from __future__ import absolute_import, division, unicode_literals | ||
| 2 | |||
| 3 | from genshi.core import QName, Attrs | ||
| 4 | from genshi.core import START, END, TEXT, COMMENT, DOCTYPE | ||
| 5 | |||
| 6 | |||
def to_genshi(walker):
    """Convert a tree to a genshi tree

    :arg walker: the treewalker to use to walk the tree to convert it

    :returns: generator of genshi nodes

    """
    # Buffer of consecutive character tokens, flushed as one TEXT event.
    text = []
    for token in walker:
        type = token["type"]
        if type in ("Characters", "SpaceCharacters"):
            text.append(token["data"])
        elif text:
            # Any non-character token ends the current text run; emit it
            # before handling the token itself.
            # The (None, -1, -1) tuples are position info with no known
            # source location — presumably (source, line, col); TODO confirm
            # against genshi's event stream docs.
            yield TEXT, "".join(text), (None, -1, -1)
            text = []

        if type in ("StartTag", "EmptyTag"):
            if token["namespace"]:
                name = "{%s}%s" % (token["namespace"], token["name"])
            else:
                name = token["name"]
            # Attribute keys are (namespace, localname) pairs; only qualify
            # the name when a namespace is present.
            attrs = Attrs([(QName("{%s}%s" % attr if attr[0] is not None else attr[1]), value)
                           for attr, value in token["data"].items()])
            yield (START, (QName(name), attrs), (None, -1, -1))
            if type == "EmptyTag":
                # An empty tag is START immediately followed by END; fall
                # through to the EndTag branch below.
                type = "EndTag"

        if type == "EndTag":
            if token["namespace"]:
                name = "{%s}%s" % (token["namespace"], token["name"])
            else:
                name = token["name"]

            yield END, QName(name), (None, -1, -1)

        elif type == "Comment":
            yield COMMENT, token["data"], (None, -1, -1)

        elif type == "Doctype":
            yield DOCTYPE, (token["name"], token["publicId"],
                            token["systemId"]), (None, -1, -1)

        else:
            pass  # FIXME: What to do?

    if text:
        # Flush any text run left at the end of the stream.
        yield TEXT, "".join(text), (None, -1, -1)
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treeadapters/sax.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treeadapters/sax.py new file mode 100644 index 0000000..1f06d13 --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treeadapters/sax.py | |||
| @@ -0,0 +1,50 @@ | |||
| 1 | from __future__ import absolute_import, division, unicode_literals | ||
| 2 | |||
| 3 | from xml.sax.xmlreader import AttributesNSImpl | ||
| 4 | |||
| 5 | from ..constants import adjustForeignAttributes, unadjustForeignAttributes | ||
| 6 | |||
# Map each namespace prefix used by the foreign-attribute adjustments to its
# namespace URI, so to_sax can declare the prefixes up front.
prefix_mapping = {}
for prefix, localName, namespace in adjustForeignAttributes.values():
    if prefix is not None:
        prefix_mapping[prefix] = namespace
| 11 | |||
| 12 | |||
def to_sax(walker, handler):
    """Call SAX-like content handler based on treewalker walker

    :arg walker: the treewalker to use to walk the tree to convert it

    :arg handler: SAX handler to use

    """
    handler.startDocument()
    for prefix, namespace in prefix_mapping.items():
        handler.startPrefixMapping(prefix, namespace)

    for token in walker:
        token_type = token["type"]
        if token_type == "Doctype":
            # ContentHandler has no doctype event; drop it.
            continue
        if token_type in ("StartTag", "EmptyTag"):
            qname = (token["namespace"], token["name"])
            handler.startElementNS(
                qname,
                token["name"],
                AttributesNSImpl(token["data"], unadjustForeignAttributes))
            if token_type == "EmptyTag":
                # An empty element is a start event immediately followed by
                # its end event.
                handler.endElementNS(qname, token["name"])
        elif token_type == "EndTag":
            handler.endElementNS((token["namespace"], token["name"]),
                                 token["name"])
        elif token_type in ("Characters", "SpaceCharacters"):
            handler.characters(token["data"])
        elif token_type == "Comment":
            # Comments are deliberately not forwarded.
            pass
        else:
            assert False, "Unknown token type"

    for prefix, namespace in prefix_mapping.items():
        handler.endPrefixMapping(prefix)
    handler.endDocument()
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treebuilders/__init__.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treebuilders/__init__.py new file mode 100644 index 0000000..2ce5c87 --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treebuilders/__init__.py | |||
| @@ -0,0 +1,88 @@ | |||
| 1 | """A collection of modules for building different kinds of trees from HTML | ||
| 2 | documents. | ||
| 3 | |||
| 4 | To create a treebuilder for a new type of tree, you need to do | ||
| 5 | implement several things: | ||
| 6 | |||
| 7 | 1. A set of classes for various types of elements: Document, Doctype, Comment, | ||
| 8 | Element. These must implement the interface of ``base.treebuilders.Node`` | ||
| 9 | (although comment nodes have a different signature for their constructor, | ||
| 10 | see ``treebuilders.etree.Comment``) Textual content may also be implemented | ||
| 11 | as another node type, or not, as your tree implementation requires. | ||
| 12 | |||
| 13 | 2. A treebuilder object (called ``TreeBuilder`` by convention) that inherits | ||
| 14 | from ``treebuilders.base.TreeBuilder``. This has 4 required attributes: | ||
| 15 | |||
| 16 | * ``documentClass`` - the class to use for the bottommost node of a document | ||
| 17 | * ``elementClass`` - the class to use for HTML Elements | ||
| 18 | * ``commentClass`` - the class to use for comments | ||
| 19 | * ``doctypeClass`` - the class to use for doctypes | ||
| 20 | |||
| 21 | It also has one required method: | ||
| 22 | |||
| 23 | * ``getDocument`` - Returns the root node of the complete document tree | ||
| 24 | |||
| 25 | 3. If you wish to run the unit tests, you must also create a ``testSerializer`` | ||
| 26 | method on your treebuilder which accepts a node and returns a string | ||
| 27 | containing Node and its children serialized according to the format used in | ||
| 28 | the unittests | ||
| 29 | |||
| 30 | """ | ||
| 31 | |||
| 32 | from __future__ import absolute_import, division, unicode_literals | ||
| 33 | |||
| 34 | from .._utils import default_etree | ||
| 35 | |||
| 36 | treeBuilderCache = {} | ||
| 37 | |||
| 38 | |||
def getTreeBuilder(treeType, implementation=None, **kwargs):
    """Get a TreeBuilder class for various types of trees with built-in support

    :arg treeType: the name of the tree type required (case-insensitive). Supported
        values are:

        * "dom" - A generic builder for DOM implementations, defaulting to a
          xml.dom.minidom based implementation.
        * "etree" - A generic builder for tree implementations exposing an
          ElementTree-like interface, defaulting to xml.etree.cElementTree if
          available and xml.etree.ElementTree if not.
        * "lxml" - A etree-based builder for lxml.etree, handling limitations
          of lxml's implementation.

    :arg implementation: (Currently applies to the "etree" and "dom" tree
        types). A module implementing the tree type e.g. xml.etree.ElementTree
        or xml.etree.cElementTree.

    :arg kwargs: Any additional options to pass to the TreeBuilder when
        creating it.

    Example:

    >>> from html5lib.treebuilders import getTreeBuilder
    >>> builder = getTreeBuilder('etree')

    """
    treeType = treeType.lower()

    # Only "lxml" ever lands in the module-level cache (see below); cached
    # values are always classes, never None.
    cached = treeBuilderCache.get(treeType)
    if cached is not None:
        return cached

    if treeType == "dom":
        from . import dom
        # Come up with a sane default (pref. from the stdlib)
        if implementation is None:
            from xml.dom import minidom
            implementation = minidom
        # NEVER cache here, caching is done in the dom submodule
        return dom.getDomModule(implementation, **kwargs).TreeBuilder
    if treeType == "etree":
        from . import etree
        if implementation is None:
            implementation = default_etree
        # NEVER cache here, caching is done in the etree submodule
        return etree.getETreeModule(implementation, **kwargs).TreeBuilder
    if treeType == "lxml":
        from . import etree_lxml
        treeBuilderCache[treeType] = etree_lxml.TreeBuilder
        return treeBuilderCache[treeType]

    raise ValueError("""Unrecognised treebuilder "%s" """ % treeType)
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treebuilders/base.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treebuilders/base.py new file mode 100644 index 0000000..ed32fcb --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treebuilders/base.py | |||
| @@ -0,0 +1,417 @@ | |||
| 1 | from __future__ import absolute_import, division, unicode_literals | ||
| 2 | from pip._vendor.six import text_type | ||
| 3 | |||
| 4 | from ..constants import scopingElements, tableInsertModeElements, namespaces | ||
| 5 | |||
| 6 | # The scope markers are inserted when entering object elements, | ||
| 7 | # marquees, table cells, and table captions, and are used to prevent formatting | ||
| 8 | # from "leaking" into tables, object elements, and marquees. | ||
| 9 | Marker = None | ||
| 10 | |||
| 11 | listElementsMap = { | ||
| 12 | None: (frozenset(scopingElements), False), | ||
| 13 | "button": (frozenset(scopingElements | set([(namespaces["html"], "button")])), False), | ||
| 14 | "list": (frozenset(scopingElements | set([(namespaces["html"], "ol"), | ||
| 15 | (namespaces["html"], "ul")])), False), | ||
| 16 | "table": (frozenset([(namespaces["html"], "html"), | ||
| 17 | (namespaces["html"], "table")]), False), | ||
| 18 | "select": (frozenset([(namespaces["html"], "optgroup"), | ||
| 19 | (namespaces["html"], "option")]), True) | ||
| 20 | } | ||
| 21 | |||
| 22 | |||
class Node(object):
    """Represents an item in the tree"""

    def __init__(self, name):
        """Creates a Node

        :arg name: The tag name associated with the node

        """
        # The tag name associated with the node
        self.name = name
        # The parent of the current node (or None for the document node)
        self.parent = None
        # The value of the current node (applies to text nodes and comments)
        self.value = None
        # A dict holding name -> value pairs for attributes of the node
        self.attributes = {}
        # A list of child nodes of the current node. This must include all
        # elements but not necessarily other node types.
        self.childNodes = []
        # A list of miscellaneous flags that can be set on the node.
        self._flags = []

    def __str__(self):
        rendered_attrs = " ".join("%s=\"%s\"" % (name, value)
                                  for name, value in self.attributes.items())
        if not rendered_attrs:
            return "<%s>" % (self.name)
        return "<%s %s>" % (self.name, rendered_attrs)

    def __repr__(self):
        return "<%s>" % (self.name)

    def appendChild(self, node):
        """Insert node as a child of the current node

        :arg node: the node to insert

        """
        raise NotImplementedError

    def insertText(self, data, insertBefore=None):
        """Insert data as text in the current node, positioned before the
        start of node insertBefore or to the end of the node's text.

        :arg data: the data to insert

        :arg insertBefore: True if you want to insert the text before the node
            and False if you want to insert it after the node

        """
        raise NotImplementedError

    def insertBefore(self, node, refNode):
        """Insert node as a child of the current node, before refNode in the
        list of child nodes. Raises ValueError if refNode is not a child of
        the current node

        :arg node: the node to insert

        :arg refNode: the child node to insert the node before

        """
        raise NotImplementedError

    def removeChild(self, node):
        """Remove node from the children of the current node

        :arg node: the child node to remove

        """
        raise NotImplementedError

    def reparentChildren(self, newParent):
        """Move all the children of the current node to newParent.
        This is needed so that trees that don't store text as nodes move the
        text in the correct way

        :arg newParent: the node to move all this node's children to

        """
        # XXX - should this method be made more general?
        for child in self.childNodes:
            newParent.appendChild(child)
        self.childNodes = []

    def cloneNode(self):
        """Return a shallow copy of the current node i.e. a node with the same
        name and attributes but with no parent or child nodes
        """
        raise NotImplementedError

    def hasContent(self):
        """Return true if the node has children or text, false otherwise
        """
        raise NotImplementedError
| 120 | |||
| 121 | |||
class ActiveFormattingElements(list):
    """List of active formatting elements.

    ``append`` implements the "Noah's Ark" clause: before adding a node it
    removes the earliest of any three equal entries found after the last
    marker.
    """

    def append(self, node):
        if node != Marker:
            matches = 0
            # Scan back towards (but not past) the last marker.
            for element in reversed(self):
                if element == Marker:
                    break
                if self.nodesEqual(element, node):
                    matches += 1
                    if matches == 3:
                        self.remove(element)
                        break
        list.append(self, node)

    def nodesEqual(self, node1, node2):
        # Two entries count as equal when both the (namespace, name) tuple
        # and the attribute dict match.
        return (node1.nameTuple == node2.nameTuple and
                node1.attributes == node2.attributes)
| 144 | |||
| 145 | |||
class TreeBuilder(object):
    """Base treebuilder implementation

    * documentClass - the class to use for the bottommost node of a document
    * elementClass - the class to use for HTML Elements
    * commentClass - the class to use for comments
    * doctypeClass - the class to use for doctypes

    """
    # pylint:disable=not-callable

    # Subclasses must override these; the base class leaves them as None.

    # Document class
    documentClass = None

    # The class to use for creating a node
    elementClass = None

    # The class to use for creating comments
    commentClass = None

    # The class to use for creating doctypes
    doctypeClass = None

    # Fragment class
    fragmentClass = None
| 171 | |||
| 172 | def __init__(self, namespaceHTMLElements): | ||
| 173 | """Create a TreeBuilder | ||
| 174 | |||
| 175 | :arg namespaceHTMLElements: whether or not to namespace HTML elements | ||
| 176 | |||
| 177 | """ | ||
| 178 | if namespaceHTMLElements: | ||
| 179 | self.defaultNamespace = "http://www.w3.org/1999/xhtml" | ||
| 180 | else: | ||
| 181 | self.defaultNamespace = None | ||
| 182 | self.reset() | ||
| 183 | |||
    def reset(self):
        """Reset the builder to a clean initial state with a fresh document."""
        # Stack of currently open elements.
        self.openElements = []
        self.activeFormattingElements = ActiveFormattingElements()

        # XXX - rename these to headElement, formElement
        self.headPointer = None
        self.formPointer = None

        # Property; also selects which insertElement implementation is used.
        self.insertFromTable = False

        self.document = self.documentClass()
| 195 | |||
    def elementInScope(self, target, variant=None):
        """Return True if *target* is "in scope" on the open-elements stack.

        :arg target: a node (matched by identity) or a tag name string/tuple
            (matched against each node's ``nameTuple``)

        :arg variant: which scope definition to use — a key of
            ``listElementsMap`` (None, "button", "list", "table", "select")

        """
        # If we pass a node in we match that. if we pass a string
        # match any node with that name
        exactNode = hasattr(target, "nameTuple")
        if not exactNode:
            if isinstance(target, text_type):
                target = (namespaces["html"], target)
            assert isinstance(target, tuple)

        listElements, invert = listElementsMap[variant]

        for node in reversed(self.openElements):
            if exactNode and node == target:
                return True
            elif not exactNode and node.nameTuple == target:
                return True
            elif (invert ^ (node.nameTuple in listElements)):
                # For most variants listElements is a set of scope-breaking
                # elements; with invert=True ("select") it is instead the set
                # of elements that do NOT break scope.
                return False

        assert False  # We should never reach this point
| 217 | |||
    def reconstructActiveFormattingElements(self):
        """Re-open formatting elements that are in the active formatting
        list but no longer on the open-elements stack, cloning each and
        replacing its list entry with the new element."""
        # Within this algorithm the order of steps described in the
        # specification is not quite the same as the order of steps in the
        # code. It should still do the same though.

        # Step 1: stop the algorithm when there's nothing to do.
        if not self.activeFormattingElements:
            return

        # Step 2 and step 3: we start with the last element. So i is -1.
        i = len(self.activeFormattingElements) - 1
        entry = self.activeFormattingElements[i]
        if entry == Marker or entry in self.openElements:
            return

        # Step 6
        while entry != Marker and entry not in self.openElements:
            if i == 0:
                # This will be reset to 0 below
                i = -1
                break
            i -= 1
            # Step 5: let entry be one earlier in the list.
            entry = self.activeFormattingElements[i]

        while True:
            # Step 7
            i += 1

            # Step 8
            entry = self.activeFormattingElements[i]
            clone = entry.cloneNode()  # Mainly to get a new copy of the attributes

            # Step 9
            element = self.insertElement({"type": "StartTag",
                                          "name": clone.name,
                                          "namespace": clone.namespace,
                                          "data": clone.attributes})

            # Step 10
            self.activeFormattingElements[i] = element

            # Step 11
            if element == self.activeFormattingElements[-1]:
                break
| 263 | |||
| 264 | def clearActiveFormattingElements(self): | ||
| 265 | entry = self.activeFormattingElements.pop() | ||
| 266 | while self.activeFormattingElements and entry != Marker: | ||
| 267 | entry = self.activeFormattingElements.pop() | ||
| 268 | |||
| 269 | def elementInActiveFormattingElements(self, name): | ||
| 270 | """Check if an element exists between the end of the active | ||
| 271 | formatting elements and the last marker. If it does, return it, else | ||
| 272 | return false""" | ||
| 273 | |||
| 274 | for item in self.activeFormattingElements[::-1]: | ||
| 275 | # Check for Marker first because if it's a Marker it doesn't have a | ||
| 276 | # name attribute. | ||
| 277 | if item == Marker: | ||
| 278 | break | ||
| 279 | elif item.name == name: | ||
| 280 | return item | ||
| 281 | return False | ||
| 282 | |||
| 283 | def insertRoot(self, token): | ||
| 284 | element = self.createElement(token) | ||
| 285 | self.openElements.append(element) | ||
| 286 | self.document.appendChild(element) | ||
| 287 | |||
| 288 | def insertDoctype(self, token): | ||
| 289 | name = token["name"] | ||
| 290 | publicId = token["publicId"] | ||
| 291 | systemId = token["systemId"] | ||
| 292 | |||
| 293 | doctype = self.doctypeClass(name, publicId, systemId) | ||
| 294 | self.document.appendChild(doctype) | ||
| 295 | |||
| 296 | def insertComment(self, token, parent=None): | ||
| 297 | if parent is None: | ||
| 298 | parent = self.openElements[-1] | ||
| 299 | parent.appendChild(self.commentClass(token["data"])) | ||
| 300 | |||
| 301 | def createElement(self, token): | ||
| 302 | """Create an element but don't insert it anywhere""" | ||
| 303 | name = token["name"] | ||
| 304 | namespace = token.get("namespace", self.defaultNamespace) | ||
| 305 | element = self.elementClass(name, namespace) | ||
| 306 | element.attributes = token["data"] | ||
| 307 | return element | ||
| 308 | |||
    def _getInsertFromTable(self):
        # Property getter backing ``insertFromTable``.
        return self._insertFromTable
| 311 | |||
| 312 | def _setInsertFromTable(self, value): | ||
| 313 | """Switch the function used to insert an element from the | ||
| 314 | normal one to the misnested table one and back again""" | ||
| 315 | self._insertFromTable = value | ||
| 316 | if value: | ||
| 317 | self.insertElement = self.insertElementTable | ||
| 318 | else: | ||
| 319 | self.insertElement = self.insertElementNormal | ||
| 320 | |||
| 321 | insertFromTable = property(_getInsertFromTable, _setInsertFromTable) | ||
| 322 | |||
| 323 | def insertElementNormal(self, token): | ||
| 324 | name = token["name"] | ||
| 325 | assert isinstance(name, text_type), "Element %s not unicode" % name | ||
| 326 | namespace = token.get("namespace", self.defaultNamespace) | ||
| 327 | element = self.elementClass(name, namespace) | ||
| 328 | element.attributes = token["data"] | ||
| 329 | self.openElements[-1].appendChild(element) | ||
| 330 | self.openElements.append(element) | ||
| 331 | return element | ||
| 332 | |||
    def insertElementTable(self, token):
        """Create an element and insert it into the tree"""
        # NOTE(review): this element is discarded (and a second one created)
        # when we fall through to insertElementNormal below.
        element = self.createElement(token)
        if self.openElements[-1].name not in tableInsertModeElements:
            return self.insertElementNormal(token)
        else:
            # We should be in the InTable mode. This means we want to do
            # special magic element rearranging
            parent, insertBefore = self.getTableMisnestedNodePosition()
            if insertBefore is None:
                parent.appendChild(element)
            else:
                parent.insertBefore(element, insertBefore)
            self.openElements.append(element)
        return element
| 348 | |||
def insertText(self, data, parent=None):
    """Insert character data at the current insertion point.

    When inserting from a table and the current node is a
    table-insert-mode element, the text is foster-parented just like a
    misnested element; otherwise it is appended to *parent* (default:
    the innermost open element).
    """
    if parent is None:
        parent = self.openElements[-1]

    # The original condition `not A or (A and B)` reduces to `not A or B`.
    if (not self.insertFromTable or
            self.openElements[-1].name not in tableInsertModeElements):
        parent.insertText(data)
    else:
        # We should be in the InTable mode. This means we want to do
        # special magic element rearranging
        parent, insertBefore = self.getTableMisnestedNodePosition()
        parent.insertText(data, insertBefore)
| 363 | |||
def getTableMisnestedNodePosition(self):
    """Get the foster parent element, and sibling to insert before
    (or None) when inserting a misnested table node"""
    # The foster parent element is the one which comes before the most
    # recently opened table element
    # XXX - this is really inelegant
    lastTable = None
    fosterParent = None
    insertBefore = None
    # Walk the open-elements stack from the innermost element outwards,
    # looking for the most recently opened <table>.
    for elm in self.openElements[::-1]:
        if elm.name == "table":
            lastTable = elm
            break
    if lastTable:
        # XXX - we should really check that this parent is actually a
        # node here
        if lastTable.parent:
            # Insert into the table's parent, immediately before the table.
            fosterParent = lastTable.parent
            insertBefore = lastTable
        else:
            # The table has no parent yet: use the element just below it
            # on the stack and append at its end (insertBefore stays None).
            fosterParent = self.openElements[
                self.openElements.index(lastTable) - 1]
    else:
        # No table on the stack (fragment case): use the root element.
        fosterParent = self.openElements[0]
    return fosterParent, insertBefore
| 389 | |||
def generateImpliedEndTags(self, exclude=None):
    """Pop open elements whose end tags are implied, stopping at *exclude*.

    Repeatedly removes the innermost open element while its name is one
    of the implied-end-tag elements and is not the excluded name.
    """
    # XXX td, th and tr are not actually needed
    impliedEndTags = frozenset(
        ("dd", "dt", "li", "option", "optgroup", "p", "rp", "rt"))
    # Iterative form of the original tail recursion.
    # XXX This is not entirely what the specification says. We should
    # investigate it more closely.
    while True:
        name = self.openElements[-1].name
        if name not in impliedEndTags or name == exclude:
            break
        self.openElements.pop()
| 399 | |||
def getDocument(self):
    """Return the final tree (the document node built during parsing)."""
    return self.document
| 403 | |||
def getFragment(self):
    """Return the final fragment.

    Builds a fresh fragment node and moves every child of the root open
    element into it.
    """
    # assert self.innerHTML
    fragment = self.fragmentClass()
    root = self.openElements[0]
    root.reparentChildren(fragment)
    return fragment
| 410 | |||
def testSerializer(self, node):
    """Serialize the subtree of node in the format required by unit tests

    :arg node: the node from which to start serializing

    """
    # Abstract: every concrete tree builder supplies its own serializer.
    raise NotImplementedError
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treebuilders/dom.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treebuilders/dom.py new file mode 100644 index 0000000..8117b2d --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treebuilders/dom.py | |||
| @@ -0,0 +1,236 @@ | |||
| 1 | from __future__ import absolute_import, division, unicode_literals | ||
| 2 | |||
| 3 | |||
| 4 | from collections import MutableMapping | ||
| 5 | from xml.dom import minidom, Node | ||
| 6 | import weakref | ||
| 7 | |||
| 8 | from . import base | ||
| 9 | from .. import constants | ||
| 10 | from ..constants import namespaces | ||
| 11 | from .._utils import moduleFactoryFactory | ||
| 12 | |||
| 13 | |||
| 14 | def getDomBuilder(DomImplementation): | ||
| 15 | Dom = DomImplementation | ||
| 16 | |||
class AttrList(MutableMapping):
    """Mutable-mapping view over a DOM element's attributes.

    Namespaced (tuple) keys are not supported and raise
    NotImplementedError, matching the simple-name-only contract.

    NOTE(review): the module imports MutableMapping from `collections`,
    which was removed in Python 3.10 — it should come from
    `collections.abc`.
    """

    def __init__(self, element):
        self.element = element

    @staticmethod
    def _rejectTuple(name):
        # Qualified (namespaced) attribute names are handled elsewhere.
        if isinstance(name, tuple):
            raise NotImplementedError

    def __getitem__(self, name):
        self._rejectTuple(name)
        return self.element.attributes[name].value

    def __setitem__(self, name, value):
        self._rejectTuple(name)
        attr = self.element.ownerDocument.createAttribute(name)
        attr.value = value
        self.element.attributes[name] = attr

    def __delitem__(self, name):
        self._rejectTuple(name)
        del self.element.attributes[name]

    def __iter__(self):
        return iter(self.element.attributes.keys())

    def __len__(self):
        return len(self.element.attributes)

    def items(self):
        return list(self.element.attributes.items())

    def values(self):
        return list(self.element.attributes.values())
| 52 | |||
class NodeBuilder(base.Node):
    """html5lib tree-builder node wrapping a single DOM (minidom) node."""

    def __init__(self, element):
        base.Node.__init__(self, element.nodeName)
        # The wrapped DOM node; all tree operations delegate to it.
        self.element = element

    # Elements expose namespaceURI; other node types report None.
    namespace = property(lambda self: hasattr(self.element, "namespaceURI") and
                         self.element.namespaceURI or None)

    def appendChild(self, node):
        """Append *node* (a NodeBuilder) as the last child."""
        node.parent = self
        self.element.appendChild(node.element)

    def insertText(self, data, insertBefore=None):
        """Insert a text node, optionally before sibling *insertBefore*."""
        text = self.element.ownerDocument.createTextNode(data)
        if insertBefore:
            self.element.insertBefore(text, insertBefore.element)
        else:
            self.element.appendChild(text)

    def insertBefore(self, node, refNode):
        """Insert *node* immediately before *refNode* among the children."""
        self.element.insertBefore(node.element, refNode.element)
        node.parent = self

    def removeChild(self, node):
        """Detach *node* if it is currently a child of this node."""
        if node.element.parentNode == self.element:
            self.element.removeChild(node.element)
        node.parent = None

    def reparentChildren(self, newParent):
        """Move every child of this node onto *newParent*."""
        while self.element.hasChildNodes():
            child = self.element.firstChild
            self.element.removeChild(child)
            newParent.element.appendChild(child)
        self.childNodes = []

    def getAttributes(self):
        # Live mapping view; mutations write through to the DOM element.
        return AttrList(self.element)

    def setAttributes(self, attributes):
        if attributes:
            for name, value in list(attributes.items()):
                if isinstance(name, tuple):
                    # Key is a (prefix, localName, namespaceURI) triple.
                    if name[0] is not None:
                        qualifiedName = (name[0] + ":" + name[1])
                    else:
                        qualifiedName = name[1]
                    self.element.setAttributeNS(name[2], qualifiedName,
                                                value)
                else:
                    self.element.setAttribute(
                        name, value)
    attributes = property(getAttributes, setAttributes)

    def cloneNode(self):
        """Return a shallow copy (attributes copied, children not)."""
        return NodeBuilder(self.element.cloneNode(False))

    def hasContent(self):
        """True if this node has any children."""
        return self.element.hasChildNodes()

    def getNameTuple(self):
        # Nodes without an explicit namespace are treated as HTML.
        if self.namespace is None:
            return namespaces["html"], self.name
        else:
            return self.namespace, self.name

    nameTuple = property(getNameTuple)
| 119 | |||
class TreeBuilder(base.TreeBuilder):  # pylint:disable=unused-variable
    """Tree builder producing a DOM document via *Dom* (e.g. minidom)."""

    def documentClass(self):
        # Create the real DOM document, but hand the base class a weak
        # proxy of the builder itself, which stands in for the document
        # node without creating a reference cycle.
        self.dom = Dom.getDOMImplementation().createDocument(None, None, None)
        return weakref.proxy(self)

    def insertDoctype(self, token):
        name = token["name"]
        publicId = token["publicId"]
        systemId = token["systemId"]

        domimpl = Dom.getDOMImplementation()
        doctype = domimpl.createDocumentType(name, publicId, systemId)
        self.document.appendChild(NodeBuilder(doctype))
        if Dom == minidom:
            # minidom creates doctypes detached from any document; attach
            # explicitly so the doctype serializes with the tree.
            doctype.ownerDocument = self.dom

    def elementClass(self, name, namespace=None):
        # Use the non-namespaced API only when neither the token nor the
        # builder supplies a namespace.
        if namespace is None and self.defaultNamespace is None:
            node = self.dom.createElement(name)
        else:
            node = self.dom.createElementNS(namespace, name)

        return NodeBuilder(node)

    def commentClass(self, data):
        return NodeBuilder(self.dom.createComment(data))

    def fragmentClass(self):
        return NodeBuilder(self.dom.createDocumentFragment())

    def appendChild(self, node):
        self.dom.appendChild(node.element)

    def testSerializer(self, element):
        return testSerializer(element)

    def getDocument(self):
        return self.dom

    def getFragment(self):
        return base.TreeBuilder.getFragment(self).element

    def insertText(self, data, parent=None):
        data = data  # no-op assignment kept from upstream
        if parent != self:
            base.TreeBuilder.insertText(self, data, parent)
        else:
            # HACK: allow text nodes as children of the document node
            if hasattr(self.dom, '_child_node_types'):
                # pylint:disable=protected-access
                if Node.TEXT_NODE not in self.dom._child_node_types:
                    self.dom._child_node_types = list(self.dom._child_node_types)
                    self.dom._child_node_types.append(Node.TEXT_NODE)
            self.dom.appendChild(self.dom.createTextNode(data))

    implementation = DomImplementation
    name = None
| 177 | |||
| 178 | def testSerializer(element): | ||
| 179 | element.normalize() | ||
| 180 | rv = [] | ||
| 181 | |||
| 182 | def serializeElement(element, indent=0): | ||
| 183 | if element.nodeType == Node.DOCUMENT_TYPE_NODE: | ||
| 184 | if element.name: | ||
| 185 | if element.publicId or element.systemId: | ||
| 186 | publicId = element.publicId or "" | ||
| 187 | systemId = element.systemId or "" | ||
| 188 | rv.append("""|%s<!DOCTYPE %s "%s" "%s">""" % | ||
| 189 | (' ' * indent, element.name, publicId, systemId)) | ||
| 190 | else: | ||
| 191 | rv.append("|%s<!DOCTYPE %s>" % (' ' * indent, element.name)) | ||
| 192 | else: | ||
| 193 | rv.append("|%s<!DOCTYPE >" % (' ' * indent,)) | ||
| 194 | elif element.nodeType == Node.DOCUMENT_NODE: | ||
| 195 | rv.append("#document") | ||
| 196 | elif element.nodeType == Node.DOCUMENT_FRAGMENT_NODE: | ||
| 197 | rv.append("#document-fragment") | ||
| 198 | elif element.nodeType == Node.COMMENT_NODE: | ||
| 199 | rv.append("|%s<!-- %s -->" % (' ' * indent, element.nodeValue)) | ||
| 200 | elif element.nodeType == Node.TEXT_NODE: | ||
| 201 | rv.append("|%s\"%s\"" % (' ' * indent, element.nodeValue)) | ||
| 202 | else: | ||
| 203 | if (hasattr(element, "namespaceURI") and | ||
| 204 | element.namespaceURI is not None): | ||
| 205 | name = "%s %s" % (constants.prefixes[element.namespaceURI], | ||
| 206 | element.nodeName) | ||
| 207 | else: | ||
| 208 | name = element.nodeName | ||
| 209 | rv.append("|%s<%s>" % (' ' * indent, name)) | ||
| 210 | if element.hasAttributes(): | ||
| 211 | attributes = [] | ||
| 212 | for i in range(len(element.attributes)): | ||
| 213 | attr = element.attributes.item(i) | ||
| 214 | name = attr.nodeName | ||
| 215 | value = attr.value | ||
| 216 | ns = attr.namespaceURI | ||
| 217 | if ns: | ||
| 218 | name = "%s %s" % (constants.prefixes[ns], attr.localName) | ||
| 219 | else: | ||
| 220 | name = attr.nodeName | ||
| 221 | attributes.append((name, value)) | ||
| 222 | |||
| 223 | for name, value in sorted(attributes): | ||
| 224 | rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value)) | ||
| 225 | indent += 2 | ||
| 226 | for child in element.childNodes: | ||
| 227 | serializeElement(child, indent) | ||
| 228 | serializeElement(element, 0) | ||
| 229 | |||
| 230 | return "\n".join(rv) | ||
| 231 | |||
| 232 | return locals() | ||
| 233 | |||
| 234 | |||
# The actual means to get a module!
# Each distinct DOM implementation gets its own cached module object.
getDomModule = moduleFactoryFactory(getDomBuilder)
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treebuilders/etree.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treebuilders/etree.py new file mode 100644 index 0000000..9a4aa95 --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treebuilders/etree.py | |||
| @@ -0,0 +1,340 @@ | |||
| 1 | from __future__ import absolute_import, division, unicode_literals | ||
| 2 | # pylint:disable=protected-access | ||
| 3 | |||
| 4 | from pip._vendor.six import text_type | ||
| 5 | |||
| 6 | import re | ||
| 7 | |||
| 8 | from . import base | ||
| 9 | from .. import _ihatexml | ||
| 10 | from .. import constants | ||
| 11 | from ..constants import namespaces | ||
| 12 | from .._utils import moduleFactoryFactory | ||
| 13 | |||
# Matches ElementTree's "{namespace}localname" tag spelling.
tag_regexp = re.compile("{([^}]*)}(.*)")
| 15 | |||
| 16 | |||
| 17 | def getETreeBuilder(ElementTreeImplementation, fullTree=False): | ||
| 18 | ElementTree = ElementTreeImplementation | ||
| 19 | ElementTreeCommentType = ElementTree.Comment("asd").tag | ||
| 20 | |||
class Element(base.Node):
    """html5lib node wrapping a single ElementTree element.

    ElementTree has no first-class text nodes; character data lives in
    the ``text`` attribute of an element and the ``tail`` attribute of
    its children, which is why the text-insertion logic below is shaped
    the way it is.
    """

    def __init__(self, name, namespace=None):
        self._name = name
        self._namespace = namespace
        self._element = ElementTree.Element(self._getETreeTag(name,
                                                              namespace))
        # No namespace means "HTML" for naming purposes.
        if namespace is None:
            self.nameTuple = namespaces["html"], self._name
        else:
            self.nameTuple = self._namespace, self._name
        self.parent = None
        self._childNodes = []
        self._flags = []

    def _getETreeTag(self, name, namespace):
        # ElementTree spells namespaced tags as "{namespace}name".
        if namespace is None:
            etree_tag = name
        else:
            etree_tag = "{%s}%s" % (namespace, name)
        return etree_tag

    def _setName(self, name):
        self._name = name
        self._element.tag = self._getETreeTag(self._name, self._namespace)

    def _getName(self):
        return self._name

    name = property(_getName, _setName)

    def _setNamespace(self, namespace):
        self._namespace = namespace
        self._element.tag = self._getETreeTag(self._name, self._namespace)

    def _getNamespace(self):
        return self._namespace

    namespace = property(_getNamespace, _setNamespace)

    def _getAttributes(self):
        return self._element.attrib

    def _setAttributes(self, attributes):
        # Delete existing attributes first
        # XXX - there may be a better way to do this...
        for key in list(self._element.attrib.keys()):
            del self._element.attrib[key]
        for key, value in attributes.items():
            if isinstance(key, tuple):
                # Key is a (prefix, localName, namespaceURI) triple.
                name = "{%s}%s" % (key[2], key[1])
            else:
                name = key
            self._element.set(name, value)

    attributes = property(_getAttributes, _setAttributes)

    def _getChildNodes(self):
        return self._childNodes

    def _setChildNodes(self, value):
        # Clear the underlying element, then re-insert each child so the
        # wrapper list and the ElementTree structure stay in sync.
        del self._element[:]
        self._childNodes = []
        for element in value:
            self.insertChild(element)

    childNodes = property(_getChildNodes, _setChildNodes)

    def hasContent(self):
        """Return true if the node has children or text"""
        return bool(self._element.text or len(self._element))

    def appendChild(self, node):
        self._childNodes.append(node)
        self._element.append(node._element)
        node.parent = self

    def insertBefore(self, node, refNode):
        index = list(self._element).index(refNode._element)
        self._element.insert(index, node._element)
        node.parent = self

    def removeChild(self, node):
        self._childNodes.remove(node)
        self._element.remove(node._element)
        node.parent = None

    def insertText(self, data, insertBefore=None):
        if not(len(self._element)):
            # No element children: text belongs in our own .text slot.
            if not self._element.text:
                self._element.text = ""
            self._element.text += data
        elif insertBefore is None:
            # Insert the text as the tail of the last child element
            if not self._element[-1].tail:
                self._element[-1].tail = ""
            self._element[-1].tail += data
        else:
            # Insert the text before the specified node
            children = list(self._element)
            index = children.index(insertBefore._element)
            if index > 0:
                # Text goes in the tail of the preceding sibling.
                if not self._element[index - 1].tail:
                    self._element[index - 1].tail = ""
                self._element[index - 1].tail += data
            else:
                # Inserting before the first child: use our .text slot.
                if not self._element.text:
                    self._element.text = ""
                self._element.text += data

    def cloneNode(self):
        # Shallow copy: attributes are copied, children are not.
        element = type(self)(self.name, self.namespace)
        for name, value in self.attributes.items():
            element.attributes[name] = value
        return element

    def reparentChildren(self, newParent):
        # ElementTree stores leading text on the parent, so it has to be
        # moved over by hand before the child elements are reparented.
        if newParent.childNodes:
            newParent.childNodes[-1]._element.tail += self._element.text
        else:
            if not newParent._element.text:
                newParent._element.text = ""
            if self._element.text is not None:
                newParent._element.text += self._element.text
        self._element.text = ""
        base.Node.reparentChildren(self, newParent)
| 146 | |||
class Comment(Element):
    """ElementTree comment wrapped as a tree-builder node."""

    def __init__(self, data):
        # Deliberately bypasses Element.__init__: ElementTree comments
        # have no tag to build, just a text payload.
        self._element = ElementTree.Comment(data)
        self.parent = None
        self._childNodes = []
        self._flags = []

    def _getData(self):
        return self._element.text

    def _setData(self, value):
        self._element.text = value

    # The comment's text content.
    data = property(_getData, _setData)
| 163 | |||
class DocumentType(Element):
    """Doctype node faked as an element with the tag "<!DOCTYPE>".

    The doctype name is stored in the element's text; publicId and
    systemId are stored as element attributes (missing reads return "").
    """

    def __init__(self, name, publicId, systemId):
        Element.__init__(self, "<!DOCTYPE>")
        self._element.text = name
        self.publicId = publicId
        self.systemId = systemId

    def _getPublicId(self):
        return self._element.get("publicId", "")

    def _setPublicId(self, value):
        # None means "leave unset"; reads then return the "" default.
        if value is not None:
            self._element.set("publicId", value)

    publicId = property(_getPublicId, _setPublicId)

    def _getSystemId(self):
        return self._element.get("systemId", "")

    def _setSystemId(self, value):
        if value is not None:
            self._element.set("systemId", value)

    systemId = property(_getSystemId, _setSystemId)
| 188 | |||
class Document(Element):
    """Synthetic root node; ElementTree has no document node of its own."""

    def __init__(self):
        Element.__init__(self, "DOCUMENT_ROOT")

class DocumentFragment(Element):
    """Synthetic fragment container, mirroring Document."""

    def __init__(self):
        Element.__init__(self, "DOCUMENT_FRAGMENT")
| 196 | |||
def testSerializer(element):
    """Serialize *element*'s subtree in the indented test-suite format."""
    rv = []

    def serializeElement(element, indent=0):
        if not(hasattr(element, "tag")):
            # An ElementTree instance rather than an element: unwrap it.
            element = element.getroot()
        if element.tag == "<!DOCTYPE>":
            if element.get("publicId") or element.get("systemId"):
                publicId = element.get("publicId") or ""
                systemId = element.get("systemId") or ""
                rv.append("""<!DOCTYPE %s "%s" "%s">""" %
                          (element.text, publicId, systemId))
            else:
                rv.append("<!DOCTYPE %s>" % (element.text,))
        elif element.tag == "DOCUMENT_ROOT":
            rv.append("#document")
            if element.text is not None:
                rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text))
            if element.tail is not None:
                raise TypeError("Document node cannot have tail")
            if hasattr(element, "attrib") and len(element.attrib):
                raise TypeError("Document node cannot have attributes")
        elif element.tag == ElementTreeCommentType:
            rv.append("|%s<!-- %s -->" % (' ' * indent, element.text))
        else:
            assert isinstance(element.tag, text_type), \
                "Expected unicode, got %s, %s" % (type(element.tag), element.tag)
            nsmatch = tag_regexp.match(element.tag)

            if nsmatch is None:
                name = element.tag
            else:
                # Foreign namespace: render as "prefix localname".
                ns, name = nsmatch.groups()
                prefix = constants.prefixes[ns]
                name = "%s %s" % (prefix, name)
            rv.append("|%s<%s>" % (' ' * indent, name))

            if hasattr(element, "attrib"):
                attributes = []
                for name, value in element.attrib.items():
                    nsmatch = tag_regexp.match(name)
                    if nsmatch is not None:
                        ns, name = nsmatch.groups()
                        prefix = constants.prefixes[ns]
                        attr_string = "%s %s" % (prefix, name)
                    else:
                        attr_string = name
                    attributes.append((attr_string, value))

                # Sorted for stable, comparable output.
                for name, value in sorted(attributes):
                    rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value))
            if element.text:
                rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text))
        indent += 2
        for child in element:
            serializeElement(child, indent)
        # Tail text belongs to the parent's level, hence indent - 2.
        if element.tail:
            rv.append("|%s\"%s\"" % (' ' * (indent - 2), element.tail))
    serializeElement(element, 0)

    return "\n".join(rv)
| 258 | |||
def tostring(element):  # pylint:disable=unused-variable
    """Serialize an element and its child nodes to a string"""
    rv = []
    # Renamed from `filter` to avoid shadowing the builtin.
    infoset_filter = _ihatexml.InfosetFilter()

    def serializeElement(element):
        if isinstance(element, ElementTree.ElementTree):
            element = element.getroot()

        if element.tag == "<!DOCTYPE>":
            if element.get("publicId") or element.get("systemId"):
                publicId = element.get("publicId") or ""
                systemId = element.get("systemId") or ""
                rv.append("""<!DOCTYPE %s PUBLIC "%s" "%s">""" %
                          (element.text, publicId, systemId))
            else:
                rv.append("<!DOCTYPE %s>" % (element.text,))
        elif element.tag == "DOCUMENT_ROOT":
            # The synthetic root itself is not serialized, only its text
            # and children.
            if element.text is not None:
                rv.append(element.text)
            if element.tail is not None:
                raise TypeError("Document node cannot have tail")
            if hasattr(element, "attrib") and len(element.attrib):
                raise TypeError("Document node cannot have attributes")

            for child in element:
                serializeElement(child)

        elif element.tag == ElementTreeCommentType:
            rv.append("<!--%s-->" % (element.text,))
        else:
            # This is assumed to be an ordinary element.  The tag is run
            # through the infoset filter in both branches (the original
            # filtered it only when the element had no attributes) and
            # the same filtered name is reused for the closing tag.
            tag = infoset_filter.fromXmlName(element.tag)
            if not element.attrib:
                rv.append("<%s>" % (tag,))
            else:
                attr = " ".join(["%s=\"%s\"" % (
                    infoset_filter.fromXmlName(name), value)
                    for name, value in element.attrib.items()])
                rv.append("<%s %s>" % (tag, attr))
            if element.text:
                rv.append(element.text)

            for child in element:
                serializeElement(child)

            rv.append("</%s>" % (tag,))

        if element.tail:
            rv.append(element.tail)

    serializeElement(element)

    return "".join(rv)
| 312 | |||
class TreeBuilder(base.TreeBuilder):  # pylint:disable=unused-variable
    """Tree builder backed by the wrapped ElementTree node classes."""

    documentClass = Document
    doctypeClass = DocumentType
    elementClass = Element
    commentClass = Comment
    fragmentClass = DocumentFragment
    implementation = ElementTreeImplementation

    def testSerializer(self, element):
        return testSerializer(element)

    def getDocument(self):
        # With fullTree the synthetic DOCUMENT_ROOT is returned;
        # otherwise only the <html> element.
        if fullTree:
            return self.document._element
        tag = ("html" if self.defaultNamespace is None
               else "{%s}html" % self.defaultNamespace)
        return self.document._element.find(tag)

    def getFragment(self):
        return base.TreeBuilder.getFragment(self)._element
| 336 | |||
| 337 | return locals() | ||
| 338 | |||
| 339 | |||
# Factory: returns a cached module-like object per ElementTree implementation.
getETreeModule = moduleFactoryFactory(getETreeBuilder)
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treebuilders/etree_lxml.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treebuilders/etree_lxml.py new file mode 100644 index 0000000..66a9ba3 --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treebuilders/etree_lxml.py | |||
| @@ -0,0 +1,366 @@ | |||
| 1 | """Module for supporting the lxml.etree library. The idea here is to use as much | ||
| 2 | of the native library as possible, without using fragile hacks like custom element | ||
| 3 | names that break between releases. The downside of this is that we cannot represent | ||
| 4 | all possible trees; specifically the following are known to cause problems: | ||
| 5 | |||
| 6 | Text or comments as siblings of the root element | ||
Doctypes with no name
| 8 | |||
| 9 | When any of these things occur, we emit a DataLossWarning | ||
| 10 | """ | ||
| 11 | |||
| 12 | from __future__ import absolute_import, division, unicode_literals | ||
| 13 | # pylint:disable=protected-access | ||
| 14 | |||
| 15 | import warnings | ||
| 16 | import re | ||
| 17 | import sys | ||
| 18 | |||
| 19 | from . import base | ||
| 20 | from ..constants import DataLossWarning | ||
| 21 | from .. import constants | ||
| 22 | from . import etree as etree_builders | ||
| 23 | from .. import _ihatexml | ||
| 24 | |||
| 25 | import lxml.etree as etree | ||
| 26 | |||
| 27 | |||
# lxml trees always carry the full document (docinfo, root-level siblings).
fullTree = True
# Matches ElementTree-style "{namespace}localname" tags.
tag_regexp = re.compile("{([^}]*)}(.*)")

# Sentinel tag object identifying lxml comment nodes.
comment_type = etree.Comment("asd").tag
| 32 | |||
| 33 | |||
class DocumentType(object):
    """Plain record holding a doctype's name, public id and system id."""

    def __init__(self, name, publicId, systemId):
        self.name, self.publicId, self.systemId = name, publicId, systemId
| 39 | |||
| 40 | |||
class Document(object):
    """Document wrapper for lxml trees.

    Nodes appended here (comments, doctypes) become siblings of the root
    element via addnext(), because lxml does not allow arbitrary
    children on the document itself.
    """

    def __init__(self):
        self._elementTree = None
        self._childNodes = []

    def _getChildNodes(self):
        return self._childNodes

    # Read-only view of the wrapper child list.
    childNodes = property(_getChildNodes)

    def appendChild(self, element):
        # Place the new node directly after the root element.
        self._elementTree.getroot().addnext(element._element)
| 53 | |||
| 54 | |||
def testSerializer(element):
    """Serialize *element*'s subtree in the indented test-suite format."""
    rv = []
    infosetFilter = _ihatexml.InfosetFilter(preventDoubleDashComments=True)

    def serializeElement(element, indent=0):
        if not hasattr(element, "tag"):
            if hasattr(element, "getroot"):
                # Full tree case
                rv.append("#document")
                if element.docinfo.internalDTD:
                    if not (element.docinfo.public_id or
                            element.docinfo.system_url):
                        dtd_str = "<!DOCTYPE %s>" % element.docinfo.root_name
                    else:
                        dtd_str = """<!DOCTYPE %s "%s" "%s">""" % (
                            element.docinfo.root_name,
                            element.docinfo.public_id,
                            element.docinfo.system_url)
                    rv.append("|%s%s" % (' ' * (indent + 2), dtd_str))
                # Rewind to the first root-level sibling (comments may
                # precede the root element), then walk them in order.
                next_element = element.getroot()
                while next_element.getprevious() is not None:
                    next_element = next_element.getprevious()
                while next_element is not None:
                    serializeElement(next_element, indent + 2)
                    next_element = next_element.getnext()
            elif isinstance(element, str) or isinstance(element, bytes):
                # Text in a fragment
                assert isinstance(element, str) or sys.version_info[0] == 2
                rv.append("|%s\"%s\"" % (' ' * indent, element))
            else:
                # Fragment case
                rv.append("#document-fragment")
                for next_element in element:
                    serializeElement(next_element, indent + 2)
        elif element.tag == comment_type:
            rv.append("|%s<!-- %s -->" % (' ' * indent, element.text))
            if hasattr(element, "tail") and element.tail:
                rv.append("|%s\"%s\"" % (' ' * indent, element.tail))
        else:
            assert isinstance(element, etree._Element)
            nsmatch = etree_builders.tag_regexp.match(element.tag)
            if nsmatch is not None:
                # Foreign namespace: render as "prefix localname".
                ns = nsmatch.group(1)
                tag = nsmatch.group(2)
                prefix = constants.prefixes[ns]
                rv.append("|%s<%s %s>" % (' ' * indent, prefix,
                                          infosetFilter.fromXmlName(tag)))
            else:
                rv.append("|%s<%s>" % (' ' * indent,
                                       infosetFilter.fromXmlName(element.tag)))

            if hasattr(element, "attrib"):
                attributes = []
                for name, value in element.attrib.items():
                    nsmatch = tag_regexp.match(name)
                    if nsmatch is not None:
                        ns, name = nsmatch.groups()
                        name = infosetFilter.fromXmlName(name)
                        prefix = constants.prefixes[ns]
                        attr_string = "%s %s" % (prefix, name)
                    else:
                        attr_string = infosetFilter.fromXmlName(name)
                    attributes.append((attr_string, value))

                # Sorted for stable, comparable output.
                for name, value in sorted(attributes):
                    rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value))

            if element.text:
                rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text))
            indent += 2
            for child in element:
                serializeElement(child, indent)
            if hasattr(element, "tail") and element.tail:
                # Tail text is rendered at the parent's level.
                rv.append("|%s\"%s\"" % (' ' * (indent - 2), element.tail))
    serializeElement(element, 0)

    return "\n".join(rv)
| 132 | |||
| 133 | |||
def tostring(element):
    """Serialize an element and its child nodes to a string"""
    rv = []

    def serializeElement(element):
        if not hasattr(element, "tag"):
            # lxml ElementTree wrapper: emit any internal DTD first, then
            # recurse into the real root element.
            if element.docinfo.internalDTD:
                if element.docinfo.doctype:
                    rv.append(element.docinfo.doctype)
                else:
                    rv.append("<!DOCTYPE %s>" % element.docinfo.root_name)
            serializeElement(element.getroot())

        elif element.tag == comment_type:
            rv.append("<!--%s-->" % (element.text,))

        else:
            # Ordinary element: start tag (with attributes), text, children,
            # end tag.
            if element.attrib:
                attr = " ".join("%s=\"%s\"" % (name, value)
                                for name, value in element.attrib.items())
                rv.append("<%s %s>" % (element.tag, attr))
            else:
                rv.append("<%s>" % (element.tag,))
            if element.text:
                rv.append(element.text)

            for child in element:
                serializeElement(child)

            rv.append("</%s>" % (element.tag,))

        # Tail text belongs to the parent's content and follows this node.
        if hasattr(element, "tail") and element.tail:
            rv.append(element.tail)

    serializeElement(element)

    return "".join(rv)
| 173 | |||
| 174 | |||
class TreeBuilder(base.TreeBuilder):
    """Tree builder that constructs an lxml.etree document.

    lxml cannot represent everything HTML parsing can produce (empty
    doctypes, non-XML names, adjacent top-level comments, ...); wherever
    information would be dropped a ``DataLossWarning`` is emitted instead
    of failing.
    """

    documentClass = Document
    doctypeClass = DocumentType
    elementClass = None  # replaced per-instance in __init__ (coercing subclass)
    commentClass = None  # replaced per-instance in __init__ (coercing subclass)
    fragmentClass = Document
    implementation = etree

    def __init__(self, namespaceHTMLElements, fullTree=False):
        """Create a builder.

        :arg namespaceHTMLElements: whether to namespace HTML elements
        :arg fullTree: passed through to the generic etree builder module
        """
        builder = etree_builders.getETreeModule(etree, fullTree=fullTree)
        # The filter coerces names/characters that lxml cannot represent;
        # it is closed over by the nested classes below.
        infosetFilter = self.infosetFilter = _ihatexml.InfosetFilter(preventDoubleDashComments=True)
        self.namespaceHTMLElements = namespaceHTMLElements

        # dict subclass that mirrors every write into the underlying lxml
        # element's attrib mapping, coercing names on the way through.
        class Attributes(dict):
            def __init__(self, element, value=None):
                if value is None:
                    value = {}
                self._element = element
                dict.__init__(self, value)  # pylint:disable=non-parent-init-called
                for key, value in self.items():
                    if isinstance(key, tuple):
                        # Namespaced attribute: key is (prefix, name, namespace).
                        name = "{%s}%s" % (key[2], infosetFilter.coerceAttribute(key[1]))
                    else:
                        name = infosetFilter.coerceAttribute(key)
                    self._element._element.attrib[name] = value

            def __setitem__(self, key, value):
                dict.__setitem__(self, key, value)
                if isinstance(key, tuple):
                    name = "{%s}%s" % (key[2], infosetFilter.coerceAttribute(key[1]))
                else:
                    name = infosetFilter.coerceAttribute(key)
                self._element._element.attrib[name] = value

        # Element subclass that coerces names/text so they are valid for lxml.
        class Element(builder.Element):
            def __init__(self, name, namespace):
                name = infosetFilter.coerceElement(name)
                builder.Element.__init__(self, name, namespace=namespace)
                self._attributes = Attributes(self)

            def _setName(self, name):
                self._name = infosetFilter.coerceElement(name)
                self._element.tag = self._getETreeTag(
                    self._name, self._namespace)

            def _getName(self):
                # Reverse the coercion so callers see the original name.
                return infosetFilter.fromXmlName(self._name)

            name = property(_getName, _setName)

            def _getAttributes(self):
                return self._attributes

            def _setAttributes(self, attributes):
                self._attributes = Attributes(self, attributes)

            attributes = property(_getAttributes, _setAttributes)

            def insertText(self, data, insertBefore=None):
                data = infosetFilter.coerceCharacters(data)
                builder.Element.insertText(self, data, insertBefore)

            def appendChild(self, child):
                builder.Element.appendChild(self, child)

        # Comment subclass that coerces data (e.g. double dashes) for lxml.
        class Comment(builder.Comment):
            def __init__(self, data):
                data = infosetFilter.coerceComment(data)
                builder.Comment.__init__(self, data)

            def _setData(self, data):
                data = infosetFilter.coerceComment(data)
                self._element.text = data

            def _getData(self):
                return self._element.text

            data = property(_getData, _setData)

        self.elementClass = Element
        self.commentClass = Comment
        # self.fragmentClass = builder.DocumentFragment
        base.TreeBuilder.__init__(self, namespaceHTMLElements)

    def reset(self):
        """Reset the builder; comments are buffered until the root exists."""
        base.TreeBuilder.reset(self)
        # Comments seen before insertRoot() cannot be attached yet, so the
        # initial handler just buffers them (see insertCommentInitial).
        self.insertComment = self.insertCommentInitial
        self.initial_comments = []
        self.doctype = None

    def testSerializer(self, element):
        # Delegates to the module-level testSerializer helper.
        return testSerializer(element)

    def getDocument(self):
        # NOTE(review): this reads the module-level ``fullTree`` flag, not
        # the ``fullTree`` argument given to __init__ -- confirm intended.
        if fullTree:
            return self.document._elementTree
        else:
            return self.document._elementTree.getroot()

    def getFragment(self):
        """Return the fragment as a list of text and lxml elements."""
        fragment = []
        element = self.openElements[0]._element
        if element.text:
            fragment.append(element.text)
        fragment.extend(list(element))
        if element.tail:
            fragment.append(element.tail)
        return fragment

    def insertDoctype(self, token):
        """Record the doctype token; actual insertion happens in insertRoot."""
        name = token["name"]
        publicId = token["publicId"]
        systemId = token["systemId"]

        if not name:
            warnings.warn("lxml cannot represent empty doctype", DataLossWarning)
            self.doctype = None
        else:
            coercedName = self.infosetFilter.coerceElement(name)
            if coercedName != name:
                warnings.warn("lxml cannot represent non-xml doctype", DataLossWarning)

            doctype = self.doctypeClass(coercedName, publicId, systemId)
            self.doctype = doctype

    def insertCommentInitial(self, data, parent=None):
        # Buffer comments that arrive before the root element exists; they
        # are attached to the tree in insertRoot().
        assert parent is None or parent is self.document
        assert self.document._elementTree is None
        self.initial_comments.append(data)

    def insertCommentMain(self, data, parent=None):
        # lxml stores at most one trailing comment slot after the root, so
        # a second adjacent top-level comment would be lost.
        if (parent == self.document and
                self.document._elementTree.getroot()[-1].tag == comment_type):
            warnings.warn("lxml cannot represent adjacent comments beyond the root elements", DataLossWarning)
        super(TreeBuilder, self).insertComment(data, parent)

    def insertRoot(self, token):
        """Create the document, doctype, and root element for *token*."""
        # Because of the way libxml2 works, it doesn't seem to be possible to
        # alter information like the doctype after the tree has been parsed.
        # Therefore we need to use the built-in parser to create our initial
        # tree, after which we can add elements like normal
        docStr = ""
        if self.doctype:
            assert self.doctype.name
            docStr += "<!DOCTYPE %s" % self.doctype.name
            if (self.doctype.publicId is not None or
                    self.doctype.systemId is not None):
                docStr += (' PUBLIC "%s" ' %
                           (self.infosetFilter.coercePubid(self.doctype.publicId or "")))
                if self.doctype.systemId:
                    sysid = self.doctype.systemId
                    # Pick a quote style the system id does not itself use;
                    # if it uses both, strip single quotes (lossy).
                    if sysid.find("'") >= 0 and sysid.find('"') >= 0:
                        warnings.warn("DOCTYPE system cannot contain single and double quotes", DataLossWarning)
                        sysid = sysid.replace("'", 'U00027')
                    if sysid.find("'") >= 0:
                        docStr += '"%s"' % sysid
                    else:
                        docStr += "'%s'" % sysid
                else:
                    docStr += "''"
            docStr += ">"
            if self.doctype.name != token["name"]:
                warnings.warn("lxml cannot represent doctype with a different name to the root element", DataLossWarning)
        # Placeholder root; renamed to the real root tag below.
        docStr += "<THIS_SHOULD_NEVER_APPEAR_PUBLICLY/>"
        root = etree.fromstring(docStr)

        # Append the initial comments:
        for comment_token in self.initial_comments:
            comment = self.commentClass(comment_token["data"])
            root.addprevious(comment._element)

        # Create the root document and add the ElementTree to it
        self.document = self.documentClass()
        self.document._elementTree = root.getroottree()

        # Give the root element the right name
        name = token["name"]
        namespace = token.get("namespace", self.defaultNamespace)
        if namespace is None:
            etree_tag = name
        else:
            etree_tag = "{%s}%s" % (namespace, name)
        root.tag = etree_tag

        # Add the root element to the internal child/open data structures
        root_element = self.elementClass(name, namespace)
        root_element._element = root
        self.document._childNodes.append(root_element)
        self.openElements.append(root_element)

        # Reset to the default insert comment function
        self.insertComment = self.insertCommentMain
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/__init__.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/__init__.py new file mode 100644 index 0000000..31a173d --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/__init__.py | |||
| @@ -0,0 +1,154 @@ | |||
| 1 | """A collection of modules for iterating through different kinds of | ||
| 2 | tree, generating tokens identical to those produced by the tokenizer | ||
| 3 | module. | ||
| 4 | |||
| 5 | To create a tree walker for a new type of tree, you need to do | ||
| 6 | implement a tree walker object (called TreeWalker by convention) that | ||
| 7 | implements a 'serialize' method taking a tree as sole argument and | ||
| 8 | returning an iterator generating tokens. | ||
| 9 | """ | ||
| 10 | |||
| 11 | from __future__ import absolute_import, division, unicode_literals | ||
| 12 | |||
| 13 | from .. import constants | ||
| 14 | from .._utils import default_etree | ||
| 15 | |||
| 16 | __all__ = ["getTreeWalker", "pprint"] | ||
| 17 | |||
| 18 | treeWalkerCache = {} | ||
| 19 | |||
| 20 | |||
def getTreeWalker(treeType, implementation=None, **kwargs):
    """Get a TreeWalker class for various types of tree with built-in support

    :arg str treeType: the name of the tree type required (case-insensitive).
        Supported values are:

        * "dom": The xml.dom.minidom DOM implementation
        * "etree": A generic walker for tree implementations exposing an
          elementtree-like interface (known to work with ElementTree,
          cElementTree and lxml.etree).
        * "lxml": Optimized walker for lxml.etree
        * "genshi": a Genshi stream

    :arg implementation: A module implementing the tree type e.g.
        xml.etree.ElementTree or cElementTree (Currently applies to the "etree"
        tree type only).

    :arg kwargs: keyword arguments passed to the etree walker--for other
        walkers, this has no effect

    :returns: a TreeWalker class (``None`` for an unknown tree type)

    """
    key = treeType.lower()
    if key not in treeWalkerCache:
        if key == "etree":
            from . import etree
            if implementation is None:
                implementation = default_etree
            # XXX: NEVER cache here, caching is done in the etree submodule
            return etree.getETreeModule(implementation, **kwargs).TreeWalker
        elif key == "dom":
            from . import dom
            treeWalkerCache[key] = dom.TreeWalker
        elif key == "genshi":
            from . import genshi
            treeWalkerCache[key] = genshi.TreeWalker
        elif key == "lxml":
            from . import etree_lxml
            treeWalkerCache[key] = etree_lxml.TreeWalker
    return treeWalkerCache.get(key)
| 63 | |||
| 64 | |||
def concatenateCharacterTokens(tokens):
    """Merge adjacent Characters/SpaceCharacters tokens into a single
    Characters token; all other tokens pass through unchanged."""
    buffered = []

    def flush():
        # Emit the buffered text run as one Characters token and reset.
        merged = {"type": "Characters", "data": "".join(buffered)}
        del buffered[:]
        return merged

    for token in tokens:
        if token["type"] in ("Characters", "SpaceCharacters"):
            buffered.append(token["data"])
            continue
        if buffered:
            yield flush()
        yield token
    if buffered:
        yield flush()
| 78 | |||
| 79 | |||
def pprint(walker):
    """Pretty printer for tree walkers

    Takes a TreeWalker instance and pretty prints the output of walking the tree.

    :arg walker: a TreeWalker instance

    """
    out = []
    depth = 0
    for token in concatenateCharacterTokens(walker):
        kind = token["type"]
        if kind in ("StartTag", "EmptyTag"):
            # Tag name, prefixed when it lives outside the HTML namespace.
            namespace = token["namespace"]
            if namespace and namespace != constants.namespaces["html"]:
                name = "%s %s" % (constants.prefixes.get(namespace, namespace),
                                  token["name"])
            else:
                name = token["name"]
            out.append("%s<%s>" % (" " * depth, name))
            depth += 2
            # Attributes (sorted for consistent ordering).  Unlike tag names,
            # attribute names are prefixed for *any* namespace.
            for (namespace, localname), value in sorted(token["data"].items()):
                if namespace:
                    name = "%s %s" % (constants.prefixes.get(namespace, namespace),
                                      localname)
                else:
                    name = localname
                out.append('%s%s="%s"' % (" " * depth, name, value))
            if kind == "EmptyTag":
                # Self-closing: no matching EndTag token will arrive.
                depth -= 2

        elif kind == "EndTag":
            depth -= 2

        elif kind == "Comment":
            out.append("%s<!-- %s -->" % (" " * depth, token["data"]))

        elif kind == "Doctype":
            if not token["name"]:
                out.append("%s<!DOCTYPE >" % (" " * depth,))
            elif token["publicId"]:
                out.append('%s<!DOCTYPE %s "%s" "%s">' %
                           (" " * depth,
                            token["name"],
                            token["publicId"],
                            token["systemId"] if token["systemId"] else ""))
            elif token["systemId"]:
                out.append('%s<!DOCTYPE %s "" "%s">' %
                           (" " * depth,
                            token["name"],
                            token["systemId"]))
            else:
                out.append("%s<!DOCTYPE %s>" % (" " * depth, token["name"]))

        elif kind == "Characters":
            out.append('%s"%s"' % (" " * depth, token["data"]))

        elif kind == "SpaceCharacters":
            assert False, "concatenateCharacterTokens should have got rid of all Space tokens"

        else:
            raise ValueError("Unknown token type, %s" % kind)

    return "\n".join(out)
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/base.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/base.py new file mode 100644 index 0000000..f82984b --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/base.py | |||
| @@ -0,0 +1,252 @@ | |||
| 1 | from __future__ import absolute_import, division, unicode_literals | ||
| 2 | |||
| 3 | from xml.dom import Node | ||
| 4 | from ..constants import namespaces, voidElements, spaceCharacters | ||
| 5 | |||
| 6 | __all__ = ["DOCUMENT", "DOCTYPE", "TEXT", "ELEMENT", "COMMENT", "ENTITY", "UNKNOWN", | ||
| 7 | "TreeWalker", "NonRecursiveTreeWalker"] | ||
| 8 | |||
# Node-type codes returned by getNodeDetails(); reuse the xml.dom constants
# so they compare equal to minidom nodeType values.
DOCUMENT = Node.DOCUMENT_NODE
DOCTYPE = Node.DOCUMENT_TYPE_NODE
TEXT = Node.TEXT_NODE
ELEMENT = Node.ELEMENT_NODE
COMMENT = Node.COMMENT_NODE
ENTITY = Node.ENTITY_NODE
# Sentinel for node types the walkers do not recognise.
UNKNOWN = "<#UNKNOWN#>"

# Flatten the imported space-character collection into a single string so it
# can be used with str.lstrip()/str.rstrip() in TreeWalker.text().
spaceCharacters = "".join(spaceCharacters)
| 18 | |||
| 19 | |||
class TreeWalker(object):
    """Walks a tree yielding tokens.

    Every token is a dict with at least a ``type`` key naming the token
    kind; the remaining keys depend on the kind.
    """

    def __init__(self, tree):
        """Create a TreeWalker over *tree*.

        :arg tree: the tree to walk
        """
        self.tree = tree

    def __iter__(self):
        # Subclasses must implement the actual walk.
        raise NotImplementedError

    def error(self, msg):
        """Return a SerializeError token carrying *msg*.

        :arg msg: the error message
        :returns: SerializeError token
        """
        return dict(type="SerializeError", data=msg)

    def emptyTag(self, namespace, name, attrs, hasChildren=False):
        """Yield an EmptyTag token (and an error token if *hasChildren*).

        :arg namespace: the namespace of the token--can be ``None``
        :arg name: the name of the element
        :arg attrs: the attributes of the element as a dict
        :arg hasChildren: whether or not to yield a SerializationError because
            this tag shouldn't have children
        :returns: EmptyTag token
        """
        yield dict(type="EmptyTag", name=name, namespace=namespace, data=attrs)
        if hasChildren:
            # Void elements must not have children.
            yield self.error("Void element has children")

    def startTag(self, namespace, name, attrs):
        """Return a StartTag token.

        :arg namespace: the namespace of the token--can be ``None``
        :arg name: the name of the element
        :arg attrs: the attributes of the element as a dict
        :returns: StartTag token
        """
        return dict(type="StartTag", name=name, namespace=namespace, data=attrs)

    def endTag(self, namespace, name):
        """Return an EndTag token.

        :arg namespace: the namespace of the token--can be ``None``
        :arg name: the name of the element
        :returns: EndTag token
        """
        return dict(type="EndTag", name=name, namespace=namespace)

    def text(self, data):
        """Yield SpaceCharacters and Characters tokens for *data*.

        Leading and trailing whitespace become SpaceCharacters tokens; any
        remaining middle text becomes a single Characters token.  Empty
        pieces are omitted entirely.

        :arg data: the text data
        :returns: one or more ``SpaceCharacters`` and ``Characters`` tokens
        """
        stripped = data.lstrip(spaceCharacters)
        leading = data[:len(data) - len(stripped)]
        if leading:
            yield dict(type="SpaceCharacters", data=leading)
        content = stripped.rstrip(spaceCharacters)
        trailing = stripped[len(content):]
        if content:
            yield dict(type="Characters", data=content)
        if trailing:
            yield dict(type="SpaceCharacters", data=trailing)

    def comment(self, data):
        """Return a Comment token.

        :arg data: the comment
        :returns: Comment token
        """
        return dict(type="Comment", data=data)

    def doctype(self, name, publicId=None, systemId=None):
        """Return a Doctype token.

        :arg name:
        :arg publicId:
        :arg systemId:
        :returns: the Doctype token
        """
        return dict(type="Doctype", name=name, publicId=publicId,
                    systemId=systemId)

    def entity(self, name):
        """Return an Entity token.

        :arg name: the entity name
        :returns: an Entity token
        """
        return dict(type="Entity", name=name)

    def unknown(self, nodeType):
        """Return an error token for an unrecognised node type."""
        return self.error("Unknown node type: " + nodeType)
| 178 | |||
| 179 | |||
class NonRecursiveTreeWalker(TreeWalker):
    """TreeWalker that walks the tree iteratively.

    Subclasses supply the four navigation primitives below; ``__iter__``
    drives them in a loop (descend to first child, otherwise emit end tags
    while moving to the next sibling or up to the parent) so no recursion
    is needed.
    """

    def getNodeDetails(self, node):
        # Return a tuple (node-type code, *type-specific details).
        raise NotImplementedError

    def getFirstChild(self, node):
        raise NotImplementedError

    def getNextSibling(self, node):
        raise NotImplementedError

    def getParentNode(self, node):
        raise NotImplementedError

    def __iter__(self):
        currentNode = self.tree
        while currentNode is not None:
            details = self.getNodeDetails(currentNode)
            type, details = details[0], details[1:]
            hasChildren = False

            if type == DOCTYPE:
                yield self.doctype(*details)

            elif type == TEXT:
                for token in self.text(*details):
                    yield token

            elif type == ELEMENT:
                namespace, name, attributes, hasChildren = details
                # HTML void elements are emitted as EmptyTag and never
                # descended into, even if the tree claims they have children.
                if (not namespace or namespace == namespaces["html"]) and name in voidElements:
                    for token in self.emptyTag(namespace, name, attributes,
                                               hasChildren):
                        yield token
                    hasChildren = False
                else:
                    yield self.startTag(namespace, name, attributes)

            elif type == COMMENT:
                yield self.comment(details[0])

            elif type == ENTITY:
                yield self.entity(details[0])

            elif type == DOCUMENT:
                hasChildren = True

            else:
                yield self.unknown(details[0])

            if hasChildren:
                firstChild = self.getFirstChild(currentNode)
            else:
                firstChild = None

            if firstChild is not None:
                # Descend.
                currentNode = firstChild
            else:
                # No children: close elements while moving to the next
                # sibling, or up to the parent when there is none.
                while currentNode is not None:
                    details = self.getNodeDetails(currentNode)
                    type, details = details[0], details[1:]
                    if type == ELEMENT:
                        namespace, name, attributes, hasChildren = details
                        # Void HTML elements were emitted as EmptyTag above,
                        # so they get no EndTag here.
                        if (namespace and namespace != namespaces["html"]) or name not in voidElements:
                            yield self.endTag(namespace, name)
                    if self.tree is currentNode:
                        # Back at the walk's starting node: done.
                        currentNode = None
                        break
                    nextSibling = self.getNextSibling(currentNode)
                    if nextSibling is not None:
                        currentNode = nextSibling
                        break
                    else:
                        currentNode = self.getParentNode(currentNode)
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/dom.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/dom.py new file mode 100644 index 0000000..b3e2753 --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/dom.py | |||
| @@ -0,0 +1,43 @@ | |||
| 1 | from __future__ import absolute_import, division, unicode_literals | ||
| 2 | |||
| 3 | from xml.dom import Node | ||
| 4 | |||
| 5 | from . import base | ||
| 6 | |||
| 7 | |||
class TreeWalker(base.NonRecursiveTreeWalker):
    """Non-recursive tree walker for xml.dom.minidom trees."""

    def getNodeDetails(self, node):
        """Map a minidom node to a (node-type code, *details) tuple."""
        nodeType = node.nodeType
        if nodeType == Node.DOCUMENT_TYPE_NODE:
            return base.DOCTYPE, node.name, node.publicId, node.systemId

        if nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
            return base.TEXT, node.nodeValue

        if nodeType == Node.ELEMENT_NODE:
            # Build an attribute dict keyed by (namespaceURI or None, name).
            attrs = {}
            for attrName in list(node.attributes.keys()):
                attrNode = node.getAttributeNode(attrName)
                if attrNode.namespaceURI:
                    key = (attrNode.namespaceURI, attrNode.localName)
                else:
                    key = (None, attrNode.name)
                attrs[key] = attrNode.value
            return (base.ELEMENT, node.namespaceURI, node.nodeName,
                    attrs, node.hasChildNodes())

        if nodeType == Node.COMMENT_NODE:
            return base.COMMENT, node.nodeValue

        if nodeType in (Node.DOCUMENT_NODE, Node.DOCUMENT_FRAGMENT_NODE):
            return (base.DOCUMENT,)

        return base.UNKNOWN, node.nodeType

    def getFirstChild(self, node):
        return node.firstChild

    def getNextSibling(self, node):
        return node.nextSibling

    def getParentNode(self, node):
        return node.parentNode
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/etree.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/etree.py new file mode 100644 index 0000000..1a35add --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/etree.py | |||
| @@ -0,0 +1,130 @@ | |||
| 1 | from __future__ import absolute_import, division, unicode_literals | ||
| 2 | |||
| 3 | from collections import OrderedDict | ||
| 4 | import re | ||
| 5 | |||
| 6 | from pip._vendor.six import string_types | ||
| 7 | |||
| 8 | from . import base | ||
| 9 | from .._utils import moduleFactoryFactory | ||
| 10 | |||
| 11 | tag_regexp = re.compile("{([^}]*)}(.*)") | ||
| 12 | |||
| 13 | |||
def getETreeBuilder(ElementTreeImplementation):
    """Build and return (via ``locals()``) a TreeWalker class bound to the
    given ElementTree-compatible implementation module."""
    ElementTree = ElementTreeImplementation
    # Comments in ElementTree are elements whose tag is the Comment factory
    # function itself; capture it once for comparison.
    ElementTreeCommentType = ElementTree.Comment("asd").tag

    class TreeWalker(base.NonRecursiveTreeWalker):  # pylint:disable=unused-variable
        """Given the particular ElementTree representation, this implementation,
        to avoid using recursion, returns "nodes" as tuples with the following
        content:

        1. The current element

        2. The index of the element relative to its parent

        3. A stack of ancestor elements

        4. A flag "text", "tail" or None to indicate if the current node is a
           text node; either the text or tail of the current element (1)
        """
        def getNodeDetails(self, node):
            if isinstance(node, tuple):  # It might be the root Element
                elt, _, _, flag = node
                if flag in ("text", "tail"):
                    # Text "node": report the element's .text or .tail string.
                    return base.TEXT, getattr(elt, flag)
                else:
                    node = elt

            if not(hasattr(node, "tag")):
                # An ElementTree wrapper rather than an Element.
                node = node.getroot()

            if node.tag in ("DOCUMENT_ROOT", "DOCUMENT_FRAGMENT"):
                return (base.DOCUMENT,)

            elif node.tag == "<!DOCTYPE>":
                return (base.DOCTYPE, node.text,
                        node.get("publicId"), node.get("systemId"))

            elif node.tag == ElementTreeCommentType:
                return base.COMMENT, node.text

            else:
                assert isinstance(node.tag, string_types), type(node.tag)
                # This is assumed to be an ordinary element
                # Split "{namespace}tag" into its parts, if namespaced.
                match = tag_regexp.match(node.tag)
                if match:
                    namespace, tag = match.groups()
                else:
                    namespace = None
                    tag = node.tag
                attrs = OrderedDict()
                for name, value in list(node.attrib.items()):
                    match = tag_regexp.match(name)
                    if match:
                        attrs[(match.group(1), match.group(2))] = value
                    else:
                        attrs[(None, name)] = value
                # hasChildren is truthy if there are child elements or text.
                return (base.ELEMENT, namespace, tag,
                        attrs, len(node) or node.text)

        def getFirstChild(self, node):
            if isinstance(node, tuple):
                element, key, parents, flag = node
            else:
                # Bare element (the walk's root): synthesise tuple state.
                element, key, parents, flag = node, None, [], None

            if flag in ("text", "tail"):
                # Text nodes have no children.
                return None
            else:
                if element.text:
                    # The .text string is the element's first child.
                    return element, key, parents, "text"
                elif len(element):
                    parents.append(element)
                    return element[0], 0, parents, None
                else:
                    return None

        def getNextSibling(self, node):
            if isinstance(node, tuple):
                element, key, parents, flag = node
            else:
                return None

            if flag == "text":
                # After the .text node come the child elements, if any.
                if len(element):
                    parents.append(element)
                    return element[0], 0, parents, None
                else:
                    return None
            else:
                # After an element comes its .tail text, then the next
                # sibling element in the parent.
                if element.tail and flag != "tail":
                    return element, key, parents, "tail"
                elif key < len(parents[-1]) - 1:
                    return parents[-1][key + 1], key + 1, parents, None
                else:
                    return None

        def getParentNode(self, node):
            if isinstance(node, tuple):
                element, key, parents, flag = node
            else:
                return None

            if flag == "text":
                # A .text node's parent is the element itself.
                if not parents:
                    return element
                else:
                    return element, key, parents, None
            else:
                parent = parents.pop()
                if not parents:
                    return parent
                else:
                    assert list(parents[-1]).count(parent) == 1
                    return parent, list(parents[-1]).index(parent), parents, None

    return locals()


# Module-level factory caching one walker module per ElementTree implementation.
getETreeModule = moduleFactoryFactory(getETreeBuilder)
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/etree_lxml.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/etree_lxml.py new file mode 100644 index 0000000..f6f395a --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/etree_lxml.py | |||
| @@ -0,0 +1,213 @@ | |||
| 1 | from __future__ import absolute_import, division, unicode_literals | ||
| 2 | from pip._vendor.six import text_type | ||
| 3 | |||
| 4 | from lxml import etree | ||
| 5 | from ..treebuilders.etree import tag_regexp | ||
| 6 | |||
| 7 | from . import base | ||
| 8 | |||
| 9 | from .. import _ihatexml | ||
| 10 | |||
| 11 | |||
def ensure_str(s):
    """Coerce *s* to text: pass through None and text unchanged, and
    strictly ASCII-decode byte strings (raising on non-ASCII input)."""
    if s is None:
        return None
    if isinstance(s, text_type):
        return s
    return s.decode("ascii", "strict")
| 19 | |||
| 20 | |||
class Root(object):
    """Presents an (lxml) document/tree as a walkable root node.

    Collects the doctype (when lxml's docinfo exposes one) plus every
    top-level sibling of the document element (comments/PIs around it)
    as ``self.children``.
    """

    def __init__(self, et):
        self.elementtree = et
        self.children = []

        # Only lxml trees carry docinfo; anything else raises AttributeError.
        try:
            if et.docinfo.internalDTD:
                self.children.append(Doctype(self,
                                             ensure_str(et.docinfo.root_name),
                                             ensure_str(et.docinfo.public_id),
                                             ensure_str(et.docinfo.system_url)))
        except AttributeError:
            pass

        # A bare element has no getroot(); treat it as the root itself.
        try:
            sibling = et.getroot()
        except AttributeError:
            sibling = et

        # Rewind to the first top-level sibling, then gather them in order.
        while sibling.getprevious() is not None:
            sibling = sibling.getprevious()
        while sibling is not None:
            self.children.append(sibling)
            sibling = sibling.getnext()

        # Root nodes carry no character data.
        self.text = None
        self.tail = None

    def __getitem__(self, key):
        return self.children[key]

    def getnext(self):
        return None

    def __len__(self):
        # The walker treats the document as having a single (root) child.
        return 1
| 57 | |||
| 58 | |||
class Doctype(object):
    """Fake tree node representing a document's DOCTYPE declaration."""

    def __init__(self, root_node, name, public_id, system_id):
        self.root_node = root_node
        self.name = name
        self.public_id = public_id
        self.system_id = system_id
        # Doctype nodes carry no character data.
        self.text = None
        self.tail = None

    def getnext(self):
        # The doctype is always children[0] of the root, so its only
        # possible sibling is the document element at children[1].
        return self.root_node.children[1]
| 71 | |||
| 72 | |||
class FragmentRoot(Root):
    """Root node for a fragment (a plain list of top-level children).

    Deliberately does not call Root.__init__ — there is no ElementTree
    to inspect; each child is wrapped so it gains sibling navigation.
    """

    def __init__(self, children):
        self.children = []
        for child in children:
            self.children.append(FragmentWrapper(self, child))
        self.text = None
        self.tail = None

    def getnext(self):
        return None
| 80 | |||
| 81 | |||
class FragmentWrapper(object):
    """Proxy around one fragment child (an element or bare text object)
    that adds root-aware sibling navigation on top of the wrapped object."""

    def __init__(self, fragment_root, obj):
        self.root_node = fragment_root
        self.obj = obj
        # Cache text/tail as text up front; bare strings have neither.
        self.text = ensure_str(self.obj.text) if hasattr(self.obj, 'text') else None
        self.tail = ensure_str(self.obj.tail) if hasattr(self.obj, 'tail') else None

    def __getattr__(self, name):
        # Everything not defined here is delegated to the wrapped object.
        return getattr(self.obj, name)

    def getnext(self):
        siblings = self.root_node.children
        idx = siblings.index(self)
        return siblings[idx + 1] if idx < len(siblings) - 1 else None

    def __getitem__(self, key):
        return self.obj[key]

    def __bool__(self):
        return bool(self.obj)

    def getparent(self):
        # Fragment children are top-level by definition.
        return None

    def __str__(self):
        return str(self.obj)

    def __unicode__(self):
        return str(self.obj)

    def __len__(self):
        return len(self.obj)
| 123 | |||
| 124 | |||
class TreeWalker(base.NonRecursiveTreeWalker):
    """Non-recursive tree walker over lxml etree documents and fragments.

    Text pseudo-nodes are ``(element, "text"|"tail")`` tuples; everything
    else is an element, a Root/FragmentRoot, a Doctype, or a wrapped
    fragment child.
    """

    def __init__(self, tree):
        # pylint:disable=redefined-variable-type
        if isinstance(tree, list):
            # Fragment: remember the top-level children so getParentNode
            # can stop at the fragment boundary.
            self.fragmentChildren = set(tree)
            tree = FragmentRoot(tree)
        else:
            self.fragmentChildren = set()
            tree = Root(tree)
        base.NonRecursiveTreeWalker.__init__(self, tree)
        self.filter = _ihatexml.InfosetFilter()

    def getNodeDetails(self, node):
        if isinstance(node, tuple):  # Text node
            node, key = node
            assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key
            return base.TEXT, ensure_str(getattr(node, key))

        if isinstance(node, Root):
            return (base.DOCUMENT,)

        if isinstance(node, Doctype):
            return base.DOCTYPE, node.name, node.public_id, node.system_id

        # A wrapped fragment child without a tag is a bare text object.
        if isinstance(node, FragmentWrapper) and not hasattr(node, "tag"):
            return base.TEXT, ensure_str(node.obj)

        if node.tag == etree.Comment:
            return base.COMMENT, ensure_str(node.text)

        if node.tag == etree.Entity:
            return base.ENTITY, ensure_str(node.text)[1:-1]  # strip &;

        # Ordinary element: split off a Clark-notation {ns}name namespace.
        tag_match = tag_regexp.match(ensure_str(node.tag))
        if tag_match:
            namespace, tag = tag_match.groups()
        else:
            namespace, tag = None, ensure_str(node.tag)
        attrs = {}
        for attr_name, attr_value in list(node.attrib.items()):
            attr_name = ensure_str(attr_name)
            attr_value = ensure_str(attr_value)
            attr_match = tag_regexp.match(attr_name)
            if attr_match:
                attrs[(attr_match.group(1), attr_match.group(2))] = attr_value
            else:
                attrs[(None, attr_name)] = attr_value
        return (base.ELEMENT, namespace, self.filter.fromXmlName(tag),
                attrs, len(node) > 0 or node.text)

    def getFirstChild(self, node):
        assert not isinstance(node, tuple), "Text nodes have no children"
        assert len(node) or node.text, "Node has no children"
        # Leading text is walked before the first child element.
        return (node, "text") if node.text else node[0]

    def getNextSibling(self, node):
        if isinstance(node, tuple):  # Text node
            node, key = node
            assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key
            if key == "text":
                # XXX: we cannot use a "bool(node) and node[0] or None" construct here
                # because node[0] might evaluate to False if it has no child element
                return node[0] if len(node) else None
            # tail text: the real next sibling follows it.
            return node.getnext()

        # An element's tail text is walked before its real next sibling.
        return (node, "tail") if node.tail else node.getnext()

    def getParentNode(self, node):
        if isinstance(node, tuple):  # Text node
            node, key = node
            assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key
            if key == "text":
                return node
            # else (tail): fall through to "normal" processing
        elif node in self.fragmentChildren:
            # Top-level fragment children have no walkable parent.
            return None

        return node.getparent()
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/genshi.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/genshi.py new file mode 100644 index 0000000..42cd559 --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/genshi.py | |||
| @@ -0,0 +1,69 @@ | |||
| 1 | from __future__ import absolute_import, division, unicode_literals | ||
| 2 | |||
| 3 | from genshi.core import QName | ||
| 4 | from genshi.core import START, END, XML_NAMESPACE, DOCTYPE, TEXT | ||
| 5 | from genshi.core import START_NS, END_NS, START_CDATA, END_CDATA, PI, COMMENT | ||
| 6 | |||
| 7 | from . import base | ||
| 8 | |||
| 9 | from ..constants import voidElements, namespaces | ||
| 10 | |||
| 11 | |||
class TreeWalker(base.TreeWalker):
    """Tree walker over a genshi markup event stream."""

    def __iter__(self):
        # Keep one event of lookahead so tokens() can peek at the next
        # event (needed to decide whether a void element's own END tag
        # immediately follows).
        pending = None
        for event in self.tree:
            if pending is not None:
                for token in self.tokens(pending, event):
                    yield token
            pending = event

        # Flush the buffered final event.
        if pending is not None:
            for token in self.tokens(pending, None):
                yield token

    def tokens(self, event, next):
        """Translate one genshi *event* (with lookahead *next*) into
        html5lib walker tokens."""
        kind, data, _ = event
        if kind == START:
            tag, attribs = data
            name = tag.localname
            namespace = tag.namespace
            converted_attribs = {
                ((attr.namespace, attr.localname) if isinstance(attr, QName)
                 else (None, attr)): value
                for attr, value in attribs
            }

            if namespace == namespaces["html"] and name in voidElements:
                # A void element whose next event is not its own END has
                # (bogus) children; emptyTag flags that as an error.
                has_children = (not next or next[0] != END or
                                next[1] != tag)
                for token in self.emptyTag(namespace, name,
                                           converted_attribs, has_children):
                    yield token
            else:
                yield self.startTag(namespace, name, converted_attribs)

        elif kind == END:
            name = data.localname
            namespace = data.namespace
            # Void elements already emitted their token at START time.
            if namespace != namespaces["html"] or name not in voidElements:
                yield self.endTag(namespace, name)

        elif kind == COMMENT:
            yield self.comment(data)

        elif kind == TEXT:
            for token in self.text(data):
                yield token

        elif kind == DOCTYPE:
            yield self.doctype(*data)

        elif kind in (XML_NAMESPACE, DOCTYPE, START_NS, END_NS,
                      START_CDATA, END_CDATA, PI):
            # Structural/meta events with no html5lib equivalent.
            pass

        else:
            yield self.unknown(kind)
