diff options
author | Shubham Saini <shubham6405@gmail.com> | 2018-12-11 10:01:23 +0000 |
---|---|---|
committer | Shubham Saini <shubham6405@gmail.com> | 2018-12-11 10:01:23 +0000 |
commit | 68df54d6629ec019142eb149dd037774f2d11e7c (patch) | |
tree | 345bc22d46b4e01a4ba8303b94278952a4ed2b9e /venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib |
First commit
Diffstat (limited to 'venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib')
34 files changed, 13190 insertions, 0 deletions
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/__init__.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/__init__.py new file mode 100644 index 0000000..0b54002 --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/__init__.py | |||
@@ -0,0 +1,35 @@ | |||
1 | """ | ||
2 | HTML parsing library based on the `WHATWG HTML specification | ||
3 | <https://whatwg.org/html>`_. The parser is designed to be compatible with | ||
4 | existing HTML found in the wild and implements well-defined error recovery that | ||
5 | is largely compatible with modern desktop web browsers. | ||
6 | |||
7 | Example usage:: | ||
8 | |||
9 | from pip._vendor import html5lib | ||
10 | with open("my_document.html", "rb") as f: | ||
11 | tree = html5lib.parse(f) | ||
12 | |||
13 | For convenience, this module re-exports the following names: | ||
14 | |||
15 | * :func:`~.html5parser.parse` | ||
16 | * :func:`~.html5parser.parseFragment` | ||
17 | * :class:`~.html5parser.HTMLParser` | ||
18 | * :func:`~.treebuilders.getTreeBuilder` | ||
19 | * :func:`~.treewalkers.getTreeWalker` | ||
20 | * :func:`~.serializer.serialize` | ||
21 | """ | ||
22 | |||
23 | from __future__ import absolute_import, division, unicode_literals | ||
24 | |||
25 | from .html5parser import HTMLParser, parse, parseFragment | ||
26 | from .treebuilders import getTreeBuilder | ||
27 | from .treewalkers import getTreeWalker | ||
28 | from .serializer import serialize | ||
29 | |||
30 | __all__ = ["HTMLParser", "parse", "parseFragment", "getTreeBuilder", | ||
31 | "getTreeWalker", "serialize"] | ||
32 | |||
33 | # this has to be at the top level, see how setup.py parses this | ||
34 | #: Distribution version number. | ||
35 | __version__ = "1.0.1" | ||
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_ihatexml.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_ihatexml.py new file mode 100644 index 0000000..68f9b1e --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_ihatexml.py | |||
@@ -0,0 +1,288 @@ | |||
1 | from __future__ import absolute_import, division, unicode_literals | ||
2 | |||
3 | import re | ||
4 | import warnings | ||
5 | |||
6 | from .constants import DataLossWarning | ||
7 | |||
8 | baseChar = """ | ||
9 | [#x0041-#x005A] | [#x0061-#x007A] | [#x00C0-#x00D6] | [#x00D8-#x00F6] | | ||
10 | [#x00F8-#x00FF] | [#x0100-#x0131] | [#x0134-#x013E] | [#x0141-#x0148] | | ||
11 | [#x014A-#x017E] | [#x0180-#x01C3] | [#x01CD-#x01F0] | [#x01F4-#x01F5] | | ||
12 | [#x01FA-#x0217] | [#x0250-#x02A8] | [#x02BB-#x02C1] | #x0386 | | ||
13 | [#x0388-#x038A] | #x038C | [#x038E-#x03A1] | [#x03A3-#x03CE] | | ||
14 | [#x03D0-#x03D6] | #x03DA | #x03DC | #x03DE | #x03E0 | [#x03E2-#x03F3] | | ||
15 | [#x0401-#x040C] | [#x040E-#x044F] | [#x0451-#x045C] | [#x045E-#x0481] | | ||
16 | [#x0490-#x04C4] | [#x04C7-#x04C8] | [#x04CB-#x04CC] | [#x04D0-#x04EB] | | ||
17 | [#x04EE-#x04F5] | [#x04F8-#x04F9] | [#x0531-#x0556] | #x0559 | | ||
18 | [#x0561-#x0586] | [#x05D0-#x05EA] | [#x05F0-#x05F2] | [#x0621-#x063A] | | ||
19 | [#x0641-#x064A] | [#x0671-#x06B7] | [#x06BA-#x06BE] | [#x06C0-#x06CE] | | ||
20 | [#x06D0-#x06D3] | #x06D5 | [#x06E5-#x06E6] | [#x0905-#x0939] | #x093D | | ||
21 | [#x0958-#x0961] | [#x0985-#x098C] | [#x098F-#x0990] | [#x0993-#x09A8] | | ||
22 | [#x09AA-#x09B0] | #x09B2 | [#x09B6-#x09B9] | [#x09DC-#x09DD] | | ||
23 | [#x09DF-#x09E1] | [#x09F0-#x09F1] | [#x0A05-#x0A0A] | [#x0A0F-#x0A10] | | ||
24 | [#x0A13-#x0A28] | [#x0A2A-#x0A30] | [#x0A32-#x0A33] | [#x0A35-#x0A36] | | ||
25 | [#x0A38-#x0A39] | [#x0A59-#x0A5C] | #x0A5E | [#x0A72-#x0A74] | | ||
26 | [#x0A85-#x0A8B] | #x0A8D | [#x0A8F-#x0A91] | [#x0A93-#x0AA8] | | ||
27 | [#x0AAA-#x0AB0] | [#x0AB2-#x0AB3] | [#x0AB5-#x0AB9] | #x0ABD | #x0AE0 | | ||
28 | [#x0B05-#x0B0C] | [#x0B0F-#x0B10] | [#x0B13-#x0B28] | [#x0B2A-#x0B30] | | ||
29 | [#x0B32-#x0B33] | [#x0B36-#x0B39] | #x0B3D | [#x0B5C-#x0B5D] | | ||
30 | [#x0B5F-#x0B61] | [#x0B85-#x0B8A] | [#x0B8E-#x0B90] | [#x0B92-#x0B95] | | ||
31 | [#x0B99-#x0B9A] | #x0B9C | [#x0B9E-#x0B9F] | [#x0BA3-#x0BA4] | | ||
32 | [#x0BA8-#x0BAA] | [#x0BAE-#x0BB5] | [#x0BB7-#x0BB9] | [#x0C05-#x0C0C] | | ||
33 | [#x0C0E-#x0C10] | [#x0C12-#x0C28] | [#x0C2A-#x0C33] | [#x0C35-#x0C39] | | ||
34 | [#x0C60-#x0C61] | [#x0C85-#x0C8C] | [#x0C8E-#x0C90] | [#x0C92-#x0CA8] | | ||
35 | [#x0CAA-#x0CB3] | [#x0CB5-#x0CB9] | #x0CDE | [#x0CE0-#x0CE1] | | ||
36 | [#x0D05-#x0D0C] | [#x0D0E-#x0D10] | [#x0D12-#x0D28] | [#x0D2A-#x0D39] | | ||
37 | [#x0D60-#x0D61] | [#x0E01-#x0E2E] | #x0E30 | [#x0E32-#x0E33] | | ||
38 | [#x0E40-#x0E45] | [#x0E81-#x0E82] | #x0E84 | [#x0E87-#x0E88] | #x0E8A | | ||
39 | #x0E8D | [#x0E94-#x0E97] | [#x0E99-#x0E9F] | [#x0EA1-#x0EA3] | #x0EA5 | | ||
40 | #x0EA7 | [#x0EAA-#x0EAB] | [#x0EAD-#x0EAE] | #x0EB0 | [#x0EB2-#x0EB3] | | ||
41 | #x0EBD | [#x0EC0-#x0EC4] | [#x0F40-#x0F47] | [#x0F49-#x0F69] | | ||
42 | [#x10A0-#x10C5] | [#x10D0-#x10F6] | #x1100 | [#x1102-#x1103] | | ||
43 | [#x1105-#x1107] | #x1109 | [#x110B-#x110C] | [#x110E-#x1112] | #x113C | | ||
44 | #x113E | #x1140 | #x114C | #x114E | #x1150 | [#x1154-#x1155] | #x1159 | | ||
45 | [#x115F-#x1161] | #x1163 | #x1165 | #x1167 | #x1169 | [#x116D-#x116E] | | ||
46 | [#x1172-#x1173] | #x1175 | #x119E | #x11A8 | #x11AB | [#x11AE-#x11AF] | | ||
47 | [#x11B7-#x11B8] | #x11BA | [#x11BC-#x11C2] | #x11EB | #x11F0 | #x11F9 | | ||
48 | [#x1E00-#x1E9B] | [#x1EA0-#x1EF9] | [#x1F00-#x1F15] | [#x1F18-#x1F1D] | | ||
49 | [#x1F20-#x1F45] | [#x1F48-#x1F4D] | [#x1F50-#x1F57] | #x1F59 | #x1F5B | | ||
50 | #x1F5D | [#x1F5F-#x1F7D] | [#x1F80-#x1FB4] | [#x1FB6-#x1FBC] | #x1FBE | | ||
51 | [#x1FC2-#x1FC4] | [#x1FC6-#x1FCC] | [#x1FD0-#x1FD3] | [#x1FD6-#x1FDB] | | ||
52 | [#x1FE0-#x1FEC] | [#x1FF2-#x1FF4] | [#x1FF6-#x1FFC] | #x2126 | | ||
53 | [#x212A-#x212B] | #x212E | [#x2180-#x2182] | [#x3041-#x3094] | | ||
54 | [#x30A1-#x30FA] | [#x3105-#x312C] | [#xAC00-#xD7A3]""" | ||
55 | |||
56 | ideographic = """[#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]""" | ||
57 | |||
58 | combiningCharacter = """ | ||
59 | [#x0300-#x0345] | [#x0360-#x0361] | [#x0483-#x0486] | [#x0591-#x05A1] | | ||
60 | [#x05A3-#x05B9] | [#x05BB-#x05BD] | #x05BF | [#x05C1-#x05C2] | #x05C4 | | ||
61 | [#x064B-#x0652] | #x0670 | [#x06D6-#x06DC] | [#x06DD-#x06DF] | | ||
62 | [#x06E0-#x06E4] | [#x06E7-#x06E8] | [#x06EA-#x06ED] | [#x0901-#x0903] | | ||
63 | #x093C | [#x093E-#x094C] | #x094D | [#x0951-#x0954] | [#x0962-#x0963] | | ||
64 | [#x0981-#x0983] | #x09BC | #x09BE | #x09BF | [#x09C0-#x09C4] | | ||
65 | [#x09C7-#x09C8] | [#x09CB-#x09CD] | #x09D7 | [#x09E2-#x09E3] | #x0A02 | | ||
66 | #x0A3C | #x0A3E | #x0A3F | [#x0A40-#x0A42] | [#x0A47-#x0A48] | | ||
67 | [#x0A4B-#x0A4D] | [#x0A70-#x0A71] | [#x0A81-#x0A83] | #x0ABC | | ||
68 | [#x0ABE-#x0AC5] | [#x0AC7-#x0AC9] | [#x0ACB-#x0ACD] | [#x0B01-#x0B03] | | ||
69 | #x0B3C | [#x0B3E-#x0B43] | [#x0B47-#x0B48] | [#x0B4B-#x0B4D] | | ||
70 | [#x0B56-#x0B57] | [#x0B82-#x0B83] | [#x0BBE-#x0BC2] | [#x0BC6-#x0BC8] | | ||
71 | [#x0BCA-#x0BCD] | #x0BD7 | [#x0C01-#x0C03] | [#x0C3E-#x0C44] | | ||
72 | [#x0C46-#x0C48] | [#x0C4A-#x0C4D] | [#x0C55-#x0C56] | [#x0C82-#x0C83] | | ||
73 | [#x0CBE-#x0CC4] | [#x0CC6-#x0CC8] | [#x0CCA-#x0CCD] | [#x0CD5-#x0CD6] | | ||
74 | [#x0D02-#x0D03] | [#x0D3E-#x0D43] | [#x0D46-#x0D48] | [#x0D4A-#x0D4D] | | ||
75 | #x0D57 | #x0E31 | [#x0E34-#x0E3A] | [#x0E47-#x0E4E] | #x0EB1 | | ||
76 | [#x0EB4-#x0EB9] | [#x0EBB-#x0EBC] | [#x0EC8-#x0ECD] | [#x0F18-#x0F19] | | ||
77 | #x0F35 | #x0F37 | #x0F39 | #x0F3E | #x0F3F | [#x0F71-#x0F84] | | ||
78 | [#x0F86-#x0F8B] | [#x0F90-#x0F95] | #x0F97 | [#x0F99-#x0FAD] | | ||
79 | [#x0FB1-#x0FB7] | #x0FB9 | [#x20D0-#x20DC] | #x20E1 | [#x302A-#x302F] | | ||
80 | #x3099 | #x309A""" | ||
81 | |||
82 | digit = """ | ||
83 | [#x0030-#x0039] | [#x0660-#x0669] | [#x06F0-#x06F9] | [#x0966-#x096F] | | ||
84 | [#x09E6-#x09EF] | [#x0A66-#x0A6F] | [#x0AE6-#x0AEF] | [#x0B66-#x0B6F] | | ||
85 | [#x0BE7-#x0BEF] | [#x0C66-#x0C6F] | [#x0CE6-#x0CEF] | [#x0D66-#x0D6F] | | ||
86 | [#x0E50-#x0E59] | [#x0ED0-#x0ED9] | [#x0F20-#x0F29]""" | ||
87 | |||
88 | extender = """ | ||
89 | #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 | #x0E46 | #x0EC6 | #x3005 | | ||
90 | #[#x3031-#x3035] | [#x309D-#x309E] | [#x30FC-#x30FE]""" | ||
91 | |||
92 | letter = " | ".join([baseChar, ideographic]) | ||
93 | |||
94 | # Without the | ||
95 | name = " | ".join([letter, digit, ".", "-", "_", combiningCharacter, | ||
96 | extender]) | ||
97 | nameFirst = " | ".join([letter, "_"]) | ||
98 | |||
99 | reChar = re.compile(r"#x([\d|A-F]{4,4})") | ||
100 | reCharRange = re.compile(r"\[#x([\d|A-F]{4,4})-#x([\d|A-F]{4,4})\]") | ||
101 | |||
102 | |||
103 | def charStringToList(chars): | ||
104 | charRanges = [item.strip() for item in chars.split(" | ")] | ||
105 | rv = [] | ||
106 | for item in charRanges: | ||
107 | foundMatch = False | ||
108 | for regexp in (reChar, reCharRange): | ||
109 | match = regexp.match(item) | ||
110 | if match is not None: | ||
111 | rv.append([hexToInt(item) for item in match.groups()]) | ||
112 | if len(rv[-1]) == 1: | ||
113 | rv[-1] = rv[-1] * 2 | ||
114 | foundMatch = True | ||
115 | break | ||
116 | if not foundMatch: | ||
117 | assert len(item) == 1 | ||
118 | |||
119 | rv.append([ord(item)] * 2) | ||
120 | rv = normaliseCharList(rv) | ||
121 | return rv | ||
122 | |||
123 | |||
124 | def normaliseCharList(charList): | ||
125 | charList = sorted(charList) | ||
126 | for item in charList: | ||
127 | assert item[1] >= item[0] | ||
128 | rv = [] | ||
129 | i = 0 | ||
130 | while i < len(charList): | ||
131 | j = 1 | ||
132 | rv.append(charList[i]) | ||
133 | while i + j < len(charList) and charList[i + j][0] <= rv[-1][1] + 1: | ||
134 | rv[-1][1] = charList[i + j][1] | ||
135 | j += 1 | ||
136 | i += j | ||
137 | return rv | ||
138 | |||
139 | # We don't really support characters above the BMP :( | ||
140 | max_unicode = int("FFFF", 16) | ||
141 | |||
142 | |||
143 | def missingRanges(charList): | ||
144 | rv = [] | ||
145 | if charList[0] != 0: | ||
146 | rv.append([0, charList[0][0] - 1]) | ||
147 | for i, item in enumerate(charList[:-1]): | ||
148 | rv.append([item[1] + 1, charList[i + 1][0] - 1]) | ||
149 | if charList[-1][1] != max_unicode: | ||
150 | rv.append([charList[-1][1] + 1, max_unicode]) | ||
151 | return rv | ||
152 | |||
153 | |||
154 | def listToRegexpStr(charList): | ||
155 | rv = [] | ||
156 | for item in charList: | ||
157 | if item[0] == item[1]: | ||
158 | rv.append(escapeRegexp(chr(item[0]))) | ||
159 | else: | ||
160 | rv.append(escapeRegexp(chr(item[0])) + "-" + | ||
161 | escapeRegexp(chr(item[1]))) | ||
162 | return "[%s]" % "".join(rv) | ||
163 | |||
164 | |||
165 | def hexToInt(hex_str): | ||
166 | return int(hex_str, 16) | ||
167 | |||
168 | |||
169 | def escapeRegexp(string): | ||
170 | specialCharacters = (".", "^", "$", "*", "+", "?", "{", "}", | ||
171 | "[", "]", "|", "(", ")", "-") | ||
172 | for char in specialCharacters: | ||
173 | string = string.replace(char, "\\" + char) | ||
174 | |||
175 | return string | ||
176 | |||
177 | # output from the above | ||
178 | nonXmlNameBMPRegexp = re.compile('[\x00-,/:-@\\[-\\^`\\{-\xb6\xb8-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u02cf\u02d2-\u02ff\u0346-\u035f\u0362-\u0385\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482\u0487-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u0590\u05a2\u05ba\u05be\u05c0\u05c3\u05c5-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u063f\u0653-\u065f\u066a-\u066f\u06b8-\u06b9\u06bf\u06cf\u06d4\u06e9\u06ee-\u06ef\u06fa-\u0900\u0904\u093a-\u093b\u094e-\u0950\u0955-\u0957\u0964-\u0965\u0970-\u0980\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09bb\u09bd\u09c5-\u09c6\u09c9-\u09ca\u09ce-\u09d6\u09d8-\u09db\u09de\u09e4-\u09e5\u09f2-\u0a01\u0a03-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a3b\u0a3d\u0a43-\u0a46\u0a49-\u0a4a\u0a4e-\u0a58\u0a5d\u0a5f-\u0a65\u0a75-\u0a80\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abb\u0ac6\u0aca\u0ace-\u0adf\u0ae1-\u0ae5\u0af0-\u0b00\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3b\u0b44-\u0b46\u0b49-\u0b4a\u0b4e-\u0b55\u0b58-\u0b5b\u0b5e\u0b62-\u0b65\u0b70-\u0b81\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0bbd\u0bc3-\u0bc5\u0bc9\u0bce-\u0bd6\u0bd8-\u0be6\u0bf0-\u0c00\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c3d\u0c45\u0c49\u0c4e-\u0c54\u0c57-\u0c5f\u0c62-\u0c65\u0c70-\u0c81\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cbd\u0cc5\u0cc9\u0cce-\u0cd4\u0cd7-\u0cdd\u0cdf\u0ce2-\u0ce5\u0cf0-\u0d01\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d3d\u0d44-\u0d45\u0d49\u0d4e-\u0d56\u0d58-\u0d5f\u0d62-\u0d65\u0d70-\u0e00\u0e2f\u0e3b-\u0e3f\u0e4f\u0e5a-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eba\u0ebe-\u0ebf\u0ec5\u0ec7\u0ece-\u0ecf\u0eda-\u0f17\u0f1a-\u0f1f\u0f2a-\u0f34\u0
f36\u0f38\u0f3a-\u0f3d\u0f48\u0f6a-\u0f70\u0f85\u0f8c-\u0f8f\u0f96\u0f98\u0fae-\u0fb0\u0fb8\u0fba-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u20cf\u20dd-\u20e0\u20e2-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3004\u3006\u3008-\u3020\u3030\u3036-\u3040\u3095-\u3098\u309b-\u309c\u309f-\u30a0\u30fb\u30ff-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]') # noqa | ||
179 | |||
180 | nonXmlNameFirstBMPRegexp = re.compile('[\x00-@\\[-\\^`\\{-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u0385\u0387\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u0640\u064b-\u0670\u06b8-\u06b9\u06bf\u06cf\u06d4\u06d6-\u06e4\u06e7-\u0904\u093a-\u093c\u093e-\u0957\u0962-\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09db\u09de\u09e2-\u09ef\u09f2-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a58\u0a5d\u0a5f-\u0a71\u0a75-\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abc\u0abe-\u0adf\u0ae1-\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3c\u0b3e-\u0b5b\u0b5e\u0b62-\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c5f\u0c62-\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cdd\u0cdf\u0ce2-\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d5f\u0d62-\u0e00\u0e2f\u0e31\u0e34-\u0e3f\u0e46-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eb1\u0eb4-\u0ebc\u0ebe-\u0ebf\u0ec5-\u0f3f\u0f48\u0f6a-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3006\u3008-\u3020\u30
2a-\u3040\u3095-\u30a0\u30fb-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]') # noqa | ||
181 | |||
182 | # Simpler things | ||
183 | nonPubidCharRegexp = re.compile("[^\x20\x0D\x0Aa-zA-Z0-9\\-'()+,./:=?;!*#@$_%]") | ||
184 | |||
185 | |||
186 | class InfosetFilter(object): | ||
187 | replacementRegexp = re.compile(r"U[\dA-F]{5,5}") | ||
188 | |||
189 | def __init__(self, | ||
190 | dropXmlnsLocalName=False, | ||
191 | dropXmlnsAttrNs=False, | ||
192 | preventDoubleDashComments=False, | ||
193 | preventDashAtCommentEnd=False, | ||
194 | replaceFormFeedCharacters=True, | ||
195 | preventSingleQuotePubid=False): | ||
196 | |||
197 | self.dropXmlnsLocalName = dropXmlnsLocalName | ||
198 | self.dropXmlnsAttrNs = dropXmlnsAttrNs | ||
199 | |||
200 | self.preventDoubleDashComments = preventDoubleDashComments | ||
201 | self.preventDashAtCommentEnd = preventDashAtCommentEnd | ||
202 | |||
203 | self.replaceFormFeedCharacters = replaceFormFeedCharacters | ||
204 | |||
205 | self.preventSingleQuotePubid = preventSingleQuotePubid | ||
206 | |||
207 | self.replaceCache = {} | ||
208 | |||
209 | def coerceAttribute(self, name, namespace=None): | ||
210 | if self.dropXmlnsLocalName and name.startswith("xmlns:"): | ||
211 | warnings.warn("Attributes cannot begin with xmlns", DataLossWarning) | ||
212 | return None | ||
213 | elif (self.dropXmlnsAttrNs and | ||
214 | namespace == "http://www.w3.org/2000/xmlns/"): | ||
215 | warnings.warn("Attributes cannot be in the xml namespace", DataLossWarning) | ||
216 | return None | ||
217 | else: | ||
218 | return self.toXmlName(name) | ||
219 | |||
220 | def coerceElement(self, name): | ||
221 | return self.toXmlName(name) | ||
222 | |||
223 | def coerceComment(self, data): | ||
224 | if self.preventDoubleDashComments: | ||
225 | while "--" in data: | ||
226 | warnings.warn("Comments cannot contain adjacent dashes", DataLossWarning) | ||
227 | data = data.replace("--", "- -") | ||
228 | if data.endswith("-"): | ||
229 | warnings.warn("Comments cannot end in a dash", DataLossWarning) | ||
230 | data += " " | ||
231 | return data | ||
232 | |||
233 | def coerceCharacters(self, data): | ||
234 | if self.replaceFormFeedCharacters: | ||
235 | for _ in range(data.count("\x0C")): | ||
236 | warnings.warn("Text cannot contain U+000C", DataLossWarning) | ||
237 | data = data.replace("\x0C", " ") | ||
238 | # Other non-xml characters | ||
239 | return data | ||
240 | |||
241 | def coercePubid(self, data): | ||
242 | dataOutput = data | ||
243 | for char in nonPubidCharRegexp.findall(data): | ||
244 | warnings.warn("Coercing non-XML pubid", DataLossWarning) | ||
245 | replacement = self.getReplacementCharacter(char) | ||
246 | dataOutput = dataOutput.replace(char, replacement) | ||
247 | if self.preventSingleQuotePubid and dataOutput.find("'") >= 0: | ||
248 | warnings.warn("Pubid cannot contain single quote", DataLossWarning) | ||
249 | dataOutput = dataOutput.replace("'", self.getReplacementCharacter("'")) | ||
250 | return dataOutput | ||
251 | |||
252 | def toXmlName(self, name): | ||
253 | nameFirst = name[0] | ||
254 | nameRest = name[1:] | ||
255 | m = nonXmlNameFirstBMPRegexp.match(nameFirst) | ||
256 | if m: | ||
257 | warnings.warn("Coercing non-XML name", DataLossWarning) | ||
258 | nameFirstOutput = self.getReplacementCharacter(nameFirst) | ||
259 | else: | ||
260 | nameFirstOutput = nameFirst | ||
261 | |||
262 | nameRestOutput = nameRest | ||
263 | replaceChars = set(nonXmlNameBMPRegexp.findall(nameRest)) | ||
264 | for char in replaceChars: | ||
265 | warnings.warn("Coercing non-XML name", DataLossWarning) | ||
266 | replacement = self.getReplacementCharacter(char) | ||
267 | nameRestOutput = nameRestOutput.replace(char, replacement) | ||
268 | return nameFirstOutput + nameRestOutput | ||
269 | |||
270 | def getReplacementCharacter(self, char): | ||
271 | if char in self.replaceCache: | ||
272 | replacement = self.replaceCache[char] | ||
273 | else: | ||
274 | replacement = self.escapeChar(char) | ||
275 | return replacement | ||
276 | |||
277 | def fromXmlName(self, name): | ||
278 | for item in set(self.replacementRegexp.findall(name)): | ||
279 | name = name.replace(item, self.unescapeChar(item)) | ||
280 | return name | ||
281 | |||
282 | def escapeChar(self, char): | ||
283 | replacement = "U%05X" % ord(char) | ||
284 | self.replaceCache[char] = replacement | ||
285 | return replacement | ||
286 | |||
287 | def unescapeChar(self, charcode): | ||
288 | return chr(int(charcode[1:], 16)) | ||
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_inputstream.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_inputstream.py new file mode 100644 index 0000000..21c6bbc --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_inputstream.py | |||
@@ -0,0 +1,923 @@ | |||
1 | from __future__ import absolute_import, division, unicode_literals | ||
2 | |||
3 | from pip._vendor.six import text_type, binary_type | ||
4 | from pip._vendor.six.moves import http_client, urllib | ||
5 | |||
6 | import codecs | ||
7 | import re | ||
8 | |||
9 | from pip._vendor import webencodings | ||
10 | |||
11 | from .constants import EOF, spaceCharacters, asciiLetters, asciiUppercase | ||
12 | from .constants import _ReparseException | ||
13 | from . import _utils | ||
14 | |||
15 | from io import StringIO | ||
16 | |||
17 | try: | ||
18 | from io import BytesIO | ||
19 | except ImportError: | ||
20 | BytesIO = StringIO | ||
21 | |||
22 | # Non-unicode versions of constants for use in the pre-parser | ||
23 | spaceCharactersBytes = frozenset([item.encode("ascii") for item in spaceCharacters]) | ||
24 | asciiLettersBytes = frozenset([item.encode("ascii") for item in asciiLetters]) | ||
25 | asciiUppercaseBytes = frozenset([item.encode("ascii") for item in asciiUppercase]) | ||
26 | spacesAngleBrackets = spaceCharactersBytes | frozenset([b">", b"<"]) | ||
27 | |||
28 | |||
29 | invalid_unicode_no_surrogate = "[\u0001-\u0008\u000B\u000E-\u001F\u007F-\u009F\uFDD0-\uFDEF\uFFFE\uFFFF\U0001FFFE\U0001FFFF\U0002FFFE\U0002FFFF\U0003FFFE\U0003FFFF\U0004FFFE\U0004FFFF\U0005FFFE\U0005FFFF\U0006FFFE\U0006FFFF\U0007FFFE\U0007FFFF\U0008FFFE\U0008FFFF\U0009FFFE\U0009FFFF\U000AFFFE\U000AFFFF\U000BFFFE\U000BFFFF\U000CFFFE\U000CFFFF\U000DFFFE\U000DFFFF\U000EFFFE\U000EFFFF\U000FFFFE\U000FFFFF\U0010FFFE\U0010FFFF]" # noqa | ||
30 | |||
31 | if _utils.supports_lone_surrogates: | ||
32 | # Use one extra step of indirection and create surrogates with | ||
33 | # eval. Not using this indirection would introduce an illegal | ||
34 | # unicode literal on platforms not supporting such lone | ||
35 | # surrogates. | ||
36 | assert invalid_unicode_no_surrogate[-1] == "]" and invalid_unicode_no_surrogate.count("]") == 1 | ||
37 | invalid_unicode_re = re.compile(invalid_unicode_no_surrogate[:-1] + | ||
38 | eval('"\\uD800-\\uDFFF"') + # pylint:disable=eval-used | ||
39 | "]") | ||
40 | else: | ||
41 | invalid_unicode_re = re.compile(invalid_unicode_no_surrogate) | ||
42 | |||
43 | non_bmp_invalid_codepoints = set([0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE, | ||
44 | 0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF, | ||
45 | 0x6FFFE, 0x6FFFF, 0x7FFFE, 0x7FFFF, 0x8FFFE, | ||
46 | 0x8FFFF, 0x9FFFE, 0x9FFFF, 0xAFFFE, 0xAFFFF, | ||
47 | 0xBFFFE, 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE, | ||
48 | 0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF, | ||
49 | 0x10FFFE, 0x10FFFF]) | ||
50 | |||
51 | ascii_punctuation_re = re.compile("[\u0009-\u000D\u0020-\u002F\u003A-\u0040\u005C\u005B-\u0060\u007B-\u007E]") | ||
52 | |||
53 | # Cache for charsUntil() | ||
54 | charsUntilRegEx = {} | ||
55 | |||
56 | |||
57 | class BufferedStream(object): | ||
58 | """Buffering for streams that do not have buffering of their own | ||
59 | |||
60 | The buffer is implemented as a list of chunks on the assumption that | ||
61 | joining many strings will be slow since it is O(n**2) | ||
62 | """ | ||
63 | |||
64 | def __init__(self, stream): | ||
65 | self.stream = stream | ||
66 | self.buffer = [] | ||
67 | self.position = [-1, 0] # chunk number, offset | ||
68 | |||
69 | def tell(self): | ||
70 | pos = 0 | ||
71 | for chunk in self.buffer[:self.position[0]]: | ||
72 | pos += len(chunk) | ||
73 | pos += self.position[1] | ||
74 | return pos | ||
75 | |||
76 | def seek(self, pos): | ||
77 | assert pos <= self._bufferedBytes() | ||
78 | offset = pos | ||
79 | i = 0 | ||
80 | while len(self.buffer[i]) < offset: | ||
81 | offset -= len(self.buffer[i]) | ||
82 | i += 1 | ||
83 | self.position = [i, offset] | ||
84 | |||
85 | def read(self, bytes): | ||
86 | if not self.buffer: | ||
87 | return self._readStream(bytes) | ||
88 | elif (self.position[0] == len(self.buffer) and | ||
89 | self.position[1] == len(self.buffer[-1])): | ||
90 | return self._readStream(bytes) | ||
91 | else: | ||
92 | return self._readFromBuffer(bytes) | ||
93 | |||
94 | def _bufferedBytes(self): | ||
95 | return sum([len(item) for item in self.buffer]) | ||
96 | |||
97 | def _readStream(self, bytes): | ||
98 | data = self.stream.read(bytes) | ||
99 | self.buffer.append(data) | ||
100 | self.position[0] += 1 | ||
101 | self.position[1] = len(data) | ||
102 | return data | ||
103 | |||
104 | def _readFromBuffer(self, bytes): | ||
105 | remainingBytes = bytes | ||
106 | rv = [] | ||
107 | bufferIndex = self.position[0] | ||
108 | bufferOffset = self.position[1] | ||
109 | while bufferIndex < len(self.buffer) and remainingBytes != 0: | ||
110 | assert remainingBytes > 0 | ||
111 | bufferedData = self.buffer[bufferIndex] | ||
112 | |||
113 | if remainingBytes <= len(bufferedData) - bufferOffset: | ||
114 | bytesToRead = remainingBytes | ||
115 | self.position = [bufferIndex, bufferOffset + bytesToRead] | ||
116 | else: | ||
117 | bytesToRead = len(bufferedData) - bufferOffset | ||
118 | self.position = [bufferIndex, len(bufferedData)] | ||
119 | bufferIndex += 1 | ||
120 | rv.append(bufferedData[bufferOffset:bufferOffset + bytesToRead]) | ||
121 | remainingBytes -= bytesToRead | ||
122 | |||
123 | bufferOffset = 0 | ||
124 | |||
125 | if remainingBytes: | ||
126 | rv.append(self._readStream(remainingBytes)) | ||
127 | |||
128 | return b"".join(rv) | ||
129 | |||
130 | |||
131 | def HTMLInputStream(source, **kwargs): | ||
132 | # Work around Python bug #20007: read(0) closes the connection. | ||
133 | # http://bugs.python.org/issue20007 | ||
134 | if (isinstance(source, http_client.HTTPResponse) or | ||
135 | # Also check for addinfourl wrapping HTTPResponse | ||
136 | (isinstance(source, urllib.response.addbase) and | ||
137 | isinstance(source.fp, http_client.HTTPResponse))): | ||
138 | isUnicode = False | ||
139 | elif hasattr(source, "read"): | ||
140 | isUnicode = isinstance(source.read(0), text_type) | ||
141 | else: | ||
142 | isUnicode = isinstance(source, text_type) | ||
143 | |||
144 | if isUnicode: | ||
145 | encodings = [x for x in kwargs if x.endswith("_encoding")] | ||
146 | if encodings: | ||
147 | raise TypeError("Cannot set an encoding with a unicode input, set %r" % encodings) | ||
148 | |||
149 | return HTMLUnicodeInputStream(source, **kwargs) | ||
150 | else: | ||
151 | return HTMLBinaryInputStream(source, **kwargs) | ||
152 | |||
153 | |||
154 | class HTMLUnicodeInputStream(object): | ||
155 | """Provides a unicode stream of characters to the HTMLTokenizer. | ||
156 | |||
157 | This class takes care of character encoding and removing or replacing | ||
158 | incorrect byte-sequences and also provides column and line tracking. | ||
159 | |||
160 | """ | ||
161 | |||
162 | _defaultChunkSize = 10240 | ||
163 | |||
164 | def __init__(self, source): | ||
165 | """Initialises the HTMLInputStream. | ||
166 | |||
167 | HTMLInputStream(source, [encoding]) -> Normalized stream from source | ||
168 | for use by html5lib. | ||
169 | |||
170 | source can be either a file-object, local filename or a string. | ||
171 | |||
172 | The optional encoding parameter must be a string that indicates | ||
173 | the encoding. If specified, that encoding will be used, | ||
174 | regardless of any BOM or later declaration (such as in a meta | ||
175 | element) | ||
176 | |||
177 | """ | ||
178 | |||
179 | if not _utils.supports_lone_surrogates: | ||
180 | # Such platforms will have already checked for such | ||
181 | # surrogate errors, so no need to do this checking. | ||
182 | self.reportCharacterErrors = None | ||
183 | elif len("\U0010FFFF") == 1: | ||
184 | self.reportCharacterErrors = self.characterErrorsUCS4 | ||
185 | else: | ||
186 | self.reportCharacterErrors = self.characterErrorsUCS2 | ||
187 | |||
188 | # List of where new lines occur | ||
189 | self.newLines = [0] | ||
190 | |||
191 | self.charEncoding = (lookupEncoding("utf-8"), "certain") | ||
192 | self.dataStream = self.openStream(source) | ||
193 | |||
194 | self.reset() | ||
195 | |||
196 | def reset(self): | ||
197 | self.chunk = "" | ||
198 | self.chunkSize = 0 | ||
199 | self.chunkOffset = 0 | ||
200 | self.errors = [] | ||
201 | |||
202 | # number of (complete) lines in previous chunks | ||
203 | self.prevNumLines = 0 | ||
204 | # number of columns in the last line of the previous chunk | ||
205 | self.prevNumCols = 0 | ||
206 | |||
207 | # Deal with CR LF and surrogates split over chunk boundaries | ||
208 | self._bufferedCharacter = None | ||
209 | |||
210 | def openStream(self, source): | ||
211 | """Produces a file object from source. | ||
212 | |||
213 | source can be either a file object, local filename or a string. | ||
214 | |||
215 | """ | ||
216 | # Already a file object | ||
217 | if hasattr(source, 'read'): | ||
218 | stream = source | ||
219 | else: | ||
220 | stream = StringIO(source) | ||
221 | |||
222 | return stream | ||
223 | |||
224 | def _position(self, offset): | ||
225 | chunk = self.chunk | ||
226 | nLines = chunk.count('\n', 0, offset) | ||
227 | positionLine = self.prevNumLines + nLines | ||
228 | lastLinePos = chunk.rfind('\n', 0, offset) | ||
229 | if lastLinePos == -1: | ||
230 | positionColumn = self.prevNumCols + offset | ||
231 | else: | ||
232 | positionColumn = offset - (lastLinePos + 1) | ||
233 | return (positionLine, positionColumn) | ||
234 | |||
235 | def position(self): | ||
236 | """Returns (line, col) of the current position in the stream.""" | ||
237 | line, col = self._position(self.chunkOffset) | ||
238 | return (line + 1, col) | ||
239 | |||
240 | def char(self): | ||
241 | """ Read one character from the stream or queue if available. Return | ||
242 | EOF when EOF is reached. | ||
243 | """ | ||
244 | # Read a new chunk from the input stream if necessary | ||
245 | if self.chunkOffset >= self.chunkSize: | ||
246 | if not self.readChunk(): | ||
247 | return EOF | ||
248 | |||
249 | chunkOffset = self.chunkOffset | ||
250 | char = self.chunk[chunkOffset] | ||
251 | self.chunkOffset = chunkOffset + 1 | ||
252 | |||
253 | return char | ||
254 | |||
def readChunk(self, chunkSize=None):
    """Pull the next chunk of text from the data stream.

    Updates the absolute line/column bookkeeping, normalises CR LF and
    bare CR to LF, reports invalid characters, and stashes a trailing CR
    or lead surrogate so sequences split across chunk boundaries are
    completed on the next read.  Returns False at end of stream, True
    otherwise.
    """
    if chunkSize is None:
        chunkSize = self._defaultChunkSize

    # Record where the consumed chunk ended so _position stays absolute.
    self.prevNumLines, self.prevNumCols = self._position(self.chunkSize)

    self.chunk = ""
    self.chunkSize = 0
    self.chunkOffset = 0

    data = self.dataStream.read(chunkSize)

    # Re-attach a CR / lead surrogate held back from the previous chunk.
    if self._bufferedCharacter:
        data = self._bufferedCharacter + data
        self._bufferedCharacter = None
    elif not data:
        # Nothing buffered and nothing read: end of stream.
        return False

    if len(data) > 1:
        last = ord(data[-1])
        if last == 0x0D or 0xD800 <= last <= 0xDBFF:
            # Hold back a trailing CR or high surrogate until more data
            # arrives, so CR LF pairs and surrogate pairs are seen whole.
            self._bufferedCharacter = data[-1]
            data = data[:-1]

    if self.reportCharacterErrors:
        self.reportCharacterErrors(data)

    # Normalise all newline conventions to LF.
    data = data.replace("\r\n", "\n").replace("\r", "\n")

    self.chunk = data
    self.chunkSize = len(data)

    return True
292 | |||
def characterErrorsUCS4(self, data):
    """Record one "invalid-codepoint" error per invalid character in *data*.

    Used on wide (UCS-4) Python builds where every code point is a single
    character, so a simple regex scan suffices.
    """
    for _ in invalid_unicode_re.findall(data):
        self.errors.append("invalid-codepoint")
296 | |||
def characterErrorsUCS2(self, data):
    """Record "invalid-codepoint" errors in *data* on narrow (UCS-2) builds.

    On narrow builds a non-BMP character is represented as a surrogate
    pair; both halves can match invalid_unicode_re, so after handling a
    valid pair the low surrogate must be skipped rather than reported.
    """
    # Someone picked the wrong compile option
    # You lose
    skip = False
    for match in invalid_unicode_re.finditer(data):
        if skip:
            # This match is the low surrogate of the pair handled on the
            # previous iteration.  Clear the flag so later matches are
            # examined again.  (Previously the flag was never reset here,
            # so every invalid character after the first surrogate pair
            # was silently swallowed.)
            skip = False
            continue
        codepoint = ord(match.group())
        pos = match.start()
        # Pretty sure there should be endianness issues here
        if _utils.isSurrogatePair(data[pos:pos + 2]):
            # We have a surrogate pair!
            char_val = _utils.surrogatePairToCodepoint(data[pos:pos + 2])
            if char_val in non_bmp_invalid_codepoints:
                self.errors.append("invalid-codepoint")
            skip = True
        elif (codepoint >= 0xD800 and codepoint <= 0xDFFF and
              pos == len(data) - 1):
            # Lone high/low surrogate at the very end of the data.
            self.errors.append("invalid-codepoint")
        else:
            skip = False
            self.errors.append("invalid-codepoint")
319 | |||
def charsUntil(self, characters, opposite=False):
    """ Returns a string of characters from the stream up to but not
    including any character in 'characters' or EOF. 'characters' must be
    a container that supports the 'in' method and iteration over its
    characters.

    With opposite=True the sense is inverted: characters are consumed
    only while they ARE in 'characters'.
    """

    # Use a cache of regexps to find the required characters
    try:
        chars = charsUntilRegEx[(characters, opposite)]
    except KeyError:
        # Build (and cache) a regexp matching a run of wanted characters.
        # Only ASCII stop-characters are supported, which lets each one be
        # written as an \xNN escape inside a character class.
        if __debug__:
            for c in characters:
                assert(ord(c) < 128)
        regex = "".join(["\\x%02x" % ord(c) for c in characters])
        if not opposite:
            # Negate the class: match everything *except* 'characters'.
            regex = "^%s" % regex
        chars = charsUntilRegEx[(characters, opposite)] = re.compile("[%s]+" % regex)

    rv = []

    while True:
        # Find the longest matching prefix
        m = chars.match(self.chunk, self.chunkOffset)
        if m is None:
            # If nothing matched, and it wasn't because we ran out of chunk,
            # then stop
            if self.chunkOffset != self.chunkSize:
                break
        else:
            end = m.end()
            # If not the whole chunk matched, return everything
            # up to the part that didn't match
            if end != self.chunkSize:
                rv.append(self.chunk[self.chunkOffset:end])
                self.chunkOffset = end
                break
        # If the whole remainder of the chunk matched,
        # use it all and read the next chunk
        rv.append(self.chunk[self.chunkOffset:])
        if not self.readChunk():
            # Reached EOF
            break

    r = "".join(rv)
    return r
366 | |||
def unget(self, char):
    """Push a single character back onto the stream.

    Only one character may be outstanding at a time; it must be consumed
    again before unget is called once more.  A None char is a no-op.
    """
    if char is None:
        return
    if self.chunkOffset:
        # Common cheap case: step back over the character just read.
        self.chunkOffset -= 1
        assert self.chunk[self.chunkOffset] == char
    else:
        # unget is called quite rarely, so paying to rebuild the chunk
        # here keeps the frequently-called char/charsUntil paths simple:
        # just prepend the ungotten character onto the current chunk.
        self.chunk = char + self.chunk
        self.chunkSize += 1
382 | |||
383 | |||
class HTMLBinaryInputStream(HTMLUnicodeInputStream):
    """Provides a unicode stream of characters to the HTMLTokenizer.

    This class takes care of character encoding and removing or replacing
    incorrect byte-sequences and also provides column and line tracking.
    """

    def __init__(self, source, override_encoding=None, transport_encoding=None,
                 same_origin_parent_encoding=None, likely_encoding=None,
                 default_encoding="windows-1252", useChardet=True):
        """Initialises the HTMLInputStream.

        HTMLInputStream(source, [encoding]) -> Normalized stream from source
        for use by html5lib.

        source can be either a file-object, local filename or a string.

        The optional encoding parameter must be a string that indicates
        the encoding.  If specified, that encoding will be used,
        regardless of any BOM or later declaration (such as in a meta
        element)

        """
        # Raw Stream - for unicode objects this will encode to utf-8 and set
        # self.charEncoding as appropriate
        self.rawStream = self.openStream(source)

        HTMLUnicodeInputStream.__init__(self, self.rawStream)

        # Encoding Information
        # Number of bytes to use when looking for a meta element with
        # encoding information
        self.numBytesMeta = 1024
        # Number of bytes to use when using detecting encoding using chardet
        self.numBytesChardet = 100
        # Things from args
        self.override_encoding = override_encoding
        self.transport_encoding = transport_encoding
        self.same_origin_parent_encoding = same_origin_parent_encoding
        self.likely_encoding = likely_encoding
        self.default_encoding = default_encoding

        # Determine encoding
        self.charEncoding = self.determineEncoding(useChardet)
        assert self.charEncoding[0] is not None

        # Call superclass
        self.reset()

    def reset(self):
        """(Re)create the decoding stream reader and reset position state."""
        # 'replace' -> undecodable bytes become U+FFFD instead of raising.
        self.dataStream = self.charEncoding[0].codec_info.streamreader(self.rawStream, 'replace')
        HTMLUnicodeInputStream.reset(self)

    def openStream(self, source):
        """Produces a file object from source.

        source can be either a file object, local filename or a string.

        """
        # Already a file object
        if hasattr(source, 'read'):
            stream = source
        else:
            stream = BytesIO(source)

        # Encoding sniffing needs a seekable stream; wrap anything that
        # cannot seek (sockets, pipes, ...) in a buffering layer.
        try:
            stream.seek(stream.tell())
        except:  # pylint:disable=bare-except
            stream = BufferedStream(stream)

        return stream

    def determineEncoding(self, chardet=True):
        """Return an (encoding, confidence) pair per the HTML5 sniffing rules.

        Sources are tried in decreasing order of authority: BOM, caller
        override, transport layer, meta element, same-origin parent,
        "likely" hint, chardet guess, caller default, windows-1252.
        """
        # BOMs take precedence over everything
        # This will also read past the BOM if present
        charEncoding = self.detectBOM(), "certain"
        if charEncoding[0] is not None:
            return charEncoding

        # If we've been overriden, we've been overriden
        charEncoding = lookupEncoding(self.override_encoding), "certain"
        if charEncoding[0] is not None:
            return charEncoding

        # Now check the transport layer
        charEncoding = lookupEncoding(self.transport_encoding), "certain"
        if charEncoding[0] is not None:
            return charEncoding

        # Look for meta elements with encoding information
        charEncoding = self.detectEncodingMeta(), "tentative"
        if charEncoding[0] is not None:
            return charEncoding

        # Parent document encoding
        charEncoding = lookupEncoding(self.same_origin_parent_encoding), "tentative"
        if charEncoding[0] is not None and not charEncoding[0].name.startswith("utf-16"):
            return charEncoding

        # "likely" encoding
        charEncoding = lookupEncoding(self.likely_encoding), "tentative"
        if charEncoding[0] is not None:
            return charEncoding

        # Guess with chardet, if available
        if chardet:
            try:
                from pip._vendor.chardet.universaldetector import UniversalDetector
            except ImportError:
                pass
            else:
                buffers = []
                detector = UniversalDetector()
                while not detector.done:
                    buffer = self.rawStream.read(self.numBytesChardet)
                    assert isinstance(buffer, bytes)
                    if not buffer:
                        break
                    buffers.append(buffer)
                    detector.feed(buffer)
                detector.close()
                encoding = lookupEncoding(detector.result['encoding'])
                self.rawStream.seek(0)
                if encoding is not None:
                    return encoding, "tentative"

        # Try the default encoding
        charEncoding = lookupEncoding(self.default_encoding), "tentative"
        if charEncoding[0] is not None:
            return charEncoding

        # Fallback to html5lib's default if even that hasn't worked
        return lookupEncoding("windows-1252"), "tentative"

    def changeEncoding(self, newEncoding):
        """Switch to *newEncoding* mid-parse (e.g. after a late meta tag).

        Raises _ReparseException when the document must be reparsed from
        the start with the new encoding.
        """
        assert self.charEncoding[1] != "certain"
        newEncoding = lookupEncoding(newEncoding)
        if newEncoding is None:
            return
        if newEncoding.name in ("utf-16be", "utf-16le"):
            # Per the spec a document cannot switch itself to utf-16 from
            # inside the byte stream; substitute utf-8.
            newEncoding = lookupEncoding("utf-8")
            assert newEncoding is not None
        # BUGFIX: this must be an independent `if`, not `elif`.  With
        # `elif`, the utf-16 -> utf-8 substitution above fell through the
        # rest of the chain and the encoding was silently never changed.
        if newEncoding == self.charEncoding[0]:
            self.charEncoding = (self.charEncoding[0], "certain")
        else:
            self.rawStream.seek(0)
            self.charEncoding = (newEncoding, "certain")
            self.reset()
            raise _ReparseException("Encoding changed from %s to %s" % (self.charEncoding[0], newEncoding))

    def detectBOM(self):
        """Attempts to detect at BOM at the start of the stream. If
        an encoding can be determined from the BOM return the name of the
        encoding otherwise return None"""
        bomDict = {
            codecs.BOM_UTF8: 'utf-8',
            codecs.BOM_UTF16_LE: 'utf-16le', codecs.BOM_UTF16_BE: 'utf-16be',
            codecs.BOM_UTF32_LE: 'utf-32le', codecs.BOM_UTF32_BE: 'utf-32be'
        }

        # Go to beginning of file and read in 4 bytes
        string = self.rawStream.read(4)
        assert isinstance(string, bytes)

        # Try detecting the BOM using bytes from the string
        encoding = bomDict.get(string[:3])  # UTF-8
        seek = 3
        if not encoding:
            # Need to detect UTF-32 before UTF-16
            encoding = bomDict.get(string)  # UTF-32
            seek = 4
            if not encoding:
                encoding = bomDict.get(string[:2])  # UTF-16
                seek = 2

        # Set the read position past the BOM if one was found, otherwise
        # set it to the start of the stream
        if encoding:
            self.rawStream.seek(seek)
            return lookupEncoding(encoding)
        else:
            self.rawStream.seek(0)
            return None

    def detectEncodingMeta(self):
        """Report the encoding declared by the meta element
        """
        # Only the first numBytesMeta bytes are scanned, then the stream
        # is rewound so decoding starts from the beginning.
        buffer = self.rawStream.read(self.numBytesMeta)
        assert isinstance(buffer, bytes)
        parser = EncodingParser(buffer)
        self.rawStream.seek(0)
        encoding = parser.getEncoding()

        # A meta element may not declare utf-16; treat it as utf-8.
        if encoding is not None and encoding.name in ("utf-16be", "utf-16le"):
            encoding = lookupEncoding("utf-8")

        return encoding
582 | |||
583 | |||
class EncodingBytes(bytes):
    """String-like object with an associated position and various extra methods
    If the position is ever greater than the string length then an exception is
    raised"""

    def __new__(cls, value):
        # NOTE: the first parameter of __new__ is the class; it was
        # previously misleadingly named `self`.  Comparisons in the
        # encoding parser are case-insensitive, so lower-case once here.
        assert isinstance(value, bytes)
        return bytes.__new__(cls, value.lower())

    def __init__(self, value):
        # pylint:disable=unused-argument
        # Position starts one before the first byte; the first next() call
        # advances onto index 0.
        self._position = -1

    def __iter__(self):
        return self

    def __next__(self):
        """Advance one byte and return it; StopIteration past the end."""
        p = self._position = self._position + 1
        if p >= len(self):
            raise StopIteration
        elif p < 0:
            raise TypeError
        return self[p:p + 1]

    def next(self):
        # Py2 compat
        return self.__next__()

    def previous(self):
        """Step back one byte and return the byte at the old position."""
        p = self._position
        if p >= len(self):
            raise StopIteration
        elif p < 0:
            raise TypeError
        self._position = p = p - 1
        return self[p:p + 1]

    def setPosition(self, position):
        # Raises once the old position has run past the end, mirroring the
        # iterator protocol used by EncodingParser.
        if self._position >= len(self):
            raise StopIteration
        self._position = position

    def getPosition(self):
        if self._position >= len(self):
            raise StopIteration
        if self._position >= 0:
            return self._position
        else:
            return None

    position = property(getPosition, setPosition)

    def getCurrentByte(self):
        return self[self.position:self.position + 1]

    currentByte = property(getCurrentByte)

    def skip(self, chars=spaceCharactersBytes):
        """Skip past a list of characters"""
        p = self.position  # use property for the error-checking
        while p < len(self):
            c = self[p:p + 1]
            if c not in chars:
                self._position = p
                return c
            p += 1
        self._position = p
        return None

    def skipUntil(self, chars):
        """Advance until a byte in *chars* is found; return it (None at end)."""
        p = self.position
        while p < len(self):
            c = self[p:p + 1]
            if c in chars:
                self._position = p
                return c
            p += 1
        self._position = p
        return None

    def matchBytes(self, bytes):
        """Look for a sequence of bytes at the start of a string. If the bytes
        are found return True and advance the position to the byte after the
        match. Otherwise return False and leave the position alone"""
        p = self.position
        data = self[p:p + len(bytes)]
        rv = data.startswith(bytes)
        if rv:
            self.position += len(bytes)
        return rv

    def jumpTo(self, bytes):
        """Look for the next sequence of bytes matching a given sequence. If
        a match is found advance the position to the last byte of the match"""
        newPosition = self[self.position:].find(bytes)
        if newPosition > -1:
            # XXX: This is ugly, but I can't see a nicer way to fix this.
            if self._position == -1:
                self._position = 0
            self._position += (newPosition + len(bytes) - 1)
            return True
        else:
            raise StopIteration
687 | |||
class EncodingParser(object):
    """Mini parser for detecting character encoding from meta elements"""

    def __init__(self, data):
        """string - the data to work on for encoding detection"""
        self.data = EncodingBytes(data)
        self.encoding = None

    def getEncoding(self):
        """Scan the byte prefix and return the declared encoding (or None)."""
        # Dispatch table; more specific prefixes come first so that e.g.
        # b"<!--" wins over b"<!" and b"<".
        methodDispatch = (
            (b"<!--", self.handleComment),
            (b"<meta", self.handleMeta),
            (b"</", self.handlePossibleEndTag),
            (b"<!", self.handleOther),
            (b"<?", self.handleOther),
            (b"<", self.handlePossibleStartTag))
        for _ in self.data:
            keepParsing = True
            for key, method in methodDispatch:
                if self.data.matchBytes(key):
                    try:
                        # Handlers return False once an encoding is found.
                        keepParsing = method()
                        break
                    except StopIteration:
                        # Ran off the end of the buffer; stop scanning.
                        keepParsing = False
                        break
            if not keepParsing:
                break

        return self.encoding

    def handleComment(self):
        """Skip over comments"""
        return self.data.jumpTo(b"-->")

    def handleMeta(self):
        """Handle a <meta> tag: look for charset / http-equiv declarations."""
        if self.data.currentByte not in spaceCharactersBytes:
            # if we have <meta not followed by a space so just keep going
            return True
        # We have a valid meta element we want to search for attributes
        hasPragma = False
        pendingEncoding = None
        while True:
            # Try to find the next attribute after the current position
            attr = self.getAttribute()
            if attr is None:
                return True
            else:
                if attr[0] == b"http-equiv":
                    hasPragma = attr[1] == b"content-type"
                    if hasPragma and pendingEncoding is not None:
                        # content= was seen before http-equiv; commit it now.
                        self.encoding = pendingEncoding
                        return False
                elif attr[0] == b"charset":
                    tentativeEncoding = attr[1]
                    codec = lookupEncoding(tentativeEncoding)
                    if codec is not None:
                        self.encoding = codec
                        return False
                elif attr[0] == b"content":
                    contentParser = ContentAttrParser(EncodingBytes(attr[1]))
                    tentativeEncoding = contentParser.parse()
                    if tentativeEncoding is not None:
                        codec = lookupEncoding(tentativeEncoding)
                        if codec is not None:
                            if hasPragma:
                                self.encoding = codec
                                return False
                            else:
                                # Only valid if an http-equiv pragma shows
                                # up later on the same element.
                                pendingEncoding = codec

    def handlePossibleStartTag(self):
        return self.handlePossibleTag(False)

    def handlePossibleEndTag(self):
        # Consume the byte after "</" before inspecting it.
        next(self.data)
        return self.handlePossibleTag(True)

    def handlePossibleTag(self, endTag):
        data = self.data
        if data.currentByte not in asciiLettersBytes:
            # If the next byte is not an ascii letter either ignore this
            # fragment (possible start tag case) or treat it according to
            # handleOther
            if endTag:
                data.previous()
                self.handleOther()
            return True

        c = data.skipUntil(spacesAngleBrackets)
        if c == b"<":
            # return to the first step in the overall "two step" algorithm
            # reprocessing the < byte
            data.previous()
        else:
            # Read all attributes
            attr = self.getAttribute()
            while attr is not None:
                attr = self.getAttribute()
        return True

    def handleOther(self):
        return self.data.jumpTo(b">")

    def getAttribute(self):
        """Return a name,value pair for the next attribute in the stream,
        if one is found, or None"""
        # Step numbers below follow the attribute-parsing steps of the
        # HTML encoding-sniffing algorithm.
        data = self.data
        # Step 1 (skip chars)
        c = data.skip(spaceCharactersBytes | frozenset([b"/"]))
        assert c is None or len(c) == 1
        # Step 2
        if c in (b">", None):
            return None
        # Step 3
        attrName = []
        attrValue = []
        # Step 4 attribute name
        while True:
            if c == b"=" and attrName:
                break
            elif c in spaceCharactersBytes:
                # Step 6!
                c = data.skip()
                break
            elif c in (b"/", b">"):
                return b"".join(attrName), b""
            elif c in asciiUppercaseBytes:
                attrName.append(c.lower())
            elif c is None:
                return None
            else:
                attrName.append(c)
            # Step 5
            c = next(data)
        # Step 7
        if c != b"=":
            data.previous()
            return b"".join(attrName), b""
        # Step 8
        next(data)
        # Step 9
        c = data.skip()
        # Step 10
        if c in (b"'", b'"'):
            # 10.1
            quoteChar = c
            while True:
                # 10.2
                c = next(data)
                # 10.3
                if c == quoteChar:
                    next(data)
                    return b"".join(attrName), b"".join(attrValue)
                # 10.4
                elif c in asciiUppercaseBytes:
                    attrValue.append(c.lower())
                # 10.5
                else:
                    attrValue.append(c)
        elif c == b">":
            return b"".join(attrName), b""
        elif c in asciiUppercaseBytes:
            attrValue.append(c.lower())
        elif c is None:
            return None
        else:
            attrValue.append(c)
        # Step 11
        while True:
            c = next(data)
            if c in spacesAngleBrackets:
                return b"".join(attrName), b"".join(attrValue)
            elif c in asciiUppercaseBytes:
                attrValue.append(c.lower())
            elif c is None:
                return None
            else:
                attrValue.append(c)
867 | |||
868 | |||
class ContentAttrParser(object):
    """Parser for the value of a meta "content" attribute.

    Extracts the encoding name from strings of the form
    b"text/html; charset=utf-8".
    """

    def __init__(self, data):
        # data is expected to be an EncodingBytes instance (plain bytes
        # lacks the jumpTo/skip/position machinery used below).
        assert isinstance(data, bytes)
        self.data = data

    def parse(self):
        """Return the declared charset as bytes, or None if not found."""
        try:
            # Check if the attr name is charset
            # otherwise return
            self.data.jumpTo(b"charset")
            self.data.position += 1
            self.data.skip()
            if not self.data.currentByte == b"=":
                # If there is no = sign keep looking for attrs
                return None
            self.data.position += 1
            self.data.skip()
            # Look for an encoding between matching quote marks
            if self.data.currentByte in (b'"', b"'"):
                quoteMark = self.data.currentByte
                self.data.position += 1
                oldPosition = self.data.position
                if self.data.jumpTo(quoteMark):
                    return self.data[oldPosition:self.data.position]
                else:
                    return None
            else:
                # Unquoted value
                oldPosition = self.data.position
                try:
                    self.data.skipUntil(spaceCharactersBytes)
                    return self.data[oldPosition:self.data.position]
                except StopIteration:
                    # Return the whole remaining value
                    return self.data[oldPosition:]
        except StopIteration:
            # jumpTo/position ran off the end of the data: no charset here.
            return None
906 | |||
907 | |||
def lookupEncoding(encoding):
    """Return the python codec name corresponding to an encoding or None if the
    string doesn't correspond to a valid encoding."""
    if isinstance(encoding, binary_type):
        try:
            encoding = encoding.decode("ascii")
        except UnicodeDecodeError:
            # Encoding labels are ASCII by definition; anything else is bogus.
            return None

    if encoding is None:
        return None

    try:
        return webencodings.lookup(encoding)
    except AttributeError:
        return None
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_tokenizer.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_tokenizer.py new file mode 100644 index 0000000..ef1ccf8 --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_tokenizer.py | |||
@@ -0,0 +1,1721 @@ | |||
1 | from __future__ import absolute_import, division, unicode_literals | ||
2 | |||
3 | from pip._vendor.six import unichr as chr | ||
4 | |||
5 | from collections import deque | ||
6 | |||
7 | from .constants import spaceCharacters | ||
8 | from .constants import entities | ||
9 | from .constants import asciiLetters, asciiUpper2Lower | ||
10 | from .constants import digits, hexDigits, EOF | ||
11 | from .constants import tokenTypes, tagTokenTypes | ||
12 | from .constants import replacementCharacters | ||
13 | |||
14 | from ._inputstream import HTMLInputStream | ||
15 | |||
16 | from ._trie import Trie | ||
17 | |||
18 | entitiesTrie = Trie(entities) | ||
19 | |||
20 | |||
21 | class HTMLTokenizer(object): | ||
22 | """ This class takes care of tokenizing HTML. | ||
23 | |||
24 | * self.currentToken | ||
25 | Holds the token that is currently being processed. | ||
26 | |||
27 | * self.state | ||
28 | Holds a reference to the method to be invoked... XXX | ||
29 | |||
30 | * self.stream | ||
31 | Points to HTMLInputStream object. | ||
32 | """ | ||
33 | |||
def __init__(self, stream, parser=None, **kwargs):
    """Create a tokenizer.

    :arg stream: the character source; passed straight through to
        HTMLInputStream (file object, filename or string).
    :arg parser: optional reference to the owning parser.
    :arg kwargs: forwarded to HTMLInputStream (encoding options etc.).
    """
    self.stream = HTMLInputStream(stream, **kwargs)
    self.parser = parser

    # Setup the initial tokenizer state
    self.escapeFlag = False
    self.lastFourChars = []
    self.state = self.dataState  # current state handler, invoked per step
    self.escape = False

    # The current token being created
    self.currentToken = None
    super(HTMLTokenizer, self).__init__()
48 | |||
49 | def __iter__(self): | ||
50 | """ This is where the magic happens. | ||
51 | |||
52 | We do our usually processing through the states and when we have a token | ||
53 | to return we yield the token which pauses processing until the next token | ||
54 | is requested. | ||
55 | """ | ||
56 | self.tokenQueue = deque([]) | ||
57 | # Start processing. When EOF is reached self.state will return False | ||
58 | # instead of True and the loop will terminate. | ||
59 | while self.state(): | ||
60 | while self.stream.errors: | ||
61 | yield {"type": tokenTypes["ParseError"], "data": self.stream.errors.pop(0)} | ||
62 | while self.tokenQueue: | ||
63 | yield self.tokenQueue.popleft() | ||
64 | |||
def consumeNumberEntity(self, isHex):
    """This function returns either U+FFFD or the character based on the
    decimal or hexadecimal representation. It also discards ";" if present.
    If not present self.tokenQueue.append({"type": tokenTypes["ParseError"]}) is invoked.
    """

    allowed = digits
    radix = 10
    if isHex:
        allowed = hexDigits
        radix = 16

    charStack = []

    # Consume all the characters that are in range while making sure we
    # don't hit an EOF.
    c = self.stream.char()
    while c in allowed and c is not EOF:
        charStack.append(c)
        c = self.stream.char()

    # Convert the set of characters consumed to an int.
    charAsInt = int("".join(charStack), radix)

    # Certain characters get replaced with others
    if charAsInt in replacementCharacters:
        char = replacementCharacters[charAsInt]
        self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                "illegal-codepoint-for-numeric-entity",
                                "datavars": {"charAsInt": charAsInt}})
    elif ((0xD800 <= charAsInt <= 0xDFFF) or
          (charAsInt > 0x10FFFF)):
        # Surrogates and out-of-range code points become U+FFFD.
        char = "\uFFFD"
        self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                "illegal-codepoint-for-numeric-entity",
                                "datavars": {"charAsInt": charAsInt}})
    else:
        # Should speed up this check somehow (e.g. move the set to a constant)
        # These are valid-to-emit but still flagged as parse errors
        # (control characters and non-characters).
        if ((0x0001 <= charAsInt <= 0x0008) or
            (0x000E <= charAsInt <= 0x001F) or
            (0x007F <= charAsInt <= 0x009F) or
            (0xFDD0 <= charAsInt <= 0xFDEF) or
            charAsInt in frozenset([0x000B, 0xFFFE, 0xFFFF, 0x1FFFE,
                                    0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE,
                                    0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE,
                                    0x5FFFF, 0x6FFFE, 0x6FFFF, 0x7FFFE,
                                    0x7FFFF, 0x8FFFE, 0x8FFFF, 0x9FFFE,
                                    0x9FFFF, 0xAFFFE, 0xAFFFF, 0xBFFFE,
                                    0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE,
                                    0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE,
                                    0xFFFFF, 0x10FFFE, 0x10FFFF])):
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data":
                                    "illegal-codepoint-for-numeric-entity",
                                    "datavars": {"charAsInt": charAsInt}})
        try:
            # Try/except needed as UCS-2 Python builds' unichar only works
            # within the BMP.
            char = chr(charAsInt)
        except ValueError:
            v = charAsInt - 0x10000
            char = chr(0xD800 | (v >> 10)) + chr(0xDC00 | (v & 0x3FF))

    # Discard the ; if present. Otherwise, put it back on the queue and
    # invoke parseError on parser.
    if c != ";":
        self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                "numeric-entity-without-semicolon"})
        self.stream.unget(c)

    return char
136 | |||
def consumeEntity(self, allowedChar=None, fromAttribute=False):
    """Consume a character reference ("&...") from the stream.

    The decoded text is appended to the current attribute value when
    *fromAttribute* is true, otherwise emitted as a character token.
    *allowedChar* is an extra character after which "&" is left alone.
    """
    # Initialise to the default output for when no entity is matched
    output = "&"

    charStack = [self.stream.char()]
    if (charStack[0] in spaceCharacters or charStack[0] in (EOF, "<", "&") or
            (allowedChar is not None and allowedChar == charStack[0])):
        # Not a character reference at all; put the character back.
        self.stream.unget(charStack[0])

    elif charStack[0] == "#":
        # Read the next character to see if it's hex or decimal
        hex = False
        charStack.append(self.stream.char())
        if charStack[-1] in ("x", "X"):
            hex = True
            charStack.append(self.stream.char())

        # charStack[-1] should be the first digit
        if (hex and charStack[-1] in hexDigits) \
                or (not hex and charStack[-1] in digits):
            # At least one digit found, so consume the whole number
            self.stream.unget(charStack[-1])
            output = self.consumeNumberEntity(hex)
        else:
            # No digits found
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "expected-numeric-entity"})
            self.stream.unget(charStack.pop())
            output = "&" + "".join(charStack)

    else:
        # At this point in the process might have named entity. Entities
        # are stored in the global variable "entities".
        #
        # Consume characters and compare to these to a substring of the
        # entity names in the list until the substring no longer matches.
        while (charStack[-1] is not EOF):
            if not entitiesTrie.has_keys_with_prefix("".join(charStack)):
                break
            charStack.append(self.stream.char())

        # At this point we have a string that starts with some characters
        # that may match an entity
        # Try to find the longest entity the string will match to take care
        # of &noti for instance.
        try:
            entityName = entitiesTrie.longest_prefix("".join(charStack[:-1]))
            entityLength = len(entityName)
        except KeyError:
            entityName = None

        if entityName is not None:
            if entityName[-1] != ";":
                self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                        "named-entity-without-semicolon"})
            # In attributes, a semicolon-less entity followed by an
            # alphanumeric or "=" is treated as literal text.
            if (entityName[-1] != ";" and fromAttribute and
                (charStack[entityLength] in asciiLetters or
                 charStack[entityLength] in digits or
                 charStack[entityLength] == "=")):
                self.stream.unget(charStack.pop())
                output = "&" + "".join(charStack)
            else:
                output = entities[entityName]
                self.stream.unget(charStack.pop())
                output += "".join(charStack[entityLength:])
        else:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "expected-named-entity"})
            self.stream.unget(charStack.pop())
            output = "&" + "".join(charStack)

    if fromAttribute:
        self.currentToken["data"][-1][1] += output
    else:
        if output in spaceCharacters:
            tokenType = "SpaceCharacters"
        else:
            tokenType = "Characters"
        self.tokenQueue.append({"type": tokenTypes[tokenType], "data": output})
216 | |||
217 | def processEntityInAttribute(self, allowedChar): | ||
218 | """This method replaces the need for "entityInAttributeValueState". | ||
219 | """ | ||
220 | self.consumeEntity(allowedChar=allowedChar, fromAttribute=True) | ||
221 | |||
222 | def emitCurrentToken(self): | ||
223 | """This method is a generic handler for emitting the tags. It also sets | ||
224 | the state to "data" because that's what's needed after a token has been | ||
225 | emitted. | ||
226 | """ | ||
227 | token = self.currentToken | ||
228 | # Add token to the queue to be yielded | ||
229 | if (token["type"] in tagTokenTypes): | ||
230 | token["name"] = token["name"].translate(asciiUpper2Lower) | ||
231 | if token["type"] == tokenTypes["EndTag"]: | ||
232 | if token["data"]: | ||
233 | self.tokenQueue.append({"type": tokenTypes["ParseError"], | ||
234 | "data": "attributes-in-end-tag"}) | ||
235 | if token["selfClosing"]: | ||
236 | self.tokenQueue.append({"type": tokenTypes["ParseError"], | ||
237 | "data": "self-closing-flag-on-end-tag"}) | ||
238 | self.tokenQueue.append(token) | ||
239 | self.state = self.dataState | ||
240 | |||
241 | # Below are the various tokenizer states worked out. | ||
242 | def dataState(self): | ||
243 | data = self.stream.char() | ||
244 | if data == "&": | ||
245 | self.state = self.entityDataState | ||
246 | elif data == "<": | ||
247 | self.state = self.tagOpenState | ||
248 | elif data == "\u0000": | ||
249 | self.tokenQueue.append({"type": tokenTypes["ParseError"], | ||
250 | "data": "invalid-codepoint"}) | ||
251 | self.tokenQueue.append({"type": tokenTypes["Characters"], | ||
252 | "data": "\u0000"}) | ||
253 | elif data is EOF: | ||
254 | # Tokenization ends. | ||
255 | return False | ||
256 | elif data in spaceCharacters: | ||
257 | # Directly after emitting a token you switch back to the "data | ||
258 | # state". At that point spaceCharacters are important so they are | ||
259 | # emitted separately. | ||
260 | self.tokenQueue.append({"type": tokenTypes["SpaceCharacters"], "data": | ||
261 | data + self.stream.charsUntil(spaceCharacters, True)}) | ||
262 | # No need to update lastFourChars here, since the first space will | ||
263 | # have already been appended to lastFourChars and will have broken | ||
264 | # any <!-- or --> sequences | ||
265 | else: | ||
266 | chars = self.stream.charsUntil(("&", "<", "\u0000")) | ||
267 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": | ||
268 | data + chars}) | ||
269 | return True | ||
270 | |||
271 | def entityDataState(self): | ||
272 | self.consumeEntity() | ||
273 | self.state = self.dataState | ||
274 | return True | ||
275 | |||
276 | def rcdataState(self): | ||
277 | data = self.stream.char() | ||
278 | if data == "&": | ||
279 | self.state = self.characterReferenceInRcdata | ||
280 | elif data == "<": | ||
281 | self.state = self.rcdataLessThanSignState | ||
282 | elif data == EOF: | ||
283 | # Tokenization ends. | ||
284 | return False | ||
285 | elif data == "\u0000": | ||
286 | self.tokenQueue.append({"type": tokenTypes["ParseError"], | ||
287 | "data": "invalid-codepoint"}) | ||
288 | self.tokenQueue.append({"type": tokenTypes["Characters"], | ||
289 | "data": "\uFFFD"}) | ||
290 | elif data in spaceCharacters: | ||
291 | # Directly after emitting a token you switch back to the "data | ||
292 | # state". At that point spaceCharacters are important so they are | ||
293 | # emitted separately. | ||
294 | self.tokenQueue.append({"type": tokenTypes["SpaceCharacters"], "data": | ||
295 | data + self.stream.charsUntil(spaceCharacters, True)}) | ||
296 | # No need to update lastFourChars here, since the first space will | ||
297 | # have already been appended to lastFourChars and will have broken | ||
298 | # any <!-- or --> sequences | ||
299 | else: | ||
300 | chars = self.stream.charsUntil(("&", "<", "\u0000")) | ||
301 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": | ||
302 | data + chars}) | ||
303 | return True | ||
304 | |||
305 | def characterReferenceInRcdata(self): | ||
306 | self.consumeEntity() | ||
307 | self.state = self.rcdataState | ||
308 | return True | ||
309 | |||
310 | def rawtextState(self): | ||
311 | data = self.stream.char() | ||
312 | if data == "<": | ||
313 | self.state = self.rawtextLessThanSignState | ||
314 | elif data == "\u0000": | ||
315 | self.tokenQueue.append({"type": tokenTypes["ParseError"], | ||
316 | "data": "invalid-codepoint"}) | ||
317 | self.tokenQueue.append({"type": tokenTypes["Characters"], | ||
318 | "data": "\uFFFD"}) | ||
319 | elif data == EOF: | ||
320 | # Tokenization ends. | ||
321 | return False | ||
322 | else: | ||
323 | chars = self.stream.charsUntil(("<", "\u0000")) | ||
324 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": | ||
325 | data + chars}) | ||
326 | return True | ||
327 | |||
328 | def scriptDataState(self): | ||
329 | data = self.stream.char() | ||
330 | if data == "<": | ||
331 | self.state = self.scriptDataLessThanSignState | ||
332 | elif data == "\u0000": | ||
333 | self.tokenQueue.append({"type": tokenTypes["ParseError"], | ||
334 | "data": "invalid-codepoint"}) | ||
335 | self.tokenQueue.append({"type": tokenTypes["Characters"], | ||
336 | "data": "\uFFFD"}) | ||
337 | elif data == EOF: | ||
338 | # Tokenization ends. | ||
339 | return False | ||
340 | else: | ||
341 | chars = self.stream.charsUntil(("<", "\u0000")) | ||
342 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": | ||
343 | data + chars}) | ||
344 | return True | ||
345 | |||
346 | def plaintextState(self): | ||
347 | data = self.stream.char() | ||
348 | if data == EOF: | ||
349 | # Tokenization ends. | ||
350 | return False | ||
351 | elif data == "\u0000": | ||
352 | self.tokenQueue.append({"type": tokenTypes["ParseError"], | ||
353 | "data": "invalid-codepoint"}) | ||
354 | self.tokenQueue.append({"type": tokenTypes["Characters"], | ||
355 | "data": "\uFFFD"}) | ||
356 | else: | ||
357 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": | ||
358 | data + self.stream.charsUntil("\u0000")}) | ||
359 | return True | ||
360 | |||
361 | def tagOpenState(self): | ||
362 | data = self.stream.char() | ||
363 | if data == "!": | ||
364 | self.state = self.markupDeclarationOpenState | ||
365 | elif data == "/": | ||
366 | self.state = self.closeTagOpenState | ||
367 | elif data in asciiLetters: | ||
368 | self.currentToken = {"type": tokenTypes["StartTag"], | ||
369 | "name": data, "data": [], | ||
370 | "selfClosing": False, | ||
371 | "selfClosingAcknowledged": False} | ||
372 | self.state = self.tagNameState | ||
373 | elif data == ">": | ||
374 | # XXX In theory it could be something besides a tag name. But | ||
375 | # do we really care? | ||
376 | self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | ||
377 | "expected-tag-name-but-got-right-bracket"}) | ||
378 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<>"}) | ||
379 | self.state = self.dataState | ||
380 | elif data == "?": | ||
381 | # XXX In theory it could be something besides a tag name. But | ||
382 | # do we really care? | ||
383 | self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | ||
384 | "expected-tag-name-but-got-question-mark"}) | ||
385 | self.stream.unget(data) | ||
386 | self.state = self.bogusCommentState | ||
387 | else: | ||
388 | # XXX | ||
389 | self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | ||
390 | "expected-tag-name"}) | ||
391 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) | ||
392 | self.stream.unget(data) | ||
393 | self.state = self.dataState | ||
394 | return True | ||
395 | |||
396 | def closeTagOpenState(self): | ||
397 | data = self.stream.char() | ||
398 | if data in asciiLetters: | ||
399 | self.currentToken = {"type": tokenTypes["EndTag"], "name": data, | ||
400 | "data": [], "selfClosing": False} | ||
401 | self.state = self.tagNameState | ||
402 | elif data == ">": | ||
403 | self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | ||
404 | "expected-closing-tag-but-got-right-bracket"}) | ||
405 | self.state = self.dataState | ||
406 | elif data is EOF: | ||
407 | self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | ||
408 | "expected-closing-tag-but-got-eof"}) | ||
409 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "</"}) | ||
410 | self.state = self.dataState | ||
411 | else: | ||
412 | # XXX data can be _'_... | ||
413 | self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | ||
414 | "expected-closing-tag-but-got-char", | ||
415 | "datavars": {"data": data}}) | ||
416 | self.stream.unget(data) | ||
417 | self.state = self.bogusCommentState | ||
418 | return True | ||
419 | |||
420 | def tagNameState(self): | ||
421 | data = self.stream.char() | ||
422 | if data in spaceCharacters: | ||
423 | self.state = self.beforeAttributeNameState | ||
424 | elif data == ">": | ||
425 | self.emitCurrentToken() | ||
426 | elif data is EOF: | ||
427 | self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | ||
428 | "eof-in-tag-name"}) | ||
429 | self.state = self.dataState | ||
430 | elif data == "/": | ||
431 | self.state = self.selfClosingStartTagState | ||
432 | elif data == "\u0000": | ||
433 | self.tokenQueue.append({"type": tokenTypes["ParseError"], | ||
434 | "data": "invalid-codepoint"}) | ||
435 | self.currentToken["name"] += "\uFFFD" | ||
436 | else: | ||
437 | self.currentToken["name"] += data | ||
438 | # (Don't use charsUntil here, because tag names are | ||
439 | # very short and it's faster to not do anything fancy) | ||
440 | return True | ||
441 | |||
442 | def rcdataLessThanSignState(self): | ||
443 | data = self.stream.char() | ||
444 | if data == "/": | ||
445 | self.temporaryBuffer = "" | ||
446 | self.state = self.rcdataEndTagOpenState | ||
447 | else: | ||
448 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) | ||
449 | self.stream.unget(data) | ||
450 | self.state = self.rcdataState | ||
451 | return True | ||
452 | |||
453 | def rcdataEndTagOpenState(self): | ||
454 | data = self.stream.char() | ||
455 | if data in asciiLetters: | ||
456 | self.temporaryBuffer += data | ||
457 | self.state = self.rcdataEndTagNameState | ||
458 | else: | ||
459 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "</"}) | ||
460 | self.stream.unget(data) | ||
461 | self.state = self.rcdataState | ||
462 | return True | ||
463 | |||
464 | def rcdataEndTagNameState(self): | ||
465 | appropriate = self.currentToken and self.currentToken["name"].lower() == self.temporaryBuffer.lower() | ||
466 | data = self.stream.char() | ||
467 | if data in spaceCharacters and appropriate: | ||
468 | self.currentToken = {"type": tokenTypes["EndTag"], | ||
469 | "name": self.temporaryBuffer, | ||
470 | "data": [], "selfClosing": False} | ||
471 | self.state = self.beforeAttributeNameState | ||
472 | elif data == "/" and appropriate: | ||
473 | self.currentToken = {"type": tokenTypes["EndTag"], | ||
474 | "name": self.temporaryBuffer, | ||
475 | "data": [], "selfClosing": False} | ||
476 | self.state = self.selfClosingStartTagState | ||
477 | elif data == ">" and appropriate: | ||
478 | self.currentToken = {"type": tokenTypes["EndTag"], | ||
479 | "name": self.temporaryBuffer, | ||
480 | "data": [], "selfClosing": False} | ||
481 | self.emitCurrentToken() | ||
482 | self.state = self.dataState | ||
483 | elif data in asciiLetters: | ||
484 | self.temporaryBuffer += data | ||
485 | else: | ||
486 | self.tokenQueue.append({"type": tokenTypes["Characters"], | ||
487 | "data": "</" + self.temporaryBuffer}) | ||
488 | self.stream.unget(data) | ||
489 | self.state = self.rcdataState | ||
490 | return True | ||
491 | |||
492 | def rawtextLessThanSignState(self): | ||
493 | data = self.stream.char() | ||
494 | if data == "/": | ||
495 | self.temporaryBuffer = "" | ||
496 | self.state = self.rawtextEndTagOpenState | ||
497 | else: | ||
498 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) | ||
499 | self.stream.unget(data) | ||
500 | self.state = self.rawtextState | ||
501 | return True | ||
502 | |||
503 | def rawtextEndTagOpenState(self): | ||
504 | data = self.stream.char() | ||
505 | if data in asciiLetters: | ||
506 | self.temporaryBuffer += data | ||
507 | self.state = self.rawtextEndTagNameState | ||
508 | else: | ||
509 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "</"}) | ||
510 | self.stream.unget(data) | ||
511 | self.state = self.rawtextState | ||
512 | return True | ||
513 | |||
514 | def rawtextEndTagNameState(self): | ||
515 | appropriate = self.currentToken and self.currentToken["name"].lower() == self.temporaryBuffer.lower() | ||
516 | data = self.stream.char() | ||
517 | if data in spaceCharacters and appropriate: | ||
518 | self.currentToken = {"type": tokenTypes["EndTag"], | ||
519 | "name": self.temporaryBuffer, | ||
520 | "data": [], "selfClosing": False} | ||
521 | self.state = self.beforeAttributeNameState | ||
522 | elif data == "/" and appropriate: | ||
523 | self.currentToken = {"type": tokenTypes["EndTag"], | ||
524 | "name": self.temporaryBuffer, | ||
525 | "data": [], "selfClosing": False} | ||
526 | self.state = self.selfClosingStartTagState | ||
527 | elif data == ">" and appropriate: | ||
528 | self.currentToken = {"type": tokenTypes["EndTag"], | ||
529 | "name": self.temporaryBuffer, | ||
530 | "data": [], "selfClosing": False} | ||
531 | self.emitCurrentToken() | ||
532 | self.state = self.dataState | ||
533 | elif data in asciiLetters: | ||
534 | self.temporaryBuffer += data | ||
535 | else: | ||
536 | self.tokenQueue.append({"type": tokenTypes["Characters"], | ||
537 | "data": "</" + self.temporaryBuffer}) | ||
538 | self.stream.unget(data) | ||
539 | self.state = self.rawtextState | ||
540 | return True | ||
541 | |||
542 | def scriptDataLessThanSignState(self): | ||
543 | data = self.stream.char() | ||
544 | if data == "/": | ||
545 | self.temporaryBuffer = "" | ||
546 | self.state = self.scriptDataEndTagOpenState | ||
547 | elif data == "!": | ||
548 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<!"}) | ||
549 | self.state = self.scriptDataEscapeStartState | ||
550 | else: | ||
551 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) | ||
552 | self.stream.unget(data) | ||
553 | self.state = self.scriptDataState | ||
554 | return True | ||
555 | |||
556 | def scriptDataEndTagOpenState(self): | ||
557 | data = self.stream.char() | ||
558 | if data in asciiLetters: | ||
559 | self.temporaryBuffer += data | ||
560 | self.state = self.scriptDataEndTagNameState | ||
561 | else: | ||
562 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "</"}) | ||
563 | self.stream.unget(data) | ||
564 | self.state = self.scriptDataState | ||
565 | return True | ||
566 | |||
567 | def scriptDataEndTagNameState(self): | ||
568 | appropriate = self.currentToken and self.currentToken["name"].lower() == self.temporaryBuffer.lower() | ||
569 | data = self.stream.char() | ||
570 | if data in spaceCharacters and appropriate: | ||
571 | self.currentToken = {"type": tokenTypes["EndTag"], | ||
572 | "name": self.temporaryBuffer, | ||
573 | "data": [], "selfClosing": False} | ||
574 | self.state = self.beforeAttributeNameState | ||
575 | elif data == "/" and appropriate: | ||
576 | self.currentToken = {"type": tokenTypes["EndTag"], | ||
577 | "name": self.temporaryBuffer, | ||
578 | "data": [], "selfClosing": False} | ||
579 | self.state = self.selfClosingStartTagState | ||
580 | elif data == ">" and appropriate: | ||
581 | self.currentToken = {"type": tokenTypes["EndTag"], | ||
582 | "name": self.temporaryBuffer, | ||
583 | "data": [], "selfClosing": False} | ||
584 | self.emitCurrentToken() | ||
585 | self.state = self.dataState | ||
586 | elif data in asciiLetters: | ||
587 | self.temporaryBuffer += data | ||
588 | else: | ||
589 | self.tokenQueue.append({"type": tokenTypes["Characters"], | ||
590 | "data": "</" + self.temporaryBuffer}) | ||
591 | self.stream.unget(data) | ||
592 | self.state = self.scriptDataState | ||
593 | return True | ||
594 | |||
595 | def scriptDataEscapeStartState(self): | ||
596 | data = self.stream.char() | ||
597 | if data == "-": | ||
598 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"}) | ||
599 | self.state = self.scriptDataEscapeStartDashState | ||
600 | else: | ||
601 | self.stream.unget(data) | ||
602 | self.state = self.scriptDataState | ||
603 | return True | ||
604 | |||
605 | def scriptDataEscapeStartDashState(self): | ||
606 | data = self.stream.char() | ||
607 | if data == "-": | ||
608 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"}) | ||
609 | self.state = self.scriptDataEscapedDashDashState | ||
610 | else: | ||
611 | self.stream.unget(data) | ||
612 | self.state = self.scriptDataState | ||
613 | return True | ||
614 | |||
615 | def scriptDataEscapedState(self): | ||
616 | data = self.stream.char() | ||
617 | if data == "-": | ||
618 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"}) | ||
619 | self.state = self.scriptDataEscapedDashState | ||
620 | elif data == "<": | ||
621 | self.state = self.scriptDataEscapedLessThanSignState | ||
622 | elif data == "\u0000": | ||
623 | self.tokenQueue.append({"type": tokenTypes["ParseError"], | ||
624 | "data": "invalid-codepoint"}) | ||
625 | self.tokenQueue.append({"type": tokenTypes["Characters"], | ||
626 | "data": "\uFFFD"}) | ||
627 | elif data == EOF: | ||
628 | self.state = self.dataState | ||
629 | else: | ||
630 | chars = self.stream.charsUntil(("<", "-", "\u0000")) | ||
631 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": | ||
632 | data + chars}) | ||
633 | return True | ||
634 | |||
635 | def scriptDataEscapedDashState(self): | ||
636 | data = self.stream.char() | ||
637 | if data == "-": | ||
638 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"}) | ||
639 | self.state = self.scriptDataEscapedDashDashState | ||
640 | elif data == "<": | ||
641 | self.state = self.scriptDataEscapedLessThanSignState | ||
642 | elif data == "\u0000": | ||
643 | self.tokenQueue.append({"type": tokenTypes["ParseError"], | ||
644 | "data": "invalid-codepoint"}) | ||
645 | self.tokenQueue.append({"type": tokenTypes["Characters"], | ||
646 | "data": "\uFFFD"}) | ||
647 | self.state = self.scriptDataEscapedState | ||
648 | elif data == EOF: | ||
649 | self.state = self.dataState | ||
650 | else: | ||
651 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) | ||
652 | self.state = self.scriptDataEscapedState | ||
653 | return True | ||
654 | |||
655 | def scriptDataEscapedDashDashState(self): | ||
656 | data = self.stream.char() | ||
657 | if data == "-": | ||
658 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"}) | ||
659 | elif data == "<": | ||
660 | self.state = self.scriptDataEscapedLessThanSignState | ||
661 | elif data == ">": | ||
662 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": ">"}) | ||
663 | self.state = self.scriptDataState | ||
664 | elif data == "\u0000": | ||
665 | self.tokenQueue.append({"type": tokenTypes["ParseError"], | ||
666 | "data": "invalid-codepoint"}) | ||
667 | self.tokenQueue.append({"type": tokenTypes["Characters"], | ||
668 | "data": "\uFFFD"}) | ||
669 | self.state = self.scriptDataEscapedState | ||
670 | elif data == EOF: | ||
671 | self.state = self.dataState | ||
672 | else: | ||
673 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) | ||
674 | self.state = self.scriptDataEscapedState | ||
675 | return True | ||
676 | |||
677 | def scriptDataEscapedLessThanSignState(self): | ||
678 | data = self.stream.char() | ||
679 | if data == "/": | ||
680 | self.temporaryBuffer = "" | ||
681 | self.state = self.scriptDataEscapedEndTagOpenState | ||
682 | elif data in asciiLetters: | ||
683 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<" + data}) | ||
684 | self.temporaryBuffer = data | ||
685 | self.state = self.scriptDataDoubleEscapeStartState | ||
686 | else: | ||
687 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) | ||
688 | self.stream.unget(data) | ||
689 | self.state = self.scriptDataEscapedState | ||
690 | return True | ||
691 | |||
692 | def scriptDataEscapedEndTagOpenState(self): | ||
693 | data = self.stream.char() | ||
694 | if data in asciiLetters: | ||
695 | self.temporaryBuffer = data | ||
696 | self.state = self.scriptDataEscapedEndTagNameState | ||
697 | else: | ||
698 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "</"}) | ||
699 | self.stream.unget(data) | ||
700 | self.state = self.scriptDataEscapedState | ||
701 | return True | ||
702 | |||
703 | def scriptDataEscapedEndTagNameState(self): | ||
704 | appropriate = self.currentToken and self.currentToken["name"].lower() == self.temporaryBuffer.lower() | ||
705 | data = self.stream.char() | ||
706 | if data in spaceCharacters and appropriate: | ||
707 | self.currentToken = {"type": tokenTypes["EndTag"], | ||
708 | "name": self.temporaryBuffer, | ||
709 | "data": [], "selfClosing": False} | ||
710 | self.state = self.beforeAttributeNameState | ||
711 | elif data == "/" and appropriate: | ||
712 | self.currentToken = {"type": tokenTypes["EndTag"], | ||
713 | "name": self.temporaryBuffer, | ||
714 | "data": [], "selfClosing": False} | ||
715 | self.state = self.selfClosingStartTagState | ||
716 | elif data == ">" and appropriate: | ||
717 | self.currentToken = {"type": tokenTypes["EndTag"], | ||
718 | "name": self.temporaryBuffer, | ||
719 | "data": [], "selfClosing": False} | ||
720 | self.emitCurrentToken() | ||
721 | self.state = self.dataState | ||
722 | elif data in asciiLetters: | ||
723 | self.temporaryBuffer += data | ||
724 | else: | ||
725 | self.tokenQueue.append({"type": tokenTypes["Characters"], | ||
726 | "data": "</" + self.temporaryBuffer}) | ||
727 | self.stream.unget(data) | ||
728 | self.state = self.scriptDataEscapedState | ||
729 | return True | ||
730 | |||
731 | def scriptDataDoubleEscapeStartState(self): | ||
732 | data = self.stream.char() | ||
733 | if data in (spaceCharacters | frozenset(("/", ">"))): | ||
734 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) | ||
735 | if self.temporaryBuffer.lower() == "script": | ||
736 | self.state = self.scriptDataDoubleEscapedState | ||
737 | else: | ||
738 | self.state = self.scriptDataEscapedState | ||
739 | elif data in asciiLetters: | ||
740 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) | ||
741 | self.temporaryBuffer += data | ||
742 | else: | ||
743 | self.stream.unget(data) | ||
744 | self.state = self.scriptDataEscapedState | ||
745 | return True | ||
746 | |||
747 | def scriptDataDoubleEscapedState(self): | ||
748 | data = self.stream.char() | ||
749 | if data == "-": | ||
750 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"}) | ||
751 | self.state = self.scriptDataDoubleEscapedDashState | ||
752 | elif data == "<": | ||
753 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) | ||
754 | self.state = self.scriptDataDoubleEscapedLessThanSignState | ||
755 | elif data == "\u0000": | ||
756 | self.tokenQueue.append({"type": tokenTypes["ParseError"], | ||
757 | "data": "invalid-codepoint"}) | ||
758 | self.tokenQueue.append({"type": tokenTypes["Characters"], | ||
759 | "data": "\uFFFD"}) | ||
760 | elif data == EOF: | ||
761 | self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | ||
762 | "eof-in-script-in-script"}) | ||
763 | self.state = self.dataState | ||
764 | else: | ||
765 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) | ||
766 | return True | ||
767 | |||
768 | def scriptDataDoubleEscapedDashState(self): | ||
769 | data = self.stream.char() | ||
770 | if data == "-": | ||
771 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"}) | ||
772 | self.state = self.scriptDataDoubleEscapedDashDashState | ||
773 | elif data == "<": | ||
774 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) | ||
775 | self.state = self.scriptDataDoubleEscapedLessThanSignState | ||
776 | elif data == "\u0000": | ||
777 | self.tokenQueue.append({"type": tokenTypes["ParseError"], | ||
778 | "data": "invalid-codepoint"}) | ||
779 | self.tokenQueue.append({"type": tokenTypes["Characters"], | ||
780 | "data": "\uFFFD"}) | ||
781 | self.state = self.scriptDataDoubleEscapedState | ||
782 | elif data == EOF: | ||
783 | self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | ||
784 | "eof-in-script-in-script"}) | ||
785 | self.state = self.dataState | ||
786 | else: | ||
787 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) | ||
788 | self.state = self.scriptDataDoubleEscapedState | ||
789 | return True | ||
790 | |||
791 | def scriptDataDoubleEscapedDashDashState(self): | ||
792 | data = self.stream.char() | ||
793 | if data == "-": | ||
794 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"}) | ||
795 | elif data == "<": | ||
796 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) | ||
797 | self.state = self.scriptDataDoubleEscapedLessThanSignState | ||
798 | elif data == ">": | ||
799 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": ">"}) | ||
800 | self.state = self.scriptDataState | ||
801 | elif data == "\u0000": | ||
802 | self.tokenQueue.append({"type": tokenTypes["ParseError"], | ||
803 | "data": "invalid-codepoint"}) | ||
804 | self.tokenQueue.append({"type": tokenTypes["Characters"], | ||
805 | "data": "\uFFFD"}) | ||
806 | self.state = self.scriptDataDoubleEscapedState | ||
807 | elif data == EOF: | ||
808 | self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | ||
809 | "eof-in-script-in-script"}) | ||
810 | self.state = self.dataState | ||
811 | else: | ||
812 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) | ||
813 | self.state = self.scriptDataDoubleEscapedState | ||
814 | return True | ||
815 | |||
816 | def scriptDataDoubleEscapedLessThanSignState(self): | ||
817 | data = self.stream.char() | ||
818 | if data == "/": | ||
819 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "/"}) | ||
820 | self.temporaryBuffer = "" | ||
821 | self.state = self.scriptDataDoubleEscapeEndState | ||
822 | else: | ||
823 | self.stream.unget(data) | ||
824 | self.state = self.scriptDataDoubleEscapedState | ||
825 | return True | ||
826 | |||
827 | def scriptDataDoubleEscapeEndState(self): | ||
828 | data = self.stream.char() | ||
829 | if data in (spaceCharacters | frozenset(("/", ">"))): | ||
830 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) | ||
831 | if self.temporaryBuffer.lower() == "script": | ||
832 | self.state = self.scriptDataEscapedState | ||
833 | else: | ||
834 | self.state = self.scriptDataDoubleEscapedState | ||
835 | elif data in asciiLetters: | ||
836 | self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) | ||
837 | self.temporaryBuffer += data | ||
838 | else: | ||
839 | self.stream.unget(data) | ||
840 | self.state = self.scriptDataDoubleEscapedState | ||
841 | return True | ||
842 | |||
843 | def beforeAttributeNameState(self): | ||
844 | data = self.stream.char() | ||
845 | if data in spaceCharacters: | ||
846 | self.stream.charsUntil(spaceCharacters, True) | ||
847 | elif data in asciiLetters: | ||
848 | self.currentToken["data"].append([data, ""]) | ||
849 | self.state = self.attributeNameState | ||
850 | elif data == ">": | ||
851 | self.emitCurrentToken() | ||
852 | elif data == "/": | ||
853 | self.state = self.selfClosingStartTagState | ||
854 | elif data in ("'", '"', "=", "<"): | ||
855 | self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | ||
856 | "invalid-character-in-attribute-name"}) | ||
857 | self.currentToken["data"].append([data, ""]) | ||
858 | self.state = self.attributeNameState | ||
859 | elif data == "\u0000": | ||
860 | self.tokenQueue.append({"type": tokenTypes["ParseError"], | ||
861 | "data": "invalid-codepoint"}) | ||
862 | self.currentToken["data"].append(["\uFFFD", ""]) | ||
863 | self.state = self.attributeNameState | ||
864 | elif data is EOF: | ||
865 | self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | ||
866 | "expected-attribute-name-but-got-eof"}) | ||
867 | self.state = self.dataState | ||
868 | else: | ||
869 | self.currentToken["data"].append([data, ""]) | ||
870 | self.state = self.attributeNameState | ||
871 | return True | ||
872 | |||
873 | def attributeNameState(self): | ||
874 | data = self.stream.char() | ||
875 | leavingThisState = True | ||
876 | emitToken = False | ||
877 | if data == "=": | ||
878 | self.state = self.beforeAttributeValueState | ||
879 | elif data in asciiLetters: | ||
880 | self.currentToken["data"][-1][0] += data +\ | ||
881 | self.stream.charsUntil(asciiLetters, True) | ||
882 | leavingThisState = False | ||
883 | elif data == ">": | ||
884 | # XXX If we emit here the attributes are converted to a dict | ||
885 | # without being checked and when the code below runs we error | ||
886 | # because data is a dict not a list | ||
887 | emitToken = True | ||
888 | elif data in spaceCharacters: | ||
889 | self.state = self.afterAttributeNameState | ||
890 | elif data == "/": | ||
891 | self.state = self.selfClosingStartTagState | ||
892 | elif data == "\u0000": | ||
893 | self.tokenQueue.append({"type": tokenTypes["ParseError"], | ||
894 | "data": "invalid-codepoint"}) | ||
895 | self.currentToken["data"][-1][0] += "\uFFFD" | ||
896 | leavingThisState = False | ||
897 | elif data in ("'", '"', "<"): | ||
898 | self.tokenQueue.append({"type": tokenTypes["ParseError"], | ||
899 | "data": | ||
900 | "invalid-character-in-attribute-name"}) | ||
901 | self.currentToken["data"][-1][0] += data | ||
902 | leavingThisState = False | ||
903 | elif data is EOF: | ||
904 | self.tokenQueue.append({"type": tokenTypes["ParseError"], | ||
905 | "data": "eof-in-attribute-name"}) | ||
906 | self.state = self.dataState | ||
907 | else: | ||
908 | self.currentToken["data"][-1][0] += data | ||
909 | leavingThisState = False | ||
910 | |||
911 | if leavingThisState: | ||
912 | # Attributes are not dropped at this stage. That happens when the | ||
913 | # start tag token is emitted so values can still be safely appended | ||
914 | # to attributes, but we do want to report the parse error in time. | ||
915 | self.currentToken["data"][-1][0] = ( | ||
916 | self.currentToken["data"][-1][0].translate(asciiUpper2Lower)) | ||
917 | for name, _ in self.currentToken["data"][:-1]: | ||
918 | if self.currentToken["data"][-1][0] == name: | ||
919 | self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | ||
920 | "duplicate-attribute"}) | ||
921 | break | ||
922 | # XXX Fix for above XXX | ||
923 | if emitToken: | ||
924 | self.emitCurrentToken() | ||
925 | return True | ||
926 | |||
    def afterAttributeNameState(self):
        """Tokenizer state: after an attribute name, before '=', '>', '/' or
        the start of the next attribute.  Always returns True so the
        tokenizer's main loop keeps pumping states."""
        data = self.stream.char()
        if data in spaceCharacters:
            # Skip any further whitespace in one buffered read.
            self.stream.charsUntil(spaceCharacters, True)
        elif data == "=":
            self.state = self.beforeAttributeValueState
        elif data == ">":
            self.emitCurrentToken()
        elif data in asciiLetters:
            # Start of a new attribute: stored as a mutable [name, value] pair
            # on the current tag token.
            self.currentToken["data"].append([data, ""])
            self.state = self.attributeNameState
        elif data == "/":
            self.state = self.selfClosingStartTagState
        elif data == "\u0000":
            # U+0000 is reported and replaced with U+FFFD.
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "invalid-codepoint"})
            self.currentToken["data"].append(["\uFFFD", ""])
            self.state = self.attributeNameState
        elif data in ("'", '"', "<"):
            # Invalid here, but still consumed as the start of a new name.
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "invalid-character-after-attribute-name"})
            self.currentToken["data"].append([data, ""])
            self.state = self.attributeNameState
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "expected-end-of-tag-but-got-eof"})
            self.state = self.dataState
        else:
            self.currentToken["data"].append([data, ""])
            self.state = self.attributeNameState
        return True
958 | |||
    def beforeAttributeValueState(self):
        """Tokenizer state: between '=' and an attribute value; dispatch on
        the quoting style of the value (double, single or unquoted)."""
        data = self.stream.char()
        if data in spaceCharacters:
            self.stream.charsUntil(spaceCharacters, True)
        elif data == "\"":
            self.state = self.attributeValueDoubleQuotedState
        elif data == "&":
            # Push the "&" back so the unquoted-value state can process the
            # character reference itself.
            self.state = self.attributeValueUnQuotedState
            self.stream.unget(data)
        elif data == "'":
            self.state = self.attributeValueSingleQuotedState
        elif data == ">":
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "expected-attribute-value-but-got-right-bracket"})
            self.emitCurrentToken()
        elif data == "\u0000":
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "invalid-codepoint"})
            self.currentToken["data"][-1][1] += "\uFFFD"
            self.state = self.attributeValueUnQuotedState
        elif data in ("=", "<", "`"):
            # Invalid first character, but the value still starts with it.
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "equals-in-unquoted-attribute-value"})
            self.currentToken["data"][-1][1] += data
            self.state = self.attributeValueUnQuotedState
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "expected-attribute-value-but-got-eof"})
            self.state = self.dataState
        else:
            self.currentToken["data"][-1][1] += data
            self.state = self.attributeValueUnQuotedState
        return True
992 | |||
    def attributeValueDoubleQuotedState(self):
        """Tokenizer state: inside a double-quoted attribute value."""
        data = self.stream.char()
        if data == "\"":
            self.state = self.afterAttributeValueState
        elif data == "&":
            # '"' is passed as the allowed-character terminator for the
            # character-reference parser.
            self.processEntityInAttribute('"')
        elif data == "\u0000":
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "invalid-codepoint"})
            self.currentToken["data"][-1][1] += "\uFFFD"
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "eof-in-attribute-value-double-quote"})
            self.state = self.dataState
        else:
            # Consume a whole run of ordinary characters in one buffered read.
            self.currentToken["data"][-1][1] += data +\
                self.stream.charsUntil(("\"", "&", "\u0000"))
        return True
1011 | |||
    def attributeValueSingleQuotedState(self):
        """Tokenizer state: inside a single-quoted attribute value."""
        data = self.stream.char()
        if data == "'":
            self.state = self.afterAttributeValueState
        elif data == "&":
            # "'" is passed as the allowed-character terminator for the
            # character-reference parser.
            self.processEntityInAttribute("'")
        elif data == "\u0000":
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "invalid-codepoint"})
            self.currentToken["data"][-1][1] += "\uFFFD"
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "eof-in-attribute-value-single-quote"})
            self.state = self.dataState
        else:
            # Consume a whole run of ordinary characters in one buffered read.
            self.currentToken["data"][-1][1] += data +\
                self.stream.charsUntil(("'", "&", "\u0000"))
        return True
1030 | |||
    def attributeValueUnQuotedState(self):
        """Tokenizer state: inside an unquoted attribute value."""
        data = self.stream.char()
        if data in spaceCharacters:
            self.state = self.beforeAttributeNameState
        elif data == "&":
            self.processEntityInAttribute(">")
        elif data == ">":
            self.emitCurrentToken()
        elif data in ('"', "'", "=", "<", "`"):
            # Reported but still appended to the value.
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "unexpected-character-in-unquoted-attribute-value"})
            self.currentToken["data"][-1][1] += data
        elif data == "\u0000":
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "invalid-codepoint"})
            self.currentToken["data"][-1][1] += "\uFFFD"
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "eof-in-attribute-value-no-quotes"})
            self.state = self.dataState
        else:
            # Consume a run of characters up to any terminator in one read.
            self.currentToken["data"][-1][1] += data + self.stream.charsUntil(
                frozenset(("&", ">", '"', "'", "=", "<", "`", "\u0000")) | spaceCharacters)
        return True
1055 | |||
    def afterAttributeValueState(self):
        """Tokenizer state: just after a quoted attribute value."""
        data = self.stream.char()
        if data in spaceCharacters:
            self.state = self.beforeAttributeNameState
        elif data == ">":
            self.emitCurrentToken()
        elif data == "/":
            self.state = self.selfClosingStartTagState
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "unexpected-EOF-after-attribute-value"})
            self.stream.unget(data)
            self.state = self.dataState
        else:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "unexpected-character-after-attribute-value"})
            # Reprocess the character as the start of a new attribute.
            self.stream.unget(data)
            self.state = self.beforeAttributeNameState
        return True
1075 | |||
    def selfClosingStartTagState(self):
        """Tokenizer state: after '/' inside a tag; a following '>' marks the
        tag as self-closing and emits it."""
        data = self.stream.char()
        if data == ">":
            self.currentToken["selfClosing"] = True
            self.emitCurrentToken()
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data":
                                    "unexpected-EOF-after-solidus-in-tag"})
            self.stream.unget(data)
            self.state = self.dataState
        else:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "unexpected-character-after-solidus-in-tag"})
            # Reprocess the character in the attribute-name machinery.
            self.stream.unget(data)
            self.state = self.beforeAttributeNameState
        return True
1093 | |||
    def bogusCommentState(self):
        """Tokenizer state: swallow malformed markup (e.g. "<!foo") as a
        Comment token."""
        # Make a new comment token and give it as value all the characters
        # until the first > or EOF (charsUntil checks for EOF automatically)
        # and emit it.
        data = self.stream.charsUntil(">")
        data = data.replace("\u0000", "\uFFFD")
        self.tokenQueue.append(
            {"type": tokenTypes["Comment"], "data": data})

        # Eat the character directly after the bogus comment which is either a
        # ">" or an EOF.
        self.stream.char()
        self.state = self.dataState
        return True
1108 | |||
    def markupDeclarationOpenState(self):
        """Tokenizer state: after "<!"; decide between a comment ("--"), a
        DOCTYPE (case-insensitive "DOCTYPE"), or — in foreign content only —
        a CDATA section ("[CDATA[", case-sensitive)."""
        charStack = [self.stream.char()]
        if charStack[-1] == "-":
            charStack.append(self.stream.char())
            if charStack[-1] == "-":
                self.currentToken = {"type": tokenTypes["Comment"], "data": ""}
                self.state = self.commentStartState
                return True
        elif charStack[-1] in ('d', 'D'):
            # Case-insensitive match of the remaining "OCTYPE" letters.
            matched = True
            for expected in (('o', 'O'), ('c', 'C'), ('t', 'T'),
                             ('y', 'Y'), ('p', 'P'), ('e', 'E')):
                charStack.append(self.stream.char())
                if charStack[-1] not in expected:
                    matched = False
                    break
            if matched:
                self.currentToken = {"type": tokenTypes["Doctype"],
                                     "name": "",
                                     "publicId": None, "systemId": None,
                                     "correct": True}
                self.state = self.doctypeState
                return True
        elif (charStack[-1] == "[" and
              self.parser is not None and
              self.parser.tree.openElements and
              self.parser.tree.openElements[-1].namespace != self.parser.tree.defaultNamespace):
            # CDATA is only recognised when the current node is in a foreign
            # (non-HTML) namespace, e.g. inside SVG or MathML.
            matched = True
            for expected in ["C", "D", "A", "T", "A", "["]:
                charStack.append(self.stream.char())
                if charStack[-1] != expected:
                    matched = False
                    break
            if matched:
                self.state = self.cdataSectionState
                return True

        self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                "expected-dashes-or-doctype"})

        # No match: push every consumed character back (LIFO) and fall back
        # to treating the construct as a bogus comment.
        while charStack:
            self.stream.unget(charStack.pop())
        self.state = self.bogusCommentState
        return True
1153 | |||
    def commentStartState(self):
        """Tokenizer state: right after "<!--"."""
        data = self.stream.char()
        if data == "-":
            self.state = self.commentStartDashState
        elif data == "\u0000":
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "invalid-codepoint"})
            self.currentToken["data"] += "\uFFFD"
        elif data == ">":
            # "<!-->" : an (incorrect) empty comment.
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "incorrect-comment"})
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "eof-in-comment"})
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            self.currentToken["data"] += data
            self.state = self.commentState
        return True
1176 | |||
    def commentStartDashState(self):
        """Tokenizer state: after "<!---" (one dash into the comment body)."""
        data = self.stream.char()
        if data == "-":
            self.state = self.commentEndState
        elif data == "\u0000":
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "invalid-codepoint"})
            # The pending "-" turned out to be comment data; re-add it.
            self.currentToken["data"] += "-\uFFFD"
        elif data == ">":
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "incorrect-comment"})
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "eof-in-comment"})
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            self.currentToken["data"] += "-" + data
            self.state = self.commentState
        return True
1199 | |||
    def commentState(self):
        """Tokenizer state: inside a comment body."""
        data = self.stream.char()
        if data == "-":
            self.state = self.commentEndDashState
        elif data == "\u0000":
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "invalid-codepoint"})
            self.currentToken["data"] += "\uFFFD"
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "eof-in-comment"})
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            # Consume a whole run of ordinary characters in one buffered read.
            self.currentToken["data"] += data + \
                self.stream.charsUntil(("-", "\u0000"))
        return True
1217 | |||
    def commentEndDashState(self):
        """Tokenizer state: one '-' seen inside a comment body."""
        data = self.stream.char()
        if data == "-":
            self.state = self.commentEndState
        elif data == "\u0000":
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "invalid-codepoint"})
            # The pending "-" was comment data after all.
            self.currentToken["data"] += "-\uFFFD"
            self.state = self.commentState
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "eof-in-comment-end-dash"})
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            self.currentToken["data"] += "-" + data
            self.state = self.commentState
        return True
1236 | |||
    def commentEndState(self):
        """Tokenizer state: "--" seen inside a comment; '>' closes it."""
        data = self.stream.char()
        if data == ">":
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        elif data == "\u0000":
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "invalid-codepoint"})
            # The pending "--" was comment data after all.
            self.currentToken["data"] += "--\uFFFD"
            self.state = self.commentState
        elif data == "!":
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "unexpected-bang-after-double-dash-in-comment"})
            self.state = self.commentEndBangState
        elif data == "-":
            # "---": keep the extra dash and stay in this state.
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "unexpected-dash-after-double-dash-in-comment"})
            self.currentToken["data"] += data
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "eof-in-comment-double-dash"})
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            # XXX
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "unexpected-char-in-comment"})
            self.currentToken["data"] += "--" + data
            self.state = self.commentState
        return True
1267 | |||
    def commentEndBangState(self):
        """Tokenizer state: "--!" seen inside a comment; '>' still closes
        it, anything else folds "--!" back into the comment data."""
        data = self.stream.char()
        if data == ">":
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        elif data == "-":
            self.currentToken["data"] += "--!"
            self.state = self.commentEndDashState
        elif data == "\u0000":
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "invalid-codepoint"})
            self.currentToken["data"] += "--!\uFFFD"
            self.state = self.commentState
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "eof-in-comment-end-bang-state"})
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            self.currentToken["data"] += "--!" + data
            self.state = self.commentState
        return True
1290 | |||
    def doctypeState(self):
        """Tokenizer state: right after "<!DOCTYPE"; expects whitespace."""
        data = self.stream.char()
        if data in spaceCharacters:
            self.state = self.beforeDoctypeNameState
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "expected-doctype-name-but-got-eof"})
            self.currentToken["correct"] = False
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            # Missing space is a parse error but the name is still parsed;
            # reprocess the character in the next state.
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "need-space-after-doctype"})
            self.stream.unget(data)
            self.state = self.beforeDoctypeNameState
        return True
1307 | |||
    def beforeDoctypeNameState(self):
        """Tokenizer state: before the DOCTYPE name; skips whitespace."""
        data = self.stream.char()
        if data in spaceCharacters:
            pass
        elif data == ">":
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "expected-doctype-name-but-got-right-bracket"})
            self.currentToken["correct"] = False
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        elif data == "\u0000":
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "invalid-codepoint"})
            # The replacement character starts the name (assignment, not +=).
            self.currentToken["name"] = "\uFFFD"
            self.state = self.doctypeNameState
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "expected-doctype-name-but-got-eof"})
            self.currentToken["correct"] = False
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            self.currentToken["name"] = data
            self.state = self.doctypeNameState
        return True
1333 | |||
    def doctypeNameState(self):
        """Tokenizer state: inside the DOCTYPE name; the name is ASCII
        lower-cased whenever this state is left."""
        data = self.stream.char()
        if data in spaceCharacters:
            self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower)
            self.state = self.afterDoctypeNameState
        elif data == ">":
            self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower)
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        elif data == "\u0000":
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "invalid-codepoint"})
            self.currentToken["name"] += "\uFFFD"
            self.state = self.doctypeNameState
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "eof-in-doctype-name"})
            self.currentToken["correct"] = False
            self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower)
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            self.currentToken["name"] += data
        return True
1358 | |||
    def afterDoctypeNameState(self):
        """Tokenizer state: after the DOCTYPE name; looks for the
        case-insensitive keywords PUBLIC or SYSTEM, else a bogus doctype."""
        data = self.stream.char()
        if data in spaceCharacters:
            pass
        elif data == ">":
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        elif data is EOF:
            self.currentToken["correct"] = False
            self.stream.unget(data)
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "eof-in-doctype"})
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            if data in ("p", "P"):
                # Case-insensitive match of the remaining "UBLIC" letters.
                matched = True
                for expected in (("u", "U"), ("b", "B"), ("l", "L"),
                                 ("i", "I"), ("c", "C")):
                    data = self.stream.char()
                    if data not in expected:
                        matched = False
                        break
                if matched:
                    self.state = self.afterDoctypePublicKeywordState
                    return True
            elif data in ("s", "S"):
                # Case-insensitive match of the remaining "YSTEM" letters.
                matched = True
                for expected in (("y", "Y"), ("s", "S"), ("t", "T"),
                                 ("e", "E"), ("m", "M")):
                    data = self.stream.char()
                    if data not in expected:
                        matched = False
                        break
                if matched:
                    self.state = self.afterDoctypeSystemKeywordState
                    return True

            # All the characters read before the current 'data' will be
            # [a-zA-Z], so they're garbage in the bogus doctype and can be
            # discarded; only the latest character might be '>' or EOF
            # and needs to be ungetted
            self.stream.unget(data)
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "expected-space-or-right-bracket-in-doctype", "datavars":
                                    {"data": data}})
            self.currentToken["correct"] = False
            self.state = self.bogusDoctypeState

        return True
1409 | |||
    def afterDoctypePublicKeywordState(self):
        """Tokenizer state: right after the PUBLIC keyword."""
        data = self.stream.char()
        if data in spaceCharacters:
            self.state = self.beforeDoctypePublicIdentifierState
        elif data in ("'", '"'):
            # Quote with no intervening space: error, then reprocess it.
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "unexpected-char-in-doctype"})
            self.stream.unget(data)
            self.state = self.beforeDoctypePublicIdentifierState
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "eof-in-doctype"})
            self.currentToken["correct"] = False
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            self.stream.unget(data)
            self.state = self.beforeDoctypePublicIdentifierState
        return True
1429 | |||
    def beforeDoctypePublicIdentifierState(self):
        """Tokenizer state: before the public identifier; a quote starts it."""
        data = self.stream.char()
        if data in spaceCharacters:
            pass
        elif data == "\"":
            self.currentToken["publicId"] = ""
            self.state = self.doctypePublicIdentifierDoubleQuotedState
        elif data == "'":
            self.currentToken["publicId"] = ""
            self.state = self.doctypePublicIdentifierSingleQuotedState
        elif data == ">":
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "unexpected-end-of-doctype"})
            self.currentToken["correct"] = False
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "eof-in-doctype"})
            self.currentToken["correct"] = False
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "unexpected-char-in-doctype"})
            self.currentToken["correct"] = False
            self.state = self.bogusDoctypeState
        return True
1458 | |||
    def doctypePublicIdentifierDoubleQuotedState(self):
        """Tokenizer state: inside a double-quoted DOCTYPE public id."""
        data = self.stream.char()
        if data == "\"":
            self.state = self.afterDoctypePublicIdentifierState
        elif data == "\u0000":
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "invalid-codepoint"})
            self.currentToken["publicId"] += "\uFFFD"
        elif data == ">":
            # '>' inside the quotes still terminates the whole doctype.
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "unexpected-end-of-doctype"})
            self.currentToken["correct"] = False
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "eof-in-doctype"})
            self.currentToken["correct"] = False
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            self.currentToken["publicId"] += data
        return True
1482 | |||
    def doctypePublicIdentifierSingleQuotedState(self):
        """Tokenizer state: inside a single-quoted DOCTYPE public id."""
        data = self.stream.char()
        if data == "'":
            self.state = self.afterDoctypePublicIdentifierState
        elif data == "\u0000":
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "invalid-codepoint"})
            self.currentToken["publicId"] += "\uFFFD"
        elif data == ">":
            # '>' inside the quotes still terminates the whole doctype.
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "unexpected-end-of-doctype"})
            self.currentToken["correct"] = False
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "eof-in-doctype"})
            self.currentToken["correct"] = False
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            self.currentToken["publicId"] += data
        return True
1506 | |||
    def afterDoctypePublicIdentifierState(self):
        """Tokenizer state: after the public id; a quote (with an error if no
        space precedes it) starts the system id."""
        data = self.stream.char()
        if data in spaceCharacters:
            self.state = self.betweenDoctypePublicAndSystemIdentifiersState
        elif data == ">":
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        elif data == '"':
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "unexpected-char-in-doctype"})
            self.currentToken["systemId"] = ""
            self.state = self.doctypeSystemIdentifierDoubleQuotedState
        elif data == "'":
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "unexpected-char-in-doctype"})
            self.currentToken["systemId"] = ""
            self.state = self.doctypeSystemIdentifierSingleQuotedState
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "eof-in-doctype"})
            self.currentToken["correct"] = False
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "unexpected-char-in-doctype"})
            self.currentToken["correct"] = False
            self.state = self.bogusDoctypeState
        return True
1536 | |||
1537 | def betweenDoctypePublicAndSystemIdentifiersState(self): | ||
1538 | data = self.stream.char() | ||
1539 | if data in spaceCharacters: | ||
1540 | pass | ||
1541 | elif data == ">": | ||
1542 | self.tokenQueue.append(self.currentToken) | ||
1543 | self.state = self.dataState | ||
1544 | elif data == '"': | ||
1545 | self.currentToken["systemId"] = "" | ||
1546 | self.state = self.doctypeSystemIdentifierDoubleQuotedState | ||
1547 | elif data == "'": | ||
1548 | self.currentToken["systemId"] = "" | ||
1549 | self.state = self.doctypeSystemIdentifierSingleQuotedState | ||
1550 | elif data == EOF: | ||
1551 | self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | ||
1552 | "eof-in-doctype"}) | ||
1553 | self.currentToken["correct"] = False | ||
1554 | self.tokenQueue.append(self.currentToken) | ||
1555 | self.state = self.dataState | ||
1556 | else: | ||
1557 | self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | ||
1558 | "unexpected-char-in-doctype"}) | ||
1559 | self.currentToken["correct"] = False | ||
1560 | self.state = self.bogusDoctypeState | ||
1561 | return True | ||
1562 | |||
    def afterDoctypeSystemKeywordState(self):
        """Tokenizer state: right after the SYSTEM keyword."""
        data = self.stream.char()
        if data in spaceCharacters:
            self.state = self.beforeDoctypeSystemIdentifierState
        elif data in ("'", '"'):
            # Quote with no intervening space: error, then reprocess it.
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "unexpected-char-in-doctype"})
            self.stream.unget(data)
            self.state = self.beforeDoctypeSystemIdentifierState
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "eof-in-doctype"})
            self.currentToken["correct"] = False
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            self.stream.unget(data)
            self.state = self.beforeDoctypeSystemIdentifierState
        return True
1582 | |||
    def beforeDoctypeSystemIdentifierState(self):
        """Tokenizer state: before the system identifier; a quote starts it."""
        data = self.stream.char()
        if data in spaceCharacters:
            pass
        elif data == "\"":
            self.currentToken["systemId"] = ""
            self.state = self.doctypeSystemIdentifierDoubleQuotedState
        elif data == "'":
            self.currentToken["systemId"] = ""
            self.state = self.doctypeSystemIdentifierSingleQuotedState
        elif data == ">":
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "unexpected-char-in-doctype"})
            self.currentToken["correct"] = False
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "eof-in-doctype"})
            self.currentToken["correct"] = False
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "unexpected-char-in-doctype"})
            self.currentToken["correct"] = False
            self.state = self.bogusDoctypeState
        return True
1611 | |||
    def doctypeSystemIdentifierDoubleQuotedState(self):
        """Tokenizer state: inside a double-quoted DOCTYPE system id."""
        data = self.stream.char()
        if data == "\"":
            self.state = self.afterDoctypeSystemIdentifierState
        elif data == "\u0000":
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "invalid-codepoint"})
            self.currentToken["systemId"] += "\uFFFD"
        elif data == ">":
            # '>' inside the quotes still terminates the whole doctype.
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "unexpected-end-of-doctype"})
            self.currentToken["correct"] = False
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "eof-in-doctype"})
            self.currentToken["correct"] = False
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            self.currentToken["systemId"] += data
        return True
1635 | |||
    def doctypeSystemIdentifierSingleQuotedState(self):
        """Tokenizer state: inside a single-quoted DOCTYPE system id."""
        data = self.stream.char()
        if data == "'":
            self.state = self.afterDoctypeSystemIdentifierState
        elif data == "\u0000":
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "invalid-codepoint"})
            self.currentToken["systemId"] += "\uFFFD"
        elif data == ">":
            # '>' inside the quotes still terminates the whole doctype.
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "unexpected-end-of-doctype"})
            self.currentToken["correct"] = False
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "eof-in-doctype"})
            self.currentToken["correct"] = False
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            self.currentToken["systemId"] += data
        return True
1659 | |||
    def afterDoctypeSystemIdentifierState(self):
        """Tokenizer state: after the system id; only whitespace or '>' is
        valid.  Note: unlike other error paths here, the bogus branch does
        not clear currentToken["correct"]."""
        data = self.stream.char()
        if data in spaceCharacters:
            pass
        elif data == ">":
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "eof-in-doctype"})
            self.currentToken["correct"] = False
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "unexpected-char-in-doctype"})
            self.state = self.bogusDoctypeState
        return True
1678 | |||
    def bogusDoctypeState(self):
        """Tokenizer state: discard everything up to '>' (or EOF) after a
        malformed DOCTYPE, then emit the token."""
        data = self.stream.char()
        if data == ">":
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        elif data is EOF:
            # XXX EMIT
            self.stream.unget(data)
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            # Ignore all other characters.
            pass
        return True
1692 | |||
1693 | def cdataSectionState(self): | ||
1694 | data = [] | ||
1695 | while True: | ||
1696 | data.append(self.stream.charsUntil("]")) | ||
1697 | data.append(self.stream.charsUntil(">")) | ||
1698 | char = self.stream.char() | ||
1699 | if char == EOF: | ||
1700 | break | ||
1701 | else: | ||
1702 | assert char == ">" | ||
1703 | if data[-1][-2:] == "]]": | ||
1704 | data[-1] = data[-1][:-2] | ||
1705 | break | ||
1706 | else: | ||
1707 | data.append(char) | ||
1708 | |||
1709 | data = "".join(data) # pylint:disable=redefined-variable-type | ||
1710 | # Deal with null here rather than in the parser | ||
1711 | nullCount = data.count("\u0000") | ||
1712 | if nullCount > 0: | ||
1713 | for _ in range(nullCount): | ||
1714 | self.tokenQueue.append({"type": tokenTypes["ParseError"], | ||
1715 | "data": "invalid-codepoint"}) | ||
1716 | data = data.replace("\u0000", "\uFFFD") | ||
1717 | if data: | ||
1718 | self.tokenQueue.append({"type": tokenTypes["Characters"], | ||
1719 | "data": data}) | ||
1720 | self.state = self.dataState | ||
1721 | return True | ||
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_trie/__init__.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_trie/__init__.py new file mode 100644 index 0000000..ccc70bd --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_trie/__init__.py | |||
@@ -0,0 +1,14 @@ | |||
from __future__ import absolute_import, division, unicode_literals

from .py import Trie as PyTrie

# Default to the pure-Python trie implementation.
Trie = PyTrie

# If the optional C-accelerated `datrie` package is importable, prefer its
# wrapper; otherwise silently keep the pure-Python fallback.
# pylint:disable=wrong-import-position
try:
    from .datrie import Trie as DATrie
except ImportError:
    pass
else:
    Trie = DATrie
# pylint:enable=wrong-import-position
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_trie/_base.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_trie/_base.py new file mode 100644 index 0000000..ecfff32 --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_trie/_base.py | |||
@@ -0,0 +1,37 @@ | |||
from __future__ import absolute_import, division, unicode_literals

try:
    # ``Mapping`` lives in ``collections.abc``; importing it from the bare
    # ``collections`` module is deprecated and removed in Python 3.10.
    from collections.abc import Mapping
except ImportError:  # Python 2.7
    from collections import Mapping


class Trie(Mapping):
    """Abstract base class for tries.

    Concrete subclasses provide the ``Mapping`` protocol (``__getitem__``,
    ``__iter__``, ``__len__``); this base adds prefix-oriented helpers on
    top of it.
    """

    def keys(self, prefix=None):
        """Return the set of keys, restricted to those starting with
        *prefix* when one is given."""
        # pylint:disable=arguments-differ
        keys = super(Trie, self).keys()

        if prefix is None:
            return set(keys)

        return {x for x in keys if x.startswith(prefix)}

    def has_keys_with_prefix(self, prefix):
        """Return True if at least one key starts with *prefix*."""
        for key in self.keys():
            if key.startswith(prefix):
                return True

        return False

    def longest_prefix(self, prefix):
        """Return the longest key that is a prefix of *prefix*.

        Raises:
            KeyError: if no key (not even the empty string) matches.
        """
        if prefix in self:
            return prefix

        # Try successively shorter head slices of *prefix*.
        for i in range(1, len(prefix) + 1):
            if prefix[:-i] in self:
                return prefix[:-i]

        raise KeyError(prefix)

    def longest_prefix_item(self, prefix):
        """Return ``(key, value)`` for the longest key that prefixes
        *prefix* (see :meth:`longest_prefix`)."""
        lprefix = self.longest_prefix(prefix)
        return (lprefix, self[lprefix])
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_trie/datrie.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_trie/datrie.py new file mode 100644 index 0000000..cb1af60 --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_trie/datrie.py | |||
@@ -0,0 +1,44 @@ | |||
1 | from __future__ import absolute_import, division, unicode_literals | ||
2 | |||
3 | from datrie import Trie as DATrie | ||
4 | from pip._vendor.six import text_type | ||
5 | |||
6 | from ._base import Trie as ABCTrie | ||
7 | |||
8 | |||
class Trie(ABCTrie):
    """Trie backed by the C-accelerated ``datrie`` package."""

    def __init__(self, data):
        # datrie needs the full alphabet of characters up front.
        alphabet = set()
        for key in data.keys():
            if not isinstance(key, text_type):
                raise TypeError("All keys must be strings")
            alphabet.update(key)

        self._data = DATrie("".join(alphabet))
        for key, value in data.items():
            self._data[key] = value

    def __contains__(self, key):
        return key in self._data

    def __len__(self):
        return len(self._data)

    def __iter__(self):
        # datrie offers no ordered key iteration; callers must not rely on it.
        raise NotImplementedError()

    def __getitem__(self, key):
        return self._data[key]

    def keys(self, prefix=None):
        # Delegate straight to datrie's prefix-aware key listing.
        return self._data.keys(prefix)

    def has_keys_with_prefix(self, prefix):
        return self._data.has_keys_with_prefix(prefix)

    def longest_prefix(self, prefix):
        return self._data.longest_prefix(prefix)

    def longest_prefix_item(self, prefix):
        return self._data.longest_prefix_item(prefix)
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_trie/py.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_trie/py.py new file mode 100644 index 0000000..5531263 --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_trie/py.py | |||
@@ -0,0 +1,67 @@ | |||
1 | from __future__ import absolute_import, division, unicode_literals | ||
2 | from pip._vendor.six import text_type | ||
3 | |||
4 | from bisect import bisect_left | ||
5 | |||
6 | from ._base import Trie as ABCTrie | ||
7 | |||
8 | |||
class Trie(ABCTrie):
    """Pure-Python trie backed by a sorted key list and binary search.

    A one-entry cache (``_cachestr``/``_cachepoints``) remembers the bisect
    window of the most recent prefix lookup, so a subsequent lookup of an
    extending prefix only searches the narrowed slice.
    """

    def __init__(self, data):
        if not all(isinstance(x, text_type) for x in data.keys()):
            raise TypeError("All keys must be strings")

        self._data = data
        self._keys = sorted(data.keys())
        self._cachestr = ""
        self._cachepoints = (0, len(data))

    def __contains__(self, key):
        return key in self._data

    def __len__(self):
        return len(self._data)

    def __iter__(self):
        return iter(self._data)

    def __getitem__(self, key):
        return self._data[key]

    def keys(self, prefix=None):
        """Return the set of keys starting with *prefix* (all keys when
        *prefix* is None or empty)."""
        if prefix is None or prefix == "" or not self._keys:
            return set(self._keys)

        if prefix.startswith(self._cachestr):
            # The cached window of the previous, shorter prefix still bounds
            # every key that can match this one.
            lo, hi = self._cachepoints
            start = i = bisect_left(self._keys, prefix, lo, hi)
        else:
            start = i = bisect_left(self._keys, prefix)

        keys = set()
        if start == len(self._keys):
            return keys

        # Fix: also bound the index -- the original loop walked one element
        # past the end of self._keys (IndexError) whenever the last sorted
        # key matched the prefix.
        while i < len(self._keys) and self._keys[i].startswith(prefix):
            keys.add(self._keys[i])
            i += 1

        self._cachestr = prefix
        self._cachepoints = (start, i)

        return keys

    def has_keys_with_prefix(self, prefix):
        """Return True if any key starts with *prefix*; cheaper than
        building the full keys() set."""
        if prefix in self._data:
            return True

        if prefix.startswith(self._cachestr):
            lo, hi = self._cachepoints
            i = bisect_left(self._keys, prefix, lo, hi)
        else:
            i = bisect_left(self._keys, prefix)

        if i == len(self._keys):
            return False

        # Only the first key at/after the insertion point can match.
        return self._keys[i].startswith(prefix)
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_utils.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_utils.py new file mode 100644 index 0000000..a559fa0 --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/_utils.py | |||
@@ -0,0 +1,124 @@ | |||
1 | from __future__ import absolute_import, division, unicode_literals | ||
2 | |||
3 | from types import ModuleType | ||
4 | |||
5 | from pip._vendor.six import text_type | ||
6 | |||
7 | try: | ||
8 | import xml.etree.cElementTree as default_etree | ||
9 | except ImportError: | ||
10 | import xml.etree.ElementTree as default_etree | ||
11 | |||
12 | |||
13 | __all__ = ["default_etree", "MethodDispatcher", "isSurrogatePair", | ||
14 | "surrogatePairToCodepoint", "moduleFactoryFactory", | ||
15 | "supports_lone_surrogates"] | ||
16 | |||
17 | |||
# Platforms not supporting lone surrogates (\uD800-\uDFFF) should be
# caught by the below test. In general this would be any platform
# using UTF-16 as its encoding of unicode strings, such as
# Jython. This is because UTF-16 itself is based on the use of such
# surrogates, and there is no mechanism to further escape such
# escapes.
try:
    # eval() is used deliberately so that a literal lone surrogate does not
    # make this module unimportable on platforms that reject it at compile
    # time; the bare except below catches any failure mode (SyntaxError,
    # UnicodeError, AssertionError, ...).
    _x = eval('"\\uD800"')  # pylint:disable=eval-used
    if not isinstance(_x, text_type):
        # We need this with u"" because of http://bugs.jython.org/issue2039
        _x = eval('u"\\uD800"')  # pylint:disable=eval-used
    assert isinstance(_x, text_type)
except:  # pylint:disable=bare-except
    supports_lone_surrogates = False
else:
    supports_lone_surrogates = True
34 | |||
35 | |||
class MethodDispatcher(dict):
    """Dict with two extra behaviours.

    On construction, any key that is a list, tuple, set or frozenset is
    expanded so that every member maps to the same value::

        md = MethodDispatcher([(("foo", "bar"), "baz")])
        md["foo"] == "baz"

    Lookups of unknown keys return ``self.default`` (initially ``None``)
    instead of raising ``KeyError``.
    """

    def __init__(self, items=()):
        # Build a flat entry list first and hand it to dict.__init__ in one
        # go -- measurably faster than inserting key by key.  Benchmark
        # before changing anything here.
        entries = []
        for key, value in items:
            if isinstance(key, (list, tuple, frozenset, set)):
                entries.extend((member, value) for member in key)
            else:
                entries.append((key, value))
        dict.__init__(self, entries)
        # Expanded keys must not collide with one another.
        assert len(self) == len(entries)
        self.default = None

    def __getitem__(self, key):
        return dict.get(self, key, self.default)
66 | |||
67 | |||
68 | # Some utility functions to deal with weirdness around UCS2 vs UCS4 | ||
69 | # python builds | ||
70 | |||
def isSurrogatePair(data):
    """Return True if *data* is exactly a UTF-16 high+low surrogate pair."""
    if len(data) != 2:
        return False
    high, low = ord(data[0]), ord(data[1])
    return 0xD800 <= high <= 0xDBFF and 0xDC00 <= low <= 0xDFFF
75 | |||
76 | |||
def surrogatePairToCodepoint(data):
    """Return the astral codepoint encoded by the surrogate pair *data*."""
    high = ord(data[0]) - 0xD800
    low = ord(data[1]) - 0xDC00
    return 0x10000 + high * 0x400 + low
81 | |||
82 | # Module Factory Factory (no, this isn't Java, I know) | ||
83 | # Here to stop this being duplicated all over the place. | ||
84 | |||
85 | |||
def moduleFactoryFactory(factory):
    """Wrap *factory* so that the module objects it builds are memoized.

    *factory* is called as ``factory(baseModule, *args, **kwargs)`` and must
    return a dict of names to inject into a fresh module.  Identical
    ``(baseModule, args, kwargs)`` calls return the same module object.
    """
    moduleCache = {}

    def moduleFactory(baseModule, *args, **kwargs):
        # On Python 3 module names are str; on Python 2 they must be bytes.
        if isinstance(ModuleType.__name__, type("")):
            name = "_%s_factory" % baseModule.__name__
        else:
            name = b"_%s_factory" % baseModule.__name__

        kwargs_tuple = tuple(kwargs.items())

        try:
            return moduleCache[name][args][kwargs_tuple]
        except KeyError:
            mod = ModuleType(name)
            objs = factory(baseModule, *args, **kwargs)
            mod.__dict__.update(objs)
            # Fix: key the cache on the actual name/args values.  The
            # original tested the literal strings "name"/"args"/"kwargs",
            # which reset the per-name sub-caches on every miss and threw
            # away previously cached modules.
            moduleCache.setdefault(name, {}).setdefault(args, {})[kwargs_tuple] = mod
            return mod

    return moduleFactory
113 | |||
114 | |||
def memoize(func):
    """Cache *func*'s results, keyed on its (hashable) call arguments."""
    results = {}

    def wrapped(*args, **kwargs):
        key = (args, tuple(kwargs.items()))
        try:
            return results[key]
        except KeyError:
            value = func(*args, **kwargs)
            results[key] = value
            return value

    return wrapped
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/constants.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/constants.py new file mode 100644 index 0000000..bca155e --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/constants.py | |||
@@ -0,0 +1,2947 @@ | |||
1 | from __future__ import absolute_import, division, unicode_literals | ||
2 | |||
3 | import string | ||
4 | |||
# Sentinel returned by the input stream when the end of input is reached.
EOF = None
6 | |||
# Parse-error codes mapped to human-readable English messages.  Messages
# containing %(...)s placeholders are %-formatted with the error's datavars
# when reported.
E = {
    "null-character":
        "Null character in input stream, replaced with U+FFFD.",
    "invalid-codepoint":
        "Invalid codepoint in stream.",
    "incorrectly-placed-solidus":
        "Solidus (/) incorrectly placed in tag.",
    "incorrect-cr-newline-entity":
        "Incorrect CR newline entity, replaced with LF.",
    "illegal-windows-1252-entity":
        "Entity used with illegal number (windows-1252 reference).",
    "cant-convert-numeric-entity":
        "Numeric entity couldn't be converted to character "
        "(codepoint U+%(charAsInt)08x).",
    "illegal-codepoint-for-numeric-entity":
        "Numeric entity represents an illegal codepoint: "
        "U+%(charAsInt)08x.",
    "numeric-entity-without-semicolon":
        "Numeric entity didn't end with ';'.",
    "expected-numeric-entity-but-got-eof":
        "Numeric entity expected. Got end of file instead.",
    "expected-numeric-entity":
        "Numeric entity expected but none found.",
    "named-entity-without-semicolon":
        "Named entity didn't end with ';'.",
    "expected-named-entity":
        "Named entity expected. Got none.",
    "attributes-in-end-tag":
        "End tag contains unexpected attributes.",
    'self-closing-flag-on-end-tag':
        "End tag contains unexpected self-closing flag.",
    "expected-tag-name-but-got-right-bracket":
        "Expected tag name. Got '>' instead.",
    "expected-tag-name-but-got-question-mark":
        "Expected tag name. Got '?' instead. (HTML doesn't "
        "support processing instructions.)",
    "expected-tag-name":
        "Expected tag name. Got something else instead",
    "expected-closing-tag-but-got-right-bracket":
        "Expected closing tag. Got '>' instead. Ignoring '</>'.",
    "expected-closing-tag-but-got-eof":
        "Expected closing tag. Unexpected end of file.",
    "expected-closing-tag-but-got-char":
        "Expected closing tag. Unexpected character '%(data)s' found.",
    "eof-in-tag-name":
        "Unexpected end of file in the tag name.",
    "expected-attribute-name-but-got-eof":
        "Unexpected end of file. Expected attribute name instead.",
    "eof-in-attribute-name":
        "Unexpected end of file in attribute name.",
    "invalid-character-in-attribute-name":
        "Invalid character in attribute name",
    "duplicate-attribute":
        "Dropped duplicate attribute on tag.",
    "expected-end-of-tag-name-but-got-eof":
        "Unexpected end of file. Expected = or end of tag.",
    "expected-attribute-value-but-got-eof":
        "Unexpected end of file. Expected attribute value.",
    "expected-attribute-value-but-got-right-bracket":
        "Expected attribute value. Got '>' instead.",
    'equals-in-unquoted-attribute-value':
        "Unexpected = in unquoted attribute",
    'unexpected-character-in-unquoted-attribute-value':
        "Unexpected character in unquoted attribute",
    "invalid-character-after-attribute-name":
        "Unexpected character after attribute name.",
    "unexpected-character-after-attribute-value":
        "Unexpected character after attribute value.",
    "eof-in-attribute-value-double-quote":
        "Unexpected end of file in attribute value (\").",
    "eof-in-attribute-value-single-quote":
        "Unexpected end of file in attribute value (').",
    "eof-in-attribute-value-no-quotes":
        "Unexpected end of file in attribute value.",
    "unexpected-EOF-after-solidus-in-tag":
        "Unexpected end of file in tag. Expected >",
    "unexpected-character-after-solidus-in-tag":
        "Unexpected character after / in tag. Expected >",
    "expected-dashes-or-doctype":
        "Expected '--' or 'DOCTYPE'. Not found.",
    "unexpected-bang-after-double-dash-in-comment":
        "Unexpected ! after -- in comment",
    "unexpected-space-after-double-dash-in-comment":
        "Unexpected space after -- in comment",
    "incorrect-comment":
        "Incorrect comment.",
    "eof-in-comment":
        "Unexpected end of file in comment.",
    "eof-in-comment-end-dash":
        "Unexpected end of file in comment (-)",
    "unexpected-dash-after-double-dash-in-comment":
        "Unexpected '-' after '--' found in comment.",
    "eof-in-comment-double-dash":
        "Unexpected end of file in comment (--).",
    "eof-in-comment-end-space-state":
        "Unexpected end of file in comment.",
    "eof-in-comment-end-bang-state":
        "Unexpected end of file in comment.",
    "unexpected-char-in-comment":
        "Unexpected character in comment found.",
    "need-space-after-doctype":
        "No space after literal string 'DOCTYPE'.",
    "expected-doctype-name-but-got-right-bracket":
        "Unexpected > character. Expected DOCTYPE name.",
    "expected-doctype-name-but-got-eof":
        "Unexpected end of file. Expected DOCTYPE name.",
    "eof-in-doctype-name":
        "Unexpected end of file in DOCTYPE name.",
    "eof-in-doctype":
        "Unexpected end of file in DOCTYPE.",
    "expected-space-or-right-bracket-in-doctype":
        "Expected space or '>'. Got '%(data)s'",
    "unexpected-end-of-doctype":
        "Unexpected end of DOCTYPE.",
    "unexpected-char-in-doctype":
        "Unexpected character in DOCTYPE.",
    "eof-in-innerhtml":
        "XXX innerHTML EOF",
    "unexpected-doctype":
        "Unexpected DOCTYPE. Ignored.",
    "non-html-root":
        "html needs to be the first start tag.",
    "expected-doctype-but-got-eof":
        "Unexpected End of file. Expected DOCTYPE.",
    "unknown-doctype":
        "Erroneous DOCTYPE.",
    "expected-doctype-but-got-chars":
        "Unexpected non-space characters. Expected DOCTYPE.",
    "expected-doctype-but-got-start-tag":
        "Unexpected start tag (%(name)s). Expected DOCTYPE.",
    "expected-doctype-but-got-end-tag":
        "Unexpected end tag (%(name)s). Expected DOCTYPE.",
    "end-tag-after-implied-root":
        "Unexpected end tag (%(name)s) after the (implied) root element.",
    "expected-named-closing-tag-but-got-eof":
        "Unexpected end of file. Expected end tag (%(name)s).",
    "two-heads-are-not-better-than-one":
        "Unexpected start tag head in existing head. Ignored.",
    "unexpected-end-tag":
        "Unexpected end tag (%(name)s). Ignored.",
    "unexpected-start-tag-out-of-my-head":
        "Unexpected start tag (%(name)s) that can be in head. Moved.",
    "unexpected-start-tag":
        "Unexpected start tag (%(name)s).",
    "missing-end-tag":
        "Missing end tag (%(name)s).",
    "missing-end-tags":
        "Missing end tags (%(name)s).",
    "unexpected-start-tag-implies-end-tag":
        "Unexpected start tag (%(startName)s) "
        "implies end tag (%(endName)s).",
    "unexpected-start-tag-treated-as":
        "Unexpected start tag (%(originalName)s). Treated as %(newName)s.",
    "deprecated-tag":
        "Unexpected start tag %(name)s. Don't use it!",
    "unexpected-start-tag-ignored":
        "Unexpected start tag %(name)s. Ignored.",
    "expected-one-end-tag-but-got-another":
        "Unexpected end tag (%(gotName)s). "
        "Missing end tag (%(expectedName)s).",
    "end-tag-too-early":
        "End tag (%(name)s) seen too early. Expected other end tag.",
    "end-tag-too-early-named":
        "Unexpected end tag (%(gotName)s). Expected end tag (%(expectedName)s).",
    "end-tag-too-early-ignored":
        "End tag (%(name)s) seen too early. Ignored.",
    "adoption-agency-1.1":
        "End tag (%(name)s) violates step 1, "
        "paragraph 1 of the adoption agency algorithm.",
    "adoption-agency-1.2":
        "End tag (%(name)s) violates step 1, "
        "paragraph 2 of the adoption agency algorithm.",
    "adoption-agency-1.3":
        "End tag (%(name)s) violates step 1, "
        "paragraph 3 of the adoption agency algorithm.",
    "adoption-agency-4.4":
        "End tag (%(name)s) violates step 4, "
        "paragraph 4 of the adoption agency algorithm.",
    "unexpected-end-tag-treated-as":
        "Unexpected end tag (%(originalName)s). Treated as %(newName)s.",
    "no-end-tag":
        "This element (%(name)s) has no end tag.",
    "unexpected-implied-end-tag-in-table":
        "Unexpected implied end tag (%(name)s) in the table phase.",
    "unexpected-implied-end-tag-in-table-body":
        "Unexpected implied end tag (%(name)s) in the table body phase.",
    "unexpected-char-implies-table-voodoo":
        "Unexpected non-space characters in "
        "table context caused voodoo mode.",
    "unexpected-hidden-input-in-table":
        "Unexpected input with type hidden in table context.",
    "unexpected-form-in-table":
        "Unexpected form in table context.",
    "unexpected-start-tag-implies-table-voodoo":
        "Unexpected start tag (%(name)s) in "
        "table context caused voodoo mode.",
    "unexpected-end-tag-implies-table-voodoo":
        "Unexpected end tag (%(name)s) in "
        "table context caused voodoo mode.",
    "unexpected-cell-in-table-body":
        "Unexpected table cell start tag (%(name)s) "
        "in the table body phase.",
    "unexpected-cell-end-tag":
        "Got table cell end tag (%(name)s) "
        "while required end tags are missing.",
    "unexpected-end-tag-in-table-body":
        "Unexpected end tag (%(name)s) in the table body phase. Ignored.",
    "unexpected-implied-end-tag-in-table-row":
        "Unexpected implied end tag (%(name)s) in the table row phase.",
    "unexpected-end-tag-in-table-row":
        "Unexpected end tag (%(name)s) in the table row phase. Ignored.",
    "unexpected-select-in-select":
        "Unexpected select start tag in the select phase "
        "treated as select end tag.",
    "unexpected-input-in-select":
        "Unexpected input start tag in the select phase.",
    "unexpected-start-tag-in-select":
        "Unexpected start tag token (%(name)s in the select phase. "
        "Ignored.",
    "unexpected-end-tag-in-select":
        "Unexpected end tag (%(name)s) in the select phase. Ignored.",
    "unexpected-table-element-start-tag-in-select-in-table":
        "Unexpected table element start tag (%(name)s) in the select in table phase.",
    "unexpected-table-element-end-tag-in-select-in-table":
        "Unexpected table element end tag (%(name)s) in the select in table phase.",
    "unexpected-char-after-body":
        "Unexpected non-space characters in the after body phase.",
    "unexpected-start-tag-after-body":
        "Unexpected start tag token (%(name)s)"
        " in the after body phase.",
    "unexpected-end-tag-after-body":
        "Unexpected end tag token (%(name)s)"
        " in the after body phase.",
    "unexpected-char-in-frameset":
        "Unexpected characters in the frameset phase. Characters ignored.",
    "unexpected-start-tag-in-frameset":
        "Unexpected start tag token (%(name)s)"
        " in the frameset phase. Ignored.",
    "unexpected-frameset-in-frameset-innerhtml":
        "Unexpected end tag token (frameset) "
        "in the frameset phase (innerHTML).",
    "unexpected-end-tag-in-frameset":
        "Unexpected end tag token (%(name)s)"
        " in the frameset phase. Ignored.",
    "unexpected-char-after-frameset":
        "Unexpected non-space characters in the "
        "after frameset phase. Ignored.",
    "unexpected-start-tag-after-frameset":
        "Unexpected start tag (%(name)s)"
        " in the after frameset phase. Ignored.",
    "unexpected-end-tag-after-frameset":
        "Unexpected end tag (%(name)s)"
        " in the after frameset phase. Ignored.",
    "unexpected-end-tag-after-body-innerhtml":
        "Unexpected end tag after body(innerHtml)",
    "expected-eof-but-got-char":
        "Unexpected non-space characters. Expected end of file.",
    "expected-eof-but-got-start-tag":
        "Unexpected start tag (%(name)s)"
        ". Expected end of file.",
    "expected-eof-but-got-end-tag":
        "Unexpected end tag (%(name)s)"
        ". Expected end of file.",
    "eof-in-table":
        "Unexpected end of file. Expected table content.",
    "eof-in-select":
        "Unexpected end of file. Expected select content.",
    "eof-in-frameset":
        "Unexpected end of file. Expected frameset content.",
    "eof-in-script-in-script":
        "Unexpected end of file. Expected script content.",
    "eof-in-foreign-lands":
        "Unexpected end of file. Expected foreign content",
    "non-void-element-with-trailing-solidus":
        "Trailing solidus not allowed on element %(name)s",
    "unexpected-html-element-in-foreign-content":
        "Element %(name)s not allowed in a non-html context",
    "unexpected-end-tag-before-html":
        "Unexpected end tag (%(name)s) before html.",
    "unexpected-inhead-noscript-tag":
        "Element %(name)s not allowed in a inhead-noscript context",
    "eof-in-head-noscript":
        "Unexpected end of file. Expected inhead-noscript content",
    "char-in-head-noscript":
        "Unexpected non-space character. Expected inhead-noscript content",
    "XXX-undefined-error":
        "Undefined error (this sucks and should be fixed)",
}
295 | |||
# XML namespace URIs for the content languages the parser handles.
namespaces = {
    "html": "http://www.w3.org/1999/xhtml",
    "mathml": "http://www.w3.org/1998/Math/MathML",
    "svg": "http://www.w3.org/2000/svg",
    "xlink": "http://www.w3.org/1999/xlink",
    "xml": "http://www.w3.org/XML/1998/namespace",
    "xmlns": "http://www.w3.org/2000/xmlns/"
}
304 | |||
# (namespace, tag-name) pairs of elements that delimit tag scope -- the
# HTML spec's list used for "has an element in scope" checks.
scopingElements = frozenset([
    (namespaces["html"], "applet"),
    (namespaces["html"], "caption"),
    (namespaces["html"], "html"),
    (namespaces["html"], "marquee"),
    (namespaces["html"], "object"),
    (namespaces["html"], "table"),
    (namespaces["html"], "td"),
    (namespaces["html"], "th"),
    (namespaces["mathml"], "mi"),
    (namespaces["mathml"], "mo"),
    (namespaces["mathml"], "mn"),
    (namespaces["mathml"], "ms"),
    (namespaces["mathml"], "mtext"),
    (namespaces["mathml"], "annotation-xml"),
    (namespaces["svg"], "foreignObject"),
    (namespaces["svg"], "desc"),
    (namespaces["svg"], "title"),
])
324 | |||
# (namespace, tag-name) pairs of formatting elements -- the HTML spec's
# set handled by the active-formatting-elements / adoption-agency machinery.
formattingElements = frozenset([
    (namespaces["html"], "a"),
    (namespaces["html"], "b"),
    (namespaces["html"], "big"),
    (namespaces["html"], "code"),
    (namespaces["html"], "em"),
    (namespaces["html"], "font"),
    (namespaces["html"], "i"),
    (namespaces["html"], "nobr"),
    (namespaces["html"], "s"),
    (namespaces["html"], "small"),
    (namespaces["html"], "strike"),
    (namespaces["html"], "strong"),
    (namespaces["html"], "tt"),
    (namespaces["html"], "u")
])
341 | |||
# (namespace, tag-name) pairs the HTML spec categorises as "special"
# elements for tree-construction purposes.
specialElements = frozenset([
    (namespaces["html"], "address"),
    (namespaces["html"], "applet"),
    (namespaces["html"], "area"),
    (namespaces["html"], "article"),
    (namespaces["html"], "aside"),
    (namespaces["html"], "base"),
    (namespaces["html"], "basefont"),
    (namespaces["html"], "bgsound"),
    (namespaces["html"], "blockquote"),
    (namespaces["html"], "body"),
    (namespaces["html"], "br"),
    (namespaces["html"], "button"),
    (namespaces["html"], "caption"),
    (namespaces["html"], "center"),
    (namespaces["html"], "col"),
    (namespaces["html"], "colgroup"),
    (namespaces["html"], "command"),
    (namespaces["html"], "dd"),
    (namespaces["html"], "details"),
    (namespaces["html"], "dir"),
    (namespaces["html"], "div"),
    (namespaces["html"], "dl"),
    (namespaces["html"], "dt"),
    (namespaces["html"], "embed"),
    (namespaces["html"], "fieldset"),
    (namespaces["html"], "figure"),
    (namespaces["html"], "footer"),
    (namespaces["html"], "form"),
    (namespaces["html"], "frame"),
    (namespaces["html"], "frameset"),
    (namespaces["html"], "h1"),
    (namespaces["html"], "h2"),
    (namespaces["html"], "h3"),
    (namespaces["html"], "h4"),
    (namespaces["html"], "h5"),
    (namespaces["html"], "h6"),
    (namespaces["html"], "head"),
    (namespaces["html"], "header"),
    (namespaces["html"], "hr"),
    (namespaces["html"], "html"),
    (namespaces["html"], "iframe"),
    # Note that image is commented out in the spec as "this isn't an
    # element that can end up on the stack, so it doesn't matter,"
    (namespaces["html"], "image"),
    (namespaces["html"], "img"),
    (namespaces["html"], "input"),
    (namespaces["html"], "isindex"),
    (namespaces["html"], "li"),
    (namespaces["html"], "link"),
    (namespaces["html"], "listing"),
    (namespaces["html"], "marquee"),
    (namespaces["html"], "menu"),
    (namespaces["html"], "meta"),
    (namespaces["html"], "nav"),
    (namespaces["html"], "noembed"),
    (namespaces["html"], "noframes"),
    (namespaces["html"], "noscript"),
    (namespaces["html"], "object"),
    (namespaces["html"], "ol"),
    (namespaces["html"], "p"),
    (namespaces["html"], "param"),
    (namespaces["html"], "plaintext"),
    (namespaces["html"], "pre"),
    (namespaces["html"], "script"),
    (namespaces["html"], "section"),
    (namespaces["html"], "select"),
    (namespaces["html"], "style"),
    (namespaces["html"], "table"),
    (namespaces["html"], "tbody"),
    (namespaces["html"], "td"),
    (namespaces["html"], "textarea"),
    (namespaces["html"], "tfoot"),
    (namespaces["html"], "th"),
    (namespaces["html"], "thead"),
    (namespaces["html"], "title"),
    (namespaces["html"], "tr"),
    (namespaces["html"], "ul"),
    (namespaces["html"], "wbr"),
    (namespaces["html"], "xmp"),
    (namespaces["svg"], "foreignObject")
])
424 | |||
# Foreign-content elements that are HTML integration points: their children
# are parsed as ordinary HTML.
htmlIntegrationPointElements = frozenset([
    (namespaces["mathml"], "annotation-xml"),
    (namespaces["svg"], "foreignObject"),
    (namespaces["svg"], "desc"),
    (namespaces["svg"], "title")
])
431 | |||
# MathML text integration points: token-level elements whose character and
# most tag children are handled by the HTML rules rather than foreign content.
mathmlTextIntegrationPointElements = frozenset([
    (namespaces["mathml"], "mi"),
    (namespaces["mathml"], "mo"),
    (namespaces["mathml"], "mn"),
    (namespaces["mathml"], "ms"),
    (namespaces["mathml"], "mtext")
])
439 | |||
# Map from lowercased SVG attribute names (as the tokenizer produces them)
# back to their mixed-case spec spellings.  Every spec spelling lowercases
# to exactly its key, so the table is derived from the camelCase names.
adjustSVGAttributes = {specName.lower(): specName for specName in (
    "attributeName",
    "attributeType",
    "baseFrequency",
    "baseProfile",
    "calcMode",
    "clipPathUnits",
    "contentScriptType",
    "contentStyleType",
    "diffuseConstant",
    "edgeMode",
    "externalResourcesRequired",
    "filterRes",
    "filterUnits",
    "glyphRef",
    "gradientTransform",
    "gradientUnits",
    "kernelMatrix",
    "kernelUnitLength",
    "keyPoints",
    "keySplines",
    "keyTimes",
    "lengthAdjust",
    "limitingConeAngle",
    "markerHeight",
    "markerUnits",
    "markerWidth",
    "maskContentUnits",
    "maskUnits",
    "numOctaves",
    "pathLength",
    "patternContentUnits",
    "patternTransform",
    "patternUnits",
    "pointsAtX",
    "pointsAtY",
    "pointsAtZ",
    "preserveAlpha",
    "preserveAspectRatio",
    "primitiveUnits",
    "refX",
    "refY",
    "repeatCount",
    "repeatDur",
    "requiredExtensions",
    "requiredFeatures",
    "specularConstant",
    "specularExponent",
    "spreadMethod",
    "startOffset",
    "stdDeviation",
    "stitchTiles",
    "surfaceScale",
    "systemLanguage",
    "tableValues",
    "targetX",
    "targetY",
    "textLength",
    "viewBox",
    "viewTarget",
    "xChannelSelector",
    "yChannelSelector",
    "zoomAndPan",
)}
504 | |||
505 | adjustMathMLAttributes = {"definitionurl": "definitionURL"} | ||
506 | |||
# Map from qualified attribute names as written in HTML source to
# (prefix, local name, namespace) triples used on foreign (SVG/MathML)
# elements.  Built from the three prefix groups the spec defines.
adjustForeignAttributes = dict(
    [("xlink:" + local, ("xlink", local, namespaces["xlink"]))
     for local in ("actuate", "arcrole", "href", "role", "show", "title", "type")] +
    [("xml:" + local, ("xml", local, namespaces["xml"]))
     for local in ("base", "lang", "space")] +
    [("xmlns", (None, "xmlns", namespaces["xmlns"])),
     ("xmlns:xlink", ("xmlns", "xlink", namespaces["xmlns"]))]
)
521 | |||
# Inverse of adjustForeignAttributes: (namespace, local name) -> qualified
# name, used when serializing foreign content back to HTML.
unadjustForeignAttributes = {(ns, local): qname
                             for qname, (prefix, local, ns)
                             in adjustForeignAttributes.items()}
524 | |||
# The five characters the HTML spec treats as "space characters":
# tab, line feed, form feed, space, carriage return.
spaceCharacters = frozenset("\t\n\u000C \r")
532 | |||
# Elements whose presence on the stack of open elements switches the parser
# into a table-related insertion mode.
tableInsertModeElements = frozenset("table tbody tfoot thead tr".split())
540 | |||
# Character classes used by the tokenizer, precomputed as frozensets so
# membership tests are O(1).
digits = frozenset(string.digits)
hexDigits = frozenset(string.hexdigits)
asciiLowercase = frozenset(string.ascii_lowercase)
asciiUppercase = frozenset(string.ascii_uppercase)
asciiLetters = asciiLowercase | asciiUppercase
546 | |||
# Translation table (for str.translate) mapping ASCII upper-case code
# points to their lower-case equivalents; non-ASCII is left untouched.
asciiUpper2Lower = {ord(upper): ord(upper.lower())
                    for upper in string.ascii_uppercase}
549 | |||
# Heading elements in rank order (h1 highest); order matters, which is why
# this is a tuple and not a set.
headingElements = tuple("h%d" % rank for rank in range(1, 7))
559 | |||
# Elements that never take an end tag.  Includes historical entries
# ("command", "event-source") retained here for compatibility.
voidElements = frozenset((
    "area", "base", "br", "col", "command", "embed", "event-source",
    "hr", "img", "input", "link", "meta", "param", "source", "track",
))
577 | |||
578 | cdataElements = frozenset(['title', 'textarea']) | ||
579 | |||
# Elements whose content is tokenized as raw text (no markup, no character
# references).
rcdataElements = frozenset(
    "style script xmp iframe noembed noframes noscript".split())
589 | |||
# Attributes that may be serialized in minimized (value-less) form when
# they appear on the given element; the "" key applies to every element.
booleanAttributes = {
    element: frozenset(attrs)
    for element, attrs in (
        ("", ("irrelevant", "itemscope")),
        ("style", ("scoped",)),
        ("img", ("ismap",)),
        ("audio", ("autoplay", "controls")),
        ("video", ("autoplay", "controls")),
        ("script", ("defer", "async")),
        ("details", ("open",)),
        ("datagrid", ("multiple", "disabled")),
        ("command", ("hidden", "disabled", "checked", "default")),
        ("hr", ("noshade",)),
        ("menu", ("autosubmit",)),
        ("fieldset", ("disabled", "readonly")),
        ("option", ("disabled", "readonly", "selected")),
        ("optgroup", ("disabled", "readonly")),
        ("button", ("disabled", "autofocus")),
        ("input", ("disabled", "readonly", "required", "autofocus", "checked", "ismap")),
        ("select", ("disabled", "readonly", "autofocus", "multiple")),
        ("output", ("disabled", "readonly")),
        ("iframe", ("seamless",)),
    )
}
611 | |||
# Replacement code points for numeric character references in the C1
# control range: index i gives the Unicode code point for reference
# 0x80 + i, following the windows-1252 interpretation that browsers apply.
# The table is positional (indexed by offset), so it has to stay an
# _ordered_ sequence — it therefore can't be a frozenset.
entitiesWindows1252 = (
    8364,   # 0x80  0x20AC  EURO SIGN
    65533,  # 0x81          UNDEFINED
    8218,   # 0x82  0x201A  SINGLE LOW-9 QUOTATION MARK
    402,    # 0x83  0x0192  LATIN SMALL LETTER F WITH HOOK
    8222,   # 0x84  0x201E  DOUBLE LOW-9 QUOTATION MARK
    8230,   # 0x85  0x2026  HORIZONTAL ELLIPSIS
    8224,   # 0x86  0x2020  DAGGER
    8225,   # 0x87  0x2021  DOUBLE DAGGER
    710,    # 0x88  0x02C6  MODIFIER LETTER CIRCUMFLEX ACCENT
    8240,   # 0x89  0x2030  PER MILLE SIGN
    352,    # 0x8A  0x0160  LATIN CAPITAL LETTER S WITH CARON
    8249,   # 0x8B  0x2039  SINGLE LEFT-POINTING ANGLE QUOTATION MARK
    338,    # 0x8C  0x0152  LATIN CAPITAL LIGATURE OE
    65533,  # 0x8D          UNDEFINED
    381,    # 0x8E  0x017D  LATIN CAPITAL LETTER Z WITH CARON
    65533,  # 0x8F          UNDEFINED
    65533,  # 0x90          UNDEFINED
    8216,   # 0x91  0x2018  LEFT SINGLE QUOTATION MARK
    8217,   # 0x92  0x2019  RIGHT SINGLE QUOTATION MARK
    8220,   # 0x93  0x201C  LEFT DOUBLE QUOTATION MARK
    8221,   # 0x94  0x201D  RIGHT DOUBLE QUOTATION MARK
    8226,   # 0x95  0x2022  BULLET
    8211,   # 0x96  0x2013  EN DASH
    8212,   # 0x97  0x2014  EM DASH
    732,    # 0x98  0x02DC  SMALL TILDE
    8482,   # 0x99  0x2122  TRADE MARK SIGN
    353,    # 0x9A  0x0161  LATIN SMALL LETTER S WITH CARON
    8250,   # 0x9B  0x203A  SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
    339,    # 0x9C  0x0153  LATIN SMALL LIGATURE OE
    65533,  # 0x9D          UNDEFINED
    382,    # 0x9E  0x017E  LATIN SMALL LETTER Z WITH CARON
    376     # 0x9F  0x0178  LATIN CAPITAL LETTER Y WITH DIAERESIS
)
648 | |||
649 | xmlEntities = frozenset(['lt;', 'gt;', 'amp;', 'apos;', 'quot;']) | ||
650 | |||
651 | entities = { | ||
652 | "AElig": "\xc6", | ||
653 | "AElig;": "\xc6", | ||
654 | "AMP": "&", | ||
655 | "AMP;": "&", | ||
656 | "Aacute": "\xc1", | ||
657 | "Aacute;": "\xc1", | ||
658 | "Abreve;": "\u0102", | ||
659 | "Acirc": "\xc2", | ||
660 | "Acirc;": "\xc2", | ||
661 | "Acy;": "\u0410", | ||
662 | "Afr;": "\U0001d504", | ||
663 | "Agrave": "\xc0", | ||
664 | "Agrave;": "\xc0", | ||
665 | "Alpha;": "\u0391", | ||
666 | "Amacr;": "\u0100", | ||
667 | "And;": "\u2a53", | ||
668 | "Aogon;": "\u0104", | ||
669 | "Aopf;": "\U0001d538", | ||
670 | "ApplyFunction;": "\u2061", | ||
671 | "Aring": "\xc5", | ||
672 | "Aring;": "\xc5", | ||
673 | "Ascr;": "\U0001d49c", | ||
674 | "Assign;": "\u2254", | ||
675 | "Atilde": "\xc3", | ||
676 | "Atilde;": "\xc3", | ||
677 | "Auml": "\xc4", | ||
678 | "Auml;": "\xc4", | ||
679 | "Backslash;": "\u2216", | ||
680 | "Barv;": "\u2ae7", | ||
681 | "Barwed;": "\u2306", | ||
682 | "Bcy;": "\u0411", | ||
683 | "Because;": "\u2235", | ||
684 | "Bernoullis;": "\u212c", | ||
685 | "Beta;": "\u0392", | ||
686 | "Bfr;": "\U0001d505", | ||
687 | "Bopf;": "\U0001d539", | ||
688 | "Breve;": "\u02d8", | ||
689 | "Bscr;": "\u212c", | ||
690 | "Bumpeq;": "\u224e", | ||
691 | "CHcy;": "\u0427", | ||
692 | "COPY": "\xa9", | ||
693 | "COPY;": "\xa9", | ||
694 | "Cacute;": "\u0106", | ||
695 | "Cap;": "\u22d2", | ||
696 | "CapitalDifferentialD;": "\u2145", | ||
697 | "Cayleys;": "\u212d", | ||
698 | "Ccaron;": "\u010c", | ||
699 | "Ccedil": "\xc7", | ||
700 | "Ccedil;": "\xc7", | ||
701 | "Ccirc;": "\u0108", | ||
702 | "Cconint;": "\u2230", | ||
703 | "Cdot;": "\u010a", | ||
704 | "Cedilla;": "\xb8", | ||
705 | "CenterDot;": "\xb7", | ||
706 | "Cfr;": "\u212d", | ||
707 | "Chi;": "\u03a7", | ||
708 | "CircleDot;": "\u2299", | ||
709 | "CircleMinus;": "\u2296", | ||
710 | "CirclePlus;": "\u2295", | ||
711 | "CircleTimes;": "\u2297", | ||
712 | "ClockwiseContourIntegral;": "\u2232", | ||
713 | "CloseCurlyDoubleQuote;": "\u201d", | ||
714 | "CloseCurlyQuote;": "\u2019", | ||
715 | "Colon;": "\u2237", | ||
716 | "Colone;": "\u2a74", | ||
717 | "Congruent;": "\u2261", | ||
718 | "Conint;": "\u222f", | ||
719 | "ContourIntegral;": "\u222e", | ||
720 | "Copf;": "\u2102", | ||
721 | "Coproduct;": "\u2210", | ||
722 | "CounterClockwiseContourIntegral;": "\u2233", | ||
723 | "Cross;": "\u2a2f", | ||
724 | "Cscr;": "\U0001d49e", | ||
725 | "Cup;": "\u22d3", | ||
726 | "CupCap;": "\u224d", | ||
727 | "DD;": "\u2145", | ||
728 | "DDotrahd;": "\u2911", | ||
729 | "DJcy;": "\u0402", | ||
730 | "DScy;": "\u0405", | ||
731 | "DZcy;": "\u040f", | ||
732 | "Dagger;": "\u2021", | ||
733 | "Darr;": "\u21a1", | ||
734 | "Dashv;": "\u2ae4", | ||
735 | "Dcaron;": "\u010e", | ||
736 | "Dcy;": "\u0414", | ||
737 | "Del;": "\u2207", | ||
738 | "Delta;": "\u0394", | ||
739 | "Dfr;": "\U0001d507", | ||
740 | "DiacriticalAcute;": "\xb4", | ||
741 | "DiacriticalDot;": "\u02d9", | ||
742 | "DiacriticalDoubleAcute;": "\u02dd", | ||
743 | "DiacriticalGrave;": "`", | ||
744 | "DiacriticalTilde;": "\u02dc", | ||
745 | "Diamond;": "\u22c4", | ||
746 | "DifferentialD;": "\u2146", | ||
747 | "Dopf;": "\U0001d53b", | ||
748 | "Dot;": "\xa8", | ||
749 | "DotDot;": "\u20dc", | ||
750 | "DotEqual;": "\u2250", | ||
751 | "DoubleContourIntegral;": "\u222f", | ||
752 | "DoubleDot;": "\xa8", | ||
753 | "DoubleDownArrow;": "\u21d3", | ||
754 | "DoubleLeftArrow;": "\u21d0", | ||
755 | "DoubleLeftRightArrow;": "\u21d4", | ||
756 | "DoubleLeftTee;": "\u2ae4", | ||
757 | "DoubleLongLeftArrow;": "\u27f8", | ||
758 | "DoubleLongLeftRightArrow;": "\u27fa", | ||
759 | "DoubleLongRightArrow;": "\u27f9", | ||
760 | "DoubleRightArrow;": "\u21d2", | ||
761 | "DoubleRightTee;": "\u22a8", | ||
762 | "DoubleUpArrow;": "\u21d1", | ||
763 | "DoubleUpDownArrow;": "\u21d5", | ||
764 | "DoubleVerticalBar;": "\u2225", | ||
765 | "DownArrow;": "\u2193", | ||
766 | "DownArrowBar;": "\u2913", | ||
767 | "DownArrowUpArrow;": "\u21f5", | ||
768 | "DownBreve;": "\u0311", | ||
769 | "DownLeftRightVector;": "\u2950", | ||
770 | "DownLeftTeeVector;": "\u295e", | ||
771 | "DownLeftVector;": "\u21bd", | ||
772 | "DownLeftVectorBar;": "\u2956", | ||
773 | "DownRightTeeVector;": "\u295f", | ||
774 | "DownRightVector;": "\u21c1", | ||
775 | "DownRightVectorBar;": "\u2957", | ||
776 | "DownTee;": "\u22a4", | ||
777 | "DownTeeArrow;": "\u21a7", | ||
778 | "Downarrow;": "\u21d3", | ||
779 | "Dscr;": "\U0001d49f", | ||
780 | "Dstrok;": "\u0110", | ||
781 | "ENG;": "\u014a", | ||
782 | "ETH": "\xd0", | ||
783 | "ETH;": "\xd0", | ||
784 | "Eacute": "\xc9", | ||
785 | "Eacute;": "\xc9", | ||
786 | "Ecaron;": "\u011a", | ||
787 | "Ecirc": "\xca", | ||
788 | "Ecirc;": "\xca", | ||
789 | "Ecy;": "\u042d", | ||
790 | "Edot;": "\u0116", | ||
791 | "Efr;": "\U0001d508", | ||
792 | "Egrave": "\xc8", | ||
793 | "Egrave;": "\xc8", | ||
794 | "Element;": "\u2208", | ||
795 | "Emacr;": "\u0112", | ||
796 | "EmptySmallSquare;": "\u25fb", | ||
797 | "EmptyVerySmallSquare;": "\u25ab", | ||
798 | "Eogon;": "\u0118", | ||
799 | "Eopf;": "\U0001d53c", | ||
800 | "Epsilon;": "\u0395", | ||
801 | "Equal;": "\u2a75", | ||
802 | "EqualTilde;": "\u2242", | ||
803 | "Equilibrium;": "\u21cc", | ||
804 | "Escr;": "\u2130", | ||
805 | "Esim;": "\u2a73", | ||
806 | "Eta;": "\u0397", | ||
807 | "Euml": "\xcb", | ||
808 | "Euml;": "\xcb", | ||
809 | "Exists;": "\u2203", | ||
810 | "ExponentialE;": "\u2147", | ||
811 | "Fcy;": "\u0424", | ||
812 | "Ffr;": "\U0001d509", | ||
813 | "FilledSmallSquare;": "\u25fc", | ||
814 | "FilledVerySmallSquare;": "\u25aa", | ||
815 | "Fopf;": "\U0001d53d", | ||
816 | "ForAll;": "\u2200", | ||
817 | "Fouriertrf;": "\u2131", | ||
818 | "Fscr;": "\u2131", | ||
819 | "GJcy;": "\u0403", | ||
820 | "GT": ">", | ||
821 | "GT;": ">", | ||
822 | "Gamma;": "\u0393", | ||
823 | "Gammad;": "\u03dc", | ||
824 | "Gbreve;": "\u011e", | ||
825 | "Gcedil;": "\u0122", | ||
826 | "Gcirc;": "\u011c", | ||
827 | "Gcy;": "\u0413", | ||
828 | "Gdot;": "\u0120", | ||
829 | "Gfr;": "\U0001d50a", | ||
830 | "Gg;": "\u22d9", | ||
831 | "Gopf;": "\U0001d53e", | ||
832 | "GreaterEqual;": "\u2265", | ||
833 | "GreaterEqualLess;": "\u22db", | ||
834 | "GreaterFullEqual;": "\u2267", | ||
835 | "GreaterGreater;": "\u2aa2", | ||
836 | "GreaterLess;": "\u2277", | ||
837 | "GreaterSlantEqual;": "\u2a7e", | ||
838 | "GreaterTilde;": "\u2273", | ||
839 | "Gscr;": "\U0001d4a2", | ||
840 | "Gt;": "\u226b", | ||
841 | "HARDcy;": "\u042a", | ||
842 | "Hacek;": "\u02c7", | ||
843 | "Hat;": "^", | ||
844 | "Hcirc;": "\u0124", | ||
845 | "Hfr;": "\u210c", | ||
846 | "HilbertSpace;": "\u210b", | ||
847 | "Hopf;": "\u210d", | ||
848 | "HorizontalLine;": "\u2500", | ||
849 | "Hscr;": "\u210b", | ||
850 | "Hstrok;": "\u0126", | ||
851 | "HumpDownHump;": "\u224e", | ||
852 | "HumpEqual;": "\u224f", | ||
853 | "IEcy;": "\u0415", | ||
854 | "IJlig;": "\u0132", | ||
855 | "IOcy;": "\u0401", | ||
856 | "Iacute": "\xcd", | ||
857 | "Iacute;": "\xcd", | ||
858 | "Icirc": "\xce", | ||
859 | "Icirc;": "\xce", | ||
860 | "Icy;": "\u0418", | ||
861 | "Idot;": "\u0130", | ||
862 | "Ifr;": "\u2111", | ||
863 | "Igrave": "\xcc", | ||
864 | "Igrave;": "\xcc", | ||
865 | "Im;": "\u2111", | ||
866 | "Imacr;": "\u012a", | ||
867 | "ImaginaryI;": "\u2148", | ||
868 | "Implies;": "\u21d2", | ||
869 | "Int;": "\u222c", | ||
870 | "Integral;": "\u222b", | ||
871 | "Intersection;": "\u22c2", | ||
872 | "InvisibleComma;": "\u2063", | ||
873 | "InvisibleTimes;": "\u2062", | ||
874 | "Iogon;": "\u012e", | ||
875 | "Iopf;": "\U0001d540", | ||
876 | "Iota;": "\u0399", | ||
877 | "Iscr;": "\u2110", | ||
878 | "Itilde;": "\u0128", | ||
879 | "Iukcy;": "\u0406", | ||
880 | "Iuml": "\xcf", | ||
881 | "Iuml;": "\xcf", | ||
882 | "Jcirc;": "\u0134", | ||
883 | "Jcy;": "\u0419", | ||
884 | "Jfr;": "\U0001d50d", | ||
885 | "Jopf;": "\U0001d541", | ||
886 | "Jscr;": "\U0001d4a5", | ||
887 | "Jsercy;": "\u0408", | ||
888 | "Jukcy;": "\u0404", | ||
889 | "KHcy;": "\u0425", | ||
890 | "KJcy;": "\u040c", | ||
891 | "Kappa;": "\u039a", | ||
892 | "Kcedil;": "\u0136", | ||
893 | "Kcy;": "\u041a", | ||
894 | "Kfr;": "\U0001d50e", | ||
895 | "Kopf;": "\U0001d542", | ||
896 | "Kscr;": "\U0001d4a6", | ||
897 | "LJcy;": "\u0409", | ||
898 | "LT": "<", | ||
899 | "LT;": "<", | ||
900 | "Lacute;": "\u0139", | ||
901 | "Lambda;": "\u039b", | ||
902 | "Lang;": "\u27ea", | ||
903 | "Laplacetrf;": "\u2112", | ||
904 | "Larr;": "\u219e", | ||
905 | "Lcaron;": "\u013d", | ||
906 | "Lcedil;": "\u013b", | ||
907 | "Lcy;": "\u041b", | ||
908 | "LeftAngleBracket;": "\u27e8", | ||
909 | "LeftArrow;": "\u2190", | ||
910 | "LeftArrowBar;": "\u21e4", | ||
911 | "LeftArrowRightArrow;": "\u21c6", | ||
912 | "LeftCeiling;": "\u2308", | ||
913 | "LeftDoubleBracket;": "\u27e6", | ||
914 | "LeftDownTeeVector;": "\u2961", | ||
915 | "LeftDownVector;": "\u21c3", | ||
916 | "LeftDownVectorBar;": "\u2959", | ||
917 | "LeftFloor;": "\u230a", | ||
918 | "LeftRightArrow;": "\u2194", | ||
919 | "LeftRightVector;": "\u294e", | ||
920 | "LeftTee;": "\u22a3", | ||
921 | "LeftTeeArrow;": "\u21a4", | ||
922 | "LeftTeeVector;": "\u295a", | ||
923 | "LeftTriangle;": "\u22b2", | ||
924 | "LeftTriangleBar;": "\u29cf", | ||
925 | "LeftTriangleEqual;": "\u22b4", | ||
926 | "LeftUpDownVector;": "\u2951", | ||
927 | "LeftUpTeeVector;": "\u2960", | ||
928 | "LeftUpVector;": "\u21bf", | ||
929 | "LeftUpVectorBar;": "\u2958", | ||
930 | "LeftVector;": "\u21bc", | ||
931 | "LeftVectorBar;": "\u2952", | ||
932 | "Leftarrow;": "\u21d0", | ||
933 | "Leftrightarrow;": "\u21d4", | ||
934 | "LessEqualGreater;": "\u22da", | ||
935 | "LessFullEqual;": "\u2266", | ||
936 | "LessGreater;": "\u2276", | ||
937 | "LessLess;": "\u2aa1", | ||
938 | "LessSlantEqual;": "\u2a7d", | ||
939 | "LessTilde;": "\u2272", | ||
940 | "Lfr;": "\U0001d50f", | ||
941 | "Ll;": "\u22d8", | ||
942 | "Lleftarrow;": "\u21da", | ||
943 | "Lmidot;": "\u013f", | ||
944 | "LongLeftArrow;": "\u27f5", | ||
945 | "LongLeftRightArrow;": "\u27f7", | ||
946 | "LongRightArrow;": "\u27f6", | ||
947 | "Longleftarrow;": "\u27f8", | ||
948 | "Longleftrightarrow;": "\u27fa", | ||
949 | "Longrightarrow;": "\u27f9", | ||
950 | "Lopf;": "\U0001d543", | ||
951 | "LowerLeftArrow;": "\u2199", | ||
952 | "LowerRightArrow;": "\u2198", | ||
953 | "Lscr;": "\u2112", | ||
954 | "Lsh;": "\u21b0", | ||
955 | "Lstrok;": "\u0141", | ||
956 | "Lt;": "\u226a", | ||
957 | "Map;": "\u2905", | ||
958 | "Mcy;": "\u041c", | ||
959 | "MediumSpace;": "\u205f", | ||
960 | "Mellintrf;": "\u2133", | ||
961 | "Mfr;": "\U0001d510", | ||
962 | "MinusPlus;": "\u2213", | ||
963 | "Mopf;": "\U0001d544", | ||
964 | "Mscr;": "\u2133", | ||
965 | "Mu;": "\u039c", | ||
966 | "NJcy;": "\u040a", | ||
967 | "Nacute;": "\u0143", | ||
968 | "Ncaron;": "\u0147", | ||
969 | "Ncedil;": "\u0145", | ||
970 | "Ncy;": "\u041d", | ||
971 | "NegativeMediumSpace;": "\u200b", | ||
972 | "NegativeThickSpace;": "\u200b", | ||
973 | "NegativeThinSpace;": "\u200b", | ||
974 | "NegativeVeryThinSpace;": "\u200b", | ||
975 | "NestedGreaterGreater;": "\u226b", | ||
976 | "NestedLessLess;": "\u226a", | ||
977 | "NewLine;": "\n", | ||
978 | "Nfr;": "\U0001d511", | ||
979 | "NoBreak;": "\u2060", | ||
980 | "NonBreakingSpace;": "\xa0", | ||
981 | "Nopf;": "\u2115", | ||
982 | "Not;": "\u2aec", | ||
983 | "NotCongruent;": "\u2262", | ||
984 | "NotCupCap;": "\u226d", | ||
985 | "NotDoubleVerticalBar;": "\u2226", | ||
986 | "NotElement;": "\u2209", | ||
987 | "NotEqual;": "\u2260", | ||
988 | "NotEqualTilde;": "\u2242\u0338", | ||
989 | "NotExists;": "\u2204", | ||
990 | "NotGreater;": "\u226f", | ||
991 | "NotGreaterEqual;": "\u2271", | ||
992 | "NotGreaterFullEqual;": "\u2267\u0338", | ||
993 | "NotGreaterGreater;": "\u226b\u0338", | ||
994 | "NotGreaterLess;": "\u2279", | ||
995 | "NotGreaterSlantEqual;": "\u2a7e\u0338", | ||
996 | "NotGreaterTilde;": "\u2275", | ||
997 | "NotHumpDownHump;": "\u224e\u0338", | ||
998 | "NotHumpEqual;": "\u224f\u0338", | ||
999 | "NotLeftTriangle;": "\u22ea", | ||
1000 | "NotLeftTriangleBar;": "\u29cf\u0338", | ||
1001 | "NotLeftTriangleEqual;": "\u22ec", | ||
1002 | "NotLess;": "\u226e", | ||
1003 | "NotLessEqual;": "\u2270", | ||
1004 | "NotLessGreater;": "\u2278", | ||
1005 | "NotLessLess;": "\u226a\u0338", | ||
1006 | "NotLessSlantEqual;": "\u2a7d\u0338", | ||
1007 | "NotLessTilde;": "\u2274", | ||
1008 | "NotNestedGreaterGreater;": "\u2aa2\u0338", | ||
1009 | "NotNestedLessLess;": "\u2aa1\u0338", | ||
1010 | "NotPrecedes;": "\u2280", | ||
1011 | "NotPrecedesEqual;": "\u2aaf\u0338", | ||
1012 | "NotPrecedesSlantEqual;": "\u22e0", | ||
1013 | "NotReverseElement;": "\u220c", | ||
1014 | "NotRightTriangle;": "\u22eb", | ||
1015 | "NotRightTriangleBar;": "\u29d0\u0338", | ||
1016 | "NotRightTriangleEqual;": "\u22ed", | ||
1017 | "NotSquareSubset;": "\u228f\u0338", | ||
1018 | "NotSquareSubsetEqual;": "\u22e2", | ||
1019 | "NotSquareSuperset;": "\u2290\u0338", | ||
1020 | "NotSquareSupersetEqual;": "\u22e3", | ||
1021 | "NotSubset;": "\u2282\u20d2", | ||
1022 | "NotSubsetEqual;": "\u2288", | ||
1023 | "NotSucceeds;": "\u2281", | ||
1024 | "NotSucceedsEqual;": "\u2ab0\u0338", | ||
1025 | "NotSucceedsSlantEqual;": "\u22e1", | ||
1026 | "NotSucceedsTilde;": "\u227f\u0338", | ||
1027 | "NotSuperset;": "\u2283\u20d2", | ||
1028 | "NotSupersetEqual;": "\u2289", | ||
1029 | "NotTilde;": "\u2241", | ||
1030 | "NotTildeEqual;": "\u2244", | ||
1031 | "NotTildeFullEqual;": "\u2247", | ||
1032 | "NotTildeTilde;": "\u2249", | ||
1033 | "NotVerticalBar;": "\u2224", | ||
1034 | "Nscr;": "\U0001d4a9", | ||
1035 | "Ntilde": "\xd1", | ||
1036 | "Ntilde;": "\xd1", | ||
1037 | "Nu;": "\u039d", | ||
1038 | "OElig;": "\u0152", | ||
1039 | "Oacute": "\xd3", | ||
1040 | "Oacute;": "\xd3", | ||
1041 | "Ocirc": "\xd4", | ||
1042 | "Ocirc;": "\xd4", | ||
1043 | "Ocy;": "\u041e", | ||
1044 | "Odblac;": "\u0150", | ||
1045 | "Ofr;": "\U0001d512", | ||
1046 | "Ograve": "\xd2", | ||
1047 | "Ograve;": "\xd2", | ||
1048 | "Omacr;": "\u014c", | ||
1049 | "Omega;": "\u03a9", | ||
1050 | "Omicron;": "\u039f", | ||
1051 | "Oopf;": "\U0001d546", | ||
1052 | "OpenCurlyDoubleQuote;": "\u201c", | ||
1053 | "OpenCurlyQuote;": "\u2018", | ||
1054 | "Or;": "\u2a54", | ||
1055 | "Oscr;": "\U0001d4aa", | ||
1056 | "Oslash": "\xd8", | ||
1057 | "Oslash;": "\xd8", | ||
1058 | "Otilde": "\xd5", | ||
1059 | "Otilde;": "\xd5", | ||
1060 | "Otimes;": "\u2a37", | ||
1061 | "Ouml": "\xd6", | ||
1062 | "Ouml;": "\xd6", | ||
1063 | "OverBar;": "\u203e", | ||
1064 | "OverBrace;": "\u23de", | ||
1065 | "OverBracket;": "\u23b4", | ||
1066 | "OverParenthesis;": "\u23dc", | ||
1067 | "PartialD;": "\u2202", | ||
1068 | "Pcy;": "\u041f", | ||
1069 | "Pfr;": "\U0001d513", | ||
1070 | "Phi;": "\u03a6", | ||
1071 | "Pi;": "\u03a0", | ||
1072 | "PlusMinus;": "\xb1", | ||
1073 | "Poincareplane;": "\u210c", | ||
1074 | "Popf;": "\u2119", | ||
1075 | "Pr;": "\u2abb", | ||
1076 | "Precedes;": "\u227a", | ||
1077 | "PrecedesEqual;": "\u2aaf", | ||
1078 | "PrecedesSlantEqual;": "\u227c", | ||
1079 | "PrecedesTilde;": "\u227e", | ||
1080 | "Prime;": "\u2033", | ||
1081 | "Product;": "\u220f", | ||
1082 | "Proportion;": "\u2237", | ||
1083 | "Proportional;": "\u221d", | ||
1084 | "Pscr;": "\U0001d4ab", | ||
1085 | "Psi;": "\u03a8", | ||
1086 | "QUOT": "\"", | ||
1087 | "QUOT;": "\"", | ||
1088 | "Qfr;": "\U0001d514", | ||
1089 | "Qopf;": "\u211a", | ||
1090 | "Qscr;": "\U0001d4ac", | ||
1091 | "RBarr;": "\u2910", | ||
1092 | "REG": "\xae", | ||
1093 | "REG;": "\xae", | ||
1094 | "Racute;": "\u0154", | ||
1095 | "Rang;": "\u27eb", | ||
1096 | "Rarr;": "\u21a0", | ||
1097 | "Rarrtl;": "\u2916", | ||
1098 | "Rcaron;": "\u0158", | ||
1099 | "Rcedil;": "\u0156", | ||
1100 | "Rcy;": "\u0420", | ||
1101 | "Re;": "\u211c", | ||
1102 | "ReverseElement;": "\u220b", | ||
1103 | "ReverseEquilibrium;": "\u21cb", | ||
1104 | "ReverseUpEquilibrium;": "\u296f", | ||
1105 | "Rfr;": "\u211c", | ||
1106 | "Rho;": "\u03a1", | ||
1107 | "RightAngleBracket;": "\u27e9", | ||
1108 | "RightArrow;": "\u2192", | ||
1109 | "RightArrowBar;": "\u21e5", | ||
1110 | "RightArrowLeftArrow;": "\u21c4", | ||
1111 | "RightCeiling;": "\u2309", | ||
1112 | "RightDoubleBracket;": "\u27e7", | ||
1113 | "RightDownTeeVector;": "\u295d", | ||
1114 | "RightDownVector;": "\u21c2", | ||
1115 | "RightDownVectorBar;": "\u2955", | ||
1116 | "RightFloor;": "\u230b", | ||
1117 | "RightTee;": "\u22a2", | ||
1118 | "RightTeeArrow;": "\u21a6", | ||
1119 | "RightTeeVector;": "\u295b", | ||
1120 | "RightTriangle;": "\u22b3", | ||
1121 | "RightTriangleBar;": "\u29d0", | ||
1122 | "RightTriangleEqual;": "\u22b5", | ||
1123 | "RightUpDownVector;": "\u294f", | ||
1124 | "RightUpTeeVector;": "\u295c", | ||
1125 | "RightUpVector;": "\u21be", | ||
1126 | "RightUpVectorBar;": "\u2954", | ||
1127 | "RightVector;": "\u21c0", | ||
1128 | "RightVectorBar;": "\u2953", | ||
1129 | "Rightarrow;": "\u21d2", | ||
1130 | "Ropf;": "\u211d", | ||
1131 | "RoundImplies;": "\u2970", | ||
1132 | "Rrightarrow;": "\u21db", | ||
1133 | "Rscr;": "\u211b", | ||
1134 | "Rsh;": "\u21b1", | ||
1135 | "RuleDelayed;": "\u29f4", | ||
1136 | "SHCHcy;": "\u0429", | ||
1137 | "SHcy;": "\u0428", | ||
1138 | "SOFTcy;": "\u042c", | ||
1139 | "Sacute;": "\u015a", | ||
1140 | "Sc;": "\u2abc", | ||
1141 | "Scaron;": "\u0160", | ||
1142 | "Scedil;": "\u015e", | ||
1143 | "Scirc;": "\u015c", | ||
1144 | "Scy;": "\u0421", | ||
1145 | "Sfr;": "\U0001d516", | ||
1146 | "ShortDownArrow;": "\u2193", | ||
1147 | "ShortLeftArrow;": "\u2190", | ||
1148 | "ShortRightArrow;": "\u2192", | ||
1149 | "ShortUpArrow;": "\u2191", | ||
1150 | "Sigma;": "\u03a3", | ||
1151 | "SmallCircle;": "\u2218", | ||
1152 | "Sopf;": "\U0001d54a", | ||
1153 | "Sqrt;": "\u221a", | ||
1154 | "Square;": "\u25a1", | ||
1155 | "SquareIntersection;": "\u2293", | ||
1156 | "SquareSubset;": "\u228f", | ||
1157 | "SquareSubsetEqual;": "\u2291", | ||
1158 | "SquareSuperset;": "\u2290", | ||
1159 | "SquareSupersetEqual;": "\u2292", | ||
1160 | "SquareUnion;": "\u2294", | ||
1161 | "Sscr;": "\U0001d4ae", | ||
1162 | "Star;": "\u22c6", | ||
1163 | "Sub;": "\u22d0", | ||
1164 | "Subset;": "\u22d0", | ||
1165 | "SubsetEqual;": "\u2286", | ||
1166 | "Succeeds;": "\u227b", | ||
1167 | "SucceedsEqual;": "\u2ab0", | ||
1168 | "SucceedsSlantEqual;": "\u227d", | ||
1169 | "SucceedsTilde;": "\u227f", | ||
1170 | "SuchThat;": "\u220b", | ||
1171 | "Sum;": "\u2211", | ||
1172 | "Sup;": "\u22d1", | ||
1173 | "Superset;": "\u2283", | ||
1174 | "SupersetEqual;": "\u2287", | ||
1175 | "Supset;": "\u22d1", | ||
1176 | "THORN": "\xde", | ||
1177 | "THORN;": "\xde", | ||
1178 | "TRADE;": "\u2122", | ||
1179 | "TSHcy;": "\u040b", | ||
1180 | "TScy;": "\u0426", | ||
1181 | "Tab;": "\t", | ||
1182 | "Tau;": "\u03a4", | ||
1183 | "Tcaron;": "\u0164", | ||
1184 | "Tcedil;": "\u0162", | ||
1185 | "Tcy;": "\u0422", | ||
1186 | "Tfr;": "\U0001d517", | ||
1187 | "Therefore;": "\u2234", | ||
1188 | "Theta;": "\u0398", | ||
1189 | "ThickSpace;": "\u205f\u200a", | ||
1190 | "ThinSpace;": "\u2009", | ||
1191 | "Tilde;": "\u223c", | ||
1192 | "TildeEqual;": "\u2243", | ||
1193 | "TildeFullEqual;": "\u2245", | ||
1194 | "TildeTilde;": "\u2248", | ||
1195 | "Topf;": "\U0001d54b", | ||
1196 | "TripleDot;": "\u20db", | ||
1197 | "Tscr;": "\U0001d4af", | ||
1198 | "Tstrok;": "\u0166", | ||
1199 | "Uacute": "\xda", | ||
1200 | "Uacute;": "\xda", | ||
1201 | "Uarr;": "\u219f", | ||
1202 | "Uarrocir;": "\u2949", | ||
1203 | "Ubrcy;": "\u040e", | ||
1204 | "Ubreve;": "\u016c", | ||
1205 | "Ucirc": "\xdb", | ||
1206 | "Ucirc;": "\xdb", | ||
1207 | "Ucy;": "\u0423", | ||
1208 | "Udblac;": "\u0170", | ||
1209 | "Ufr;": "\U0001d518", | ||
1210 | "Ugrave": "\xd9", | ||
1211 | "Ugrave;": "\xd9", | ||
1212 | "Umacr;": "\u016a", | ||
1213 | "UnderBar;": "_", | ||
1214 | "UnderBrace;": "\u23df", | ||
1215 | "UnderBracket;": "\u23b5", | ||
1216 | "UnderParenthesis;": "\u23dd", | ||
1217 | "Union;": "\u22c3", | ||
1218 | "UnionPlus;": "\u228e", | ||
1219 | "Uogon;": "\u0172", | ||
1220 | "Uopf;": "\U0001d54c", | ||
1221 | "UpArrow;": "\u2191", | ||
1222 | "UpArrowBar;": "\u2912", | ||
1223 | "UpArrowDownArrow;": "\u21c5", | ||
1224 | "UpDownArrow;": "\u2195", | ||
1225 | "UpEquilibrium;": "\u296e", | ||
1226 | "UpTee;": "\u22a5", | ||
1227 | "UpTeeArrow;": "\u21a5", | ||
1228 | "Uparrow;": "\u21d1", | ||
1229 | "Updownarrow;": "\u21d5", | ||
1230 | "UpperLeftArrow;": "\u2196", | ||
1231 | "UpperRightArrow;": "\u2197", | ||
1232 | "Upsi;": "\u03d2", | ||
1233 | "Upsilon;": "\u03a5", | ||
1234 | "Uring;": "\u016e", | ||
1235 | "Uscr;": "\U0001d4b0", | ||
1236 | "Utilde;": "\u0168", | ||
1237 | "Uuml": "\xdc", | ||
1238 | "Uuml;": "\xdc", | ||
1239 | "VDash;": "\u22ab", | ||
1240 | "Vbar;": "\u2aeb", | ||
1241 | "Vcy;": "\u0412", | ||
1242 | "Vdash;": "\u22a9", | ||
1243 | "Vdashl;": "\u2ae6", | ||
1244 | "Vee;": "\u22c1", | ||
1245 | "Verbar;": "\u2016", | ||
1246 | "Vert;": "\u2016", | ||
1247 | "VerticalBar;": "\u2223", | ||
1248 | "VerticalLine;": "|", | ||
1249 | "VerticalSeparator;": "\u2758", | ||
1250 | "VerticalTilde;": "\u2240", | ||
1251 | "VeryThinSpace;": "\u200a", | ||
1252 | "Vfr;": "\U0001d519", | ||
1253 | "Vopf;": "\U0001d54d", | ||
1254 | "Vscr;": "\U0001d4b1", | ||
1255 | "Vvdash;": "\u22aa", | ||
1256 | "Wcirc;": "\u0174", | ||
1257 | "Wedge;": "\u22c0", | ||
1258 | "Wfr;": "\U0001d51a", | ||
1259 | "Wopf;": "\U0001d54e", | ||
1260 | "Wscr;": "\U0001d4b2", | ||
1261 | "Xfr;": "\U0001d51b", | ||
1262 | "Xi;": "\u039e", | ||
1263 | "Xopf;": "\U0001d54f", | ||
1264 | "Xscr;": "\U0001d4b3", | ||
1265 | "YAcy;": "\u042f", | ||
1266 | "YIcy;": "\u0407", | ||
1267 | "YUcy;": "\u042e", | ||
1268 | "Yacute": "\xdd", | ||
1269 | "Yacute;": "\xdd", | ||
1270 | "Ycirc;": "\u0176", | ||
1271 | "Ycy;": "\u042b", | ||
1272 | "Yfr;": "\U0001d51c", | ||
1273 | "Yopf;": "\U0001d550", | ||
1274 | "Yscr;": "\U0001d4b4", | ||
1275 | "Yuml;": "\u0178", | ||
1276 | "ZHcy;": "\u0416", | ||
1277 | "Zacute;": "\u0179", | ||
1278 | "Zcaron;": "\u017d", | ||
1279 | "Zcy;": "\u0417", | ||
1280 | "Zdot;": "\u017b", | ||
1281 | "ZeroWidthSpace;": "\u200b", | ||
1282 | "Zeta;": "\u0396", | ||
1283 | "Zfr;": "\u2128", | ||
1284 | "Zopf;": "\u2124", | ||
1285 | "Zscr;": "\U0001d4b5", | ||
1286 | "aacute": "\xe1", | ||
1287 | "aacute;": "\xe1", | ||
1288 | "abreve;": "\u0103", | ||
1289 | "ac;": "\u223e", | ||
1290 | "acE;": "\u223e\u0333", | ||
1291 | "acd;": "\u223f", | ||
1292 | "acirc": "\xe2", | ||
1293 | "acirc;": "\xe2", | ||
1294 | "acute": "\xb4", | ||
1295 | "acute;": "\xb4", | ||
1296 | "acy;": "\u0430", | ||
1297 | "aelig": "\xe6", | ||
1298 | "aelig;": "\xe6", | ||
1299 | "af;": "\u2061", | ||
1300 | "afr;": "\U0001d51e", | ||
1301 | "agrave": "\xe0", | ||
1302 | "agrave;": "\xe0", | ||
1303 | "alefsym;": "\u2135", | ||
1304 | "aleph;": "\u2135", | ||
1305 | "alpha;": "\u03b1", | ||
1306 | "amacr;": "\u0101", | ||
1307 | "amalg;": "\u2a3f", | ||
1308 | "amp": "&", | ||
1309 | "amp;": "&", | ||
1310 | "and;": "\u2227", | ||
1311 | "andand;": "\u2a55", | ||
1312 | "andd;": "\u2a5c", | ||
1313 | "andslope;": "\u2a58", | ||
1314 | "andv;": "\u2a5a", | ||
1315 | "ang;": "\u2220", | ||
1316 | "ange;": "\u29a4", | ||
1317 | "angle;": "\u2220", | ||
1318 | "angmsd;": "\u2221", | ||
1319 | "angmsdaa;": "\u29a8", | ||
1320 | "angmsdab;": "\u29a9", | ||
1321 | "angmsdac;": "\u29aa", | ||
1322 | "angmsdad;": "\u29ab", | ||
1323 | "angmsdae;": "\u29ac", | ||
1324 | "angmsdaf;": "\u29ad", | ||
1325 | "angmsdag;": "\u29ae", | ||
1326 | "angmsdah;": "\u29af", | ||
1327 | "angrt;": "\u221f", | ||
1328 | "angrtvb;": "\u22be", | ||
1329 | "angrtvbd;": "\u299d", | ||
1330 | "angsph;": "\u2222", | ||
1331 | "angst;": "\xc5", | ||
1332 | "angzarr;": "\u237c", | ||
1333 | "aogon;": "\u0105", | ||
1334 | "aopf;": "\U0001d552", | ||
1335 | "ap;": "\u2248", | ||
1336 | "apE;": "\u2a70", | ||
1337 | "apacir;": "\u2a6f", | ||
1338 | "ape;": "\u224a", | ||
1339 | "apid;": "\u224b", | ||
1340 | "apos;": "'", | ||
1341 | "approx;": "\u2248", | ||
1342 | "approxeq;": "\u224a", | ||
1343 | "aring": "\xe5", | ||
1344 | "aring;": "\xe5", | ||
1345 | "ascr;": "\U0001d4b6", | ||
1346 | "ast;": "*", | ||
1347 | "asymp;": "\u2248", | ||
1348 | "asympeq;": "\u224d", | ||
1349 | "atilde": "\xe3", | ||
1350 | "atilde;": "\xe3", | ||
1351 | "auml": "\xe4", | ||
1352 | "auml;": "\xe4", | ||
1353 | "awconint;": "\u2233", | ||
1354 | "awint;": "\u2a11", | ||
1355 | "bNot;": "\u2aed", | ||
1356 | "backcong;": "\u224c", | ||
1357 | "backepsilon;": "\u03f6", | ||
1358 | "backprime;": "\u2035", | ||
1359 | "backsim;": "\u223d", | ||
1360 | "backsimeq;": "\u22cd", | ||
1361 | "barvee;": "\u22bd", | ||
1362 | "barwed;": "\u2305", | ||
1363 | "barwedge;": "\u2305", | ||
1364 | "bbrk;": "\u23b5", | ||
1365 | "bbrktbrk;": "\u23b6", | ||
1366 | "bcong;": "\u224c", | ||
1367 | "bcy;": "\u0431", | ||
1368 | "bdquo;": "\u201e", | ||
1369 | "becaus;": "\u2235", | ||
1370 | "because;": "\u2235", | ||
1371 | "bemptyv;": "\u29b0", | ||
1372 | "bepsi;": "\u03f6", | ||
1373 | "bernou;": "\u212c", | ||
1374 | "beta;": "\u03b2", | ||
1375 | "beth;": "\u2136", | ||
1376 | "between;": "\u226c", | ||
1377 | "bfr;": "\U0001d51f", | ||
1378 | "bigcap;": "\u22c2", | ||
1379 | "bigcirc;": "\u25ef", | ||
1380 | "bigcup;": "\u22c3", | ||
1381 | "bigodot;": "\u2a00", | ||
1382 | "bigoplus;": "\u2a01", | ||
1383 | "bigotimes;": "\u2a02", | ||
1384 | "bigsqcup;": "\u2a06", | ||
1385 | "bigstar;": "\u2605", | ||
1386 | "bigtriangledown;": "\u25bd", | ||
1387 | "bigtriangleup;": "\u25b3", | ||
1388 | "biguplus;": "\u2a04", | ||
1389 | "bigvee;": "\u22c1", | ||
1390 | "bigwedge;": "\u22c0", | ||
1391 | "bkarow;": "\u290d", | ||
1392 | "blacklozenge;": "\u29eb", | ||
1393 | "blacksquare;": "\u25aa", | ||
1394 | "blacktriangle;": "\u25b4", | ||
1395 | "blacktriangledown;": "\u25be", | ||
1396 | "blacktriangleleft;": "\u25c2", | ||
1397 | "blacktriangleright;": "\u25b8", | ||
1398 | "blank;": "\u2423", | ||
1399 | "blk12;": "\u2592", | ||
1400 | "blk14;": "\u2591", | ||
1401 | "blk34;": "\u2593", | ||
1402 | "block;": "\u2588", | ||
1403 | "bne;": "=\u20e5", | ||
1404 | "bnequiv;": "\u2261\u20e5", | ||
1405 | "bnot;": "\u2310", | ||
1406 | "bopf;": "\U0001d553", | ||
1407 | "bot;": "\u22a5", | ||
1408 | "bottom;": "\u22a5", | ||
1409 | "bowtie;": "\u22c8", | ||
1410 | "boxDL;": "\u2557", | ||
1411 | "boxDR;": "\u2554", | ||
1412 | "boxDl;": "\u2556", | ||
1413 | "boxDr;": "\u2553", | ||
1414 | "boxH;": "\u2550", | ||
1415 | "boxHD;": "\u2566", | ||
1416 | "boxHU;": "\u2569", | ||
1417 | "boxHd;": "\u2564", | ||
1418 | "boxHu;": "\u2567", | ||
1419 | "boxUL;": "\u255d", | ||
1420 | "boxUR;": "\u255a", | ||
1421 | "boxUl;": "\u255c", | ||
1422 | "boxUr;": "\u2559", | ||
1423 | "boxV;": "\u2551", | ||
1424 | "boxVH;": "\u256c", | ||
1425 | "boxVL;": "\u2563", | ||
1426 | "boxVR;": "\u2560", | ||
1427 | "boxVh;": "\u256b", | ||
1428 | "boxVl;": "\u2562", | ||
1429 | "boxVr;": "\u255f", | ||
1430 | "boxbox;": "\u29c9", | ||
1431 | "boxdL;": "\u2555", | ||
1432 | "boxdR;": "\u2552", | ||
1433 | "boxdl;": "\u2510", | ||
1434 | "boxdr;": "\u250c", | ||
1435 | "boxh;": "\u2500", | ||
1436 | "boxhD;": "\u2565", | ||
1437 | "boxhU;": "\u2568", | ||
1438 | "boxhd;": "\u252c", | ||
1439 | "boxhu;": "\u2534", | ||
1440 | "boxminus;": "\u229f", | ||
1441 | "boxplus;": "\u229e", | ||
1442 | "boxtimes;": "\u22a0", | ||
1443 | "boxuL;": "\u255b", | ||
1444 | "boxuR;": "\u2558", | ||
1445 | "boxul;": "\u2518", | ||
1446 | "boxur;": "\u2514", | ||
1447 | "boxv;": "\u2502", | ||
1448 | "boxvH;": "\u256a", | ||
1449 | "boxvL;": "\u2561", | ||
1450 | "boxvR;": "\u255e", | ||
1451 | "boxvh;": "\u253c", | ||
1452 | "boxvl;": "\u2524", | ||
1453 | "boxvr;": "\u251c", | ||
1454 | "bprime;": "\u2035", | ||
1455 | "breve;": "\u02d8", | ||
1456 | "brvbar": "\xa6", | ||
1457 | "brvbar;": "\xa6", | ||
1458 | "bscr;": "\U0001d4b7", | ||
1459 | "bsemi;": "\u204f", | ||
1460 | "bsim;": "\u223d", | ||
1461 | "bsime;": "\u22cd", | ||
1462 | "bsol;": "\\", | ||
1463 | "bsolb;": "\u29c5", | ||
1464 | "bsolhsub;": "\u27c8", | ||
1465 | "bull;": "\u2022", | ||
1466 | "bullet;": "\u2022", | ||
1467 | "bump;": "\u224e", | ||
1468 | "bumpE;": "\u2aae", | ||
1469 | "bumpe;": "\u224f", | ||
1470 | "bumpeq;": "\u224f", | ||
1471 | "cacute;": "\u0107", | ||
1472 | "cap;": "\u2229", | ||
1473 | "capand;": "\u2a44", | ||
1474 | "capbrcup;": "\u2a49", | ||
1475 | "capcap;": "\u2a4b", | ||
1476 | "capcup;": "\u2a47", | ||
1477 | "capdot;": "\u2a40", | ||
1478 | "caps;": "\u2229\ufe00", | ||
1479 | "caret;": "\u2041", | ||
1480 | "caron;": "\u02c7", | ||
1481 | "ccaps;": "\u2a4d", | ||
1482 | "ccaron;": "\u010d", | ||
1483 | "ccedil": "\xe7", | ||
1484 | "ccedil;": "\xe7", | ||
1485 | "ccirc;": "\u0109", | ||
1486 | "ccups;": "\u2a4c", | ||
1487 | "ccupssm;": "\u2a50", | ||
1488 | "cdot;": "\u010b", | ||
1489 | "cedil": "\xb8", | ||
1490 | "cedil;": "\xb8", | ||
1491 | "cemptyv;": "\u29b2", | ||
1492 | "cent": "\xa2", | ||
1493 | "cent;": "\xa2", | ||
1494 | "centerdot;": "\xb7", | ||
1495 | "cfr;": "\U0001d520", | ||
1496 | "chcy;": "\u0447", | ||
1497 | "check;": "\u2713", | ||
1498 | "checkmark;": "\u2713", | ||
1499 | "chi;": "\u03c7", | ||
1500 | "cir;": "\u25cb", | ||
1501 | "cirE;": "\u29c3", | ||
1502 | "circ;": "\u02c6", | ||
1503 | "circeq;": "\u2257", | ||
1504 | "circlearrowleft;": "\u21ba", | ||
1505 | "circlearrowright;": "\u21bb", | ||
1506 | "circledR;": "\xae", | ||
1507 | "circledS;": "\u24c8", | ||
1508 | "circledast;": "\u229b", | ||
1509 | "circledcirc;": "\u229a", | ||
1510 | "circleddash;": "\u229d", | ||
1511 | "cire;": "\u2257", | ||
1512 | "cirfnint;": "\u2a10", | ||
1513 | "cirmid;": "\u2aef", | ||
1514 | "cirscir;": "\u29c2", | ||
1515 | "clubs;": "\u2663", | ||
1516 | "clubsuit;": "\u2663", | ||
1517 | "colon;": ":", | ||
1518 | "colone;": "\u2254", | ||
1519 | "coloneq;": "\u2254", | ||
1520 | "comma;": ",", | ||
1521 | "commat;": "@", | ||
1522 | "comp;": "\u2201", | ||
1523 | "compfn;": "\u2218", | ||
1524 | "complement;": "\u2201", | ||
1525 | "complexes;": "\u2102", | ||
1526 | "cong;": "\u2245", | ||
1527 | "congdot;": "\u2a6d", | ||
1528 | "conint;": "\u222e", | ||
1529 | "copf;": "\U0001d554", | ||
1530 | "coprod;": "\u2210", | ||
1531 | "copy": "\xa9", | ||
1532 | "copy;": "\xa9", | ||
1533 | "copysr;": "\u2117", | ||
1534 | "crarr;": "\u21b5", | ||
1535 | "cross;": "\u2717", | ||
1536 | "cscr;": "\U0001d4b8", | ||
1537 | "csub;": "\u2acf", | ||
1538 | "csube;": "\u2ad1", | ||
1539 | "csup;": "\u2ad0", | ||
1540 | "csupe;": "\u2ad2", | ||
1541 | "ctdot;": "\u22ef", | ||
1542 | "cudarrl;": "\u2938", | ||
1543 | "cudarrr;": "\u2935", | ||
1544 | "cuepr;": "\u22de", | ||
1545 | "cuesc;": "\u22df", | ||
1546 | "cularr;": "\u21b6", | ||
1547 | "cularrp;": "\u293d", | ||
1548 | "cup;": "\u222a", | ||
1549 | "cupbrcap;": "\u2a48", | ||
1550 | "cupcap;": "\u2a46", | ||
1551 | "cupcup;": "\u2a4a", | ||
1552 | "cupdot;": "\u228d", | ||
1553 | "cupor;": "\u2a45", | ||
1554 | "cups;": "\u222a\ufe00", | ||
1555 | "curarr;": "\u21b7", | ||
1556 | "curarrm;": "\u293c", | ||
1557 | "curlyeqprec;": "\u22de", | ||
1558 | "curlyeqsucc;": "\u22df", | ||
1559 | "curlyvee;": "\u22ce", | ||
1560 | "curlywedge;": "\u22cf", | ||
1561 | "curren": "\xa4", | ||
1562 | "curren;": "\xa4", | ||
1563 | "curvearrowleft;": "\u21b6", | ||
1564 | "curvearrowright;": "\u21b7", | ||
1565 | "cuvee;": "\u22ce", | ||
1566 | "cuwed;": "\u22cf", | ||
1567 | "cwconint;": "\u2232", | ||
1568 | "cwint;": "\u2231", | ||
1569 | "cylcty;": "\u232d", | ||
1570 | "dArr;": "\u21d3", | ||
1571 | "dHar;": "\u2965", | ||
1572 | "dagger;": "\u2020", | ||
1573 | "daleth;": "\u2138", | ||
1574 | "darr;": "\u2193", | ||
1575 | "dash;": "\u2010", | ||
1576 | "dashv;": "\u22a3", | ||
1577 | "dbkarow;": "\u290f", | ||
1578 | "dblac;": "\u02dd", | ||
1579 | "dcaron;": "\u010f", | ||
1580 | "dcy;": "\u0434", | ||
1581 | "dd;": "\u2146", | ||
1582 | "ddagger;": "\u2021", | ||
1583 | "ddarr;": "\u21ca", | ||
1584 | "ddotseq;": "\u2a77", | ||
1585 | "deg": "\xb0", | ||
1586 | "deg;": "\xb0", | ||
1587 | "delta;": "\u03b4", | ||
1588 | "demptyv;": "\u29b1", | ||
1589 | "dfisht;": "\u297f", | ||
1590 | "dfr;": "\U0001d521", | ||
1591 | "dharl;": "\u21c3", | ||
1592 | "dharr;": "\u21c2", | ||
1593 | "diam;": "\u22c4", | ||
1594 | "diamond;": "\u22c4", | ||
1595 | "diamondsuit;": "\u2666", | ||
1596 | "diams;": "\u2666", | ||
1597 | "die;": "\xa8", | ||
1598 | "digamma;": "\u03dd", | ||
1599 | "disin;": "\u22f2", | ||
1600 | "div;": "\xf7", | ||
1601 | "divide": "\xf7", | ||
1602 | "divide;": "\xf7", | ||
1603 | "divideontimes;": "\u22c7", | ||
1604 | "divonx;": "\u22c7", | ||
1605 | "djcy;": "\u0452", | ||
1606 | "dlcorn;": "\u231e", | ||
1607 | "dlcrop;": "\u230d", | ||
1608 | "dollar;": "$", | ||
1609 | "dopf;": "\U0001d555", | ||
1610 | "dot;": "\u02d9", | ||
1611 | "doteq;": "\u2250", | ||
1612 | "doteqdot;": "\u2251", | ||
1613 | "dotminus;": "\u2238", | ||
1614 | "dotplus;": "\u2214", | ||
1615 | "dotsquare;": "\u22a1", | ||
1616 | "doublebarwedge;": "\u2306", | ||
1617 | "downarrow;": "\u2193", | ||
1618 | "downdownarrows;": "\u21ca", | ||
1619 | "downharpoonleft;": "\u21c3", | ||
1620 | "downharpoonright;": "\u21c2", | ||
1621 | "drbkarow;": "\u2910", | ||
1622 | "drcorn;": "\u231f", | ||
1623 | "drcrop;": "\u230c", | ||
1624 | "dscr;": "\U0001d4b9", | ||
1625 | "dscy;": "\u0455", | ||
1626 | "dsol;": "\u29f6", | ||
1627 | "dstrok;": "\u0111", | ||
1628 | "dtdot;": "\u22f1", | ||
1629 | "dtri;": "\u25bf", | ||
1630 | "dtrif;": "\u25be", | ||
1631 | "duarr;": "\u21f5", | ||
1632 | "duhar;": "\u296f", | ||
1633 | "dwangle;": "\u29a6", | ||
1634 | "dzcy;": "\u045f", | ||
1635 | "dzigrarr;": "\u27ff", | ||
1636 | "eDDot;": "\u2a77", | ||
1637 | "eDot;": "\u2251", | ||
1638 | "eacute": "\xe9", | ||
1639 | "eacute;": "\xe9", | ||
1640 | "easter;": "\u2a6e", | ||
1641 | "ecaron;": "\u011b", | ||
1642 | "ecir;": "\u2256", | ||
1643 | "ecirc": "\xea", | ||
1644 | "ecirc;": "\xea", | ||
1645 | "ecolon;": "\u2255", | ||
1646 | "ecy;": "\u044d", | ||
1647 | "edot;": "\u0117", | ||
1648 | "ee;": "\u2147", | ||
1649 | "efDot;": "\u2252", | ||
1650 | "efr;": "\U0001d522", | ||
1651 | "eg;": "\u2a9a", | ||
1652 | "egrave": "\xe8", | ||
1653 | "egrave;": "\xe8", | ||
1654 | "egs;": "\u2a96", | ||
1655 | "egsdot;": "\u2a98", | ||
1656 | "el;": "\u2a99", | ||
1657 | "elinters;": "\u23e7", | ||
1658 | "ell;": "\u2113", | ||
1659 | "els;": "\u2a95", | ||
1660 | "elsdot;": "\u2a97", | ||
1661 | "emacr;": "\u0113", | ||
1662 | "empty;": "\u2205", | ||
1663 | "emptyset;": "\u2205", | ||
1664 | "emptyv;": "\u2205", | ||
1665 | "emsp13;": "\u2004", | ||
1666 | "emsp14;": "\u2005", | ||
1667 | "emsp;": "\u2003", | ||
1668 | "eng;": "\u014b", | ||
1669 | "ensp;": "\u2002", | ||
1670 | "eogon;": "\u0119", | ||
1671 | "eopf;": "\U0001d556", | ||
1672 | "epar;": "\u22d5", | ||
1673 | "eparsl;": "\u29e3", | ||
1674 | "eplus;": "\u2a71", | ||
1675 | "epsi;": "\u03b5", | ||
1676 | "epsilon;": "\u03b5", | ||
1677 | "epsiv;": "\u03f5", | ||
1678 | "eqcirc;": "\u2256", | ||
1679 | "eqcolon;": "\u2255", | ||
1680 | "eqsim;": "\u2242", | ||
1681 | "eqslantgtr;": "\u2a96", | ||
1682 | "eqslantless;": "\u2a95", | ||
1683 | "equals;": "=", | ||
1684 | "equest;": "\u225f", | ||
1685 | "equiv;": "\u2261", | ||
1686 | "equivDD;": "\u2a78", | ||
1687 | "eqvparsl;": "\u29e5", | ||
1688 | "erDot;": "\u2253", | ||
1689 | "erarr;": "\u2971", | ||
1690 | "escr;": "\u212f", | ||
1691 | "esdot;": "\u2250", | ||
1692 | "esim;": "\u2242", | ||
1693 | "eta;": "\u03b7", | ||
1694 | "eth": "\xf0", | ||
1695 | "eth;": "\xf0", | ||
1696 | "euml": "\xeb", | ||
1697 | "euml;": "\xeb", | ||
1698 | "euro;": "\u20ac", | ||
1699 | "excl;": "!", | ||
1700 | "exist;": "\u2203", | ||
1701 | "expectation;": "\u2130", | ||
1702 | "exponentiale;": "\u2147", | ||
1703 | "fallingdotseq;": "\u2252", | ||
1704 | "fcy;": "\u0444", | ||
1705 | "female;": "\u2640", | ||
1706 | "ffilig;": "\ufb03", | ||
1707 | "fflig;": "\ufb00", | ||
1708 | "ffllig;": "\ufb04", | ||
1709 | "ffr;": "\U0001d523", | ||
1710 | "filig;": "\ufb01", | ||
1711 | "fjlig;": "fj", | ||
1712 | "flat;": "\u266d", | ||
1713 | "fllig;": "\ufb02", | ||
1714 | "fltns;": "\u25b1", | ||
1715 | "fnof;": "\u0192", | ||
1716 | "fopf;": "\U0001d557", | ||
1717 | "forall;": "\u2200", | ||
1718 | "fork;": "\u22d4", | ||
1719 | "forkv;": "\u2ad9", | ||
1720 | "fpartint;": "\u2a0d", | ||
1721 | "frac12": "\xbd", | ||
1722 | "frac12;": "\xbd", | ||
1723 | "frac13;": "\u2153", | ||
1724 | "frac14": "\xbc", | ||
1725 | "frac14;": "\xbc", | ||
1726 | "frac15;": "\u2155", | ||
1727 | "frac16;": "\u2159", | ||
1728 | "frac18;": "\u215b", | ||
1729 | "frac23;": "\u2154", | ||
1730 | "frac25;": "\u2156", | ||
1731 | "frac34": "\xbe", | ||
1732 | "frac34;": "\xbe", | ||
1733 | "frac35;": "\u2157", | ||
1734 | "frac38;": "\u215c", | ||
1735 | "frac45;": "\u2158", | ||
1736 | "frac56;": "\u215a", | ||
1737 | "frac58;": "\u215d", | ||
1738 | "frac78;": "\u215e", | ||
1739 | "frasl;": "\u2044", | ||
1740 | "frown;": "\u2322", | ||
1741 | "fscr;": "\U0001d4bb", | ||
1742 | "gE;": "\u2267", | ||
1743 | "gEl;": "\u2a8c", | ||
1744 | "gacute;": "\u01f5", | ||
1745 | "gamma;": "\u03b3", | ||
1746 | "gammad;": "\u03dd", | ||
1747 | "gap;": "\u2a86", | ||
1748 | "gbreve;": "\u011f", | ||
1749 | "gcirc;": "\u011d", | ||
1750 | "gcy;": "\u0433", | ||
1751 | "gdot;": "\u0121", | ||
1752 | "ge;": "\u2265", | ||
1753 | "gel;": "\u22db", | ||
1754 | "geq;": "\u2265", | ||
1755 | "geqq;": "\u2267", | ||
1756 | "geqslant;": "\u2a7e", | ||
1757 | "ges;": "\u2a7e", | ||
1758 | "gescc;": "\u2aa9", | ||
1759 | "gesdot;": "\u2a80", | ||
1760 | "gesdoto;": "\u2a82", | ||
1761 | "gesdotol;": "\u2a84", | ||
1762 | "gesl;": "\u22db\ufe00", | ||
1763 | "gesles;": "\u2a94", | ||
1764 | "gfr;": "\U0001d524", | ||
1765 | "gg;": "\u226b", | ||
1766 | "ggg;": "\u22d9", | ||
1767 | "gimel;": "\u2137", | ||
1768 | "gjcy;": "\u0453", | ||
1769 | "gl;": "\u2277", | ||
1770 | "glE;": "\u2a92", | ||
1771 | "gla;": "\u2aa5", | ||
1772 | "glj;": "\u2aa4", | ||
1773 | "gnE;": "\u2269", | ||
1774 | "gnap;": "\u2a8a", | ||
1775 | "gnapprox;": "\u2a8a", | ||
1776 | "gne;": "\u2a88", | ||
1777 | "gneq;": "\u2a88", | ||
1778 | "gneqq;": "\u2269", | ||
1779 | "gnsim;": "\u22e7", | ||
1780 | "gopf;": "\U0001d558", | ||
1781 | "grave;": "`", | ||
1782 | "gscr;": "\u210a", | ||
1783 | "gsim;": "\u2273", | ||
1784 | "gsime;": "\u2a8e", | ||
1785 | "gsiml;": "\u2a90", | ||
1786 | "gt": ">", | ||
1787 | "gt;": ">", | ||
1788 | "gtcc;": "\u2aa7", | ||
1789 | "gtcir;": "\u2a7a", | ||
1790 | "gtdot;": "\u22d7", | ||
1791 | "gtlPar;": "\u2995", | ||
1792 | "gtquest;": "\u2a7c", | ||
1793 | "gtrapprox;": "\u2a86", | ||
1794 | "gtrarr;": "\u2978", | ||
1795 | "gtrdot;": "\u22d7", | ||
1796 | "gtreqless;": "\u22db", | ||
1797 | "gtreqqless;": "\u2a8c", | ||
1798 | "gtrless;": "\u2277", | ||
1799 | "gtrsim;": "\u2273", | ||
1800 | "gvertneqq;": "\u2269\ufe00", | ||
1801 | "gvnE;": "\u2269\ufe00", | ||
1802 | "hArr;": "\u21d4", | ||
1803 | "hairsp;": "\u200a", | ||
1804 | "half;": "\xbd", | ||
1805 | "hamilt;": "\u210b", | ||
1806 | "hardcy;": "\u044a", | ||
1807 | "harr;": "\u2194", | ||
1808 | "harrcir;": "\u2948", | ||
1809 | "harrw;": "\u21ad", | ||
1810 | "hbar;": "\u210f", | ||
1811 | "hcirc;": "\u0125", | ||
1812 | "hearts;": "\u2665", | ||
1813 | "heartsuit;": "\u2665", | ||
1814 | "hellip;": "\u2026", | ||
1815 | "hercon;": "\u22b9", | ||
1816 | "hfr;": "\U0001d525", | ||
1817 | "hksearow;": "\u2925", | ||
1818 | "hkswarow;": "\u2926", | ||
1819 | "hoarr;": "\u21ff", | ||
1820 | "homtht;": "\u223b", | ||
1821 | "hookleftarrow;": "\u21a9", | ||
1822 | "hookrightarrow;": "\u21aa", | ||
1823 | "hopf;": "\U0001d559", | ||
1824 | "horbar;": "\u2015", | ||
1825 | "hscr;": "\U0001d4bd", | ||
1826 | "hslash;": "\u210f", | ||
1827 | "hstrok;": "\u0127", | ||
1828 | "hybull;": "\u2043", | ||
1829 | "hyphen;": "\u2010", | ||
1830 | "iacute": "\xed", | ||
1831 | "iacute;": "\xed", | ||
1832 | "ic;": "\u2063", | ||
1833 | "icirc": "\xee", | ||
1834 | "icirc;": "\xee", | ||
1835 | "icy;": "\u0438", | ||
1836 | "iecy;": "\u0435", | ||
1837 | "iexcl": "\xa1", | ||
1838 | "iexcl;": "\xa1", | ||
1839 | "iff;": "\u21d4", | ||
1840 | "ifr;": "\U0001d526", | ||
1841 | "igrave": "\xec", | ||
1842 | "igrave;": "\xec", | ||
1843 | "ii;": "\u2148", | ||
1844 | "iiiint;": "\u2a0c", | ||
1845 | "iiint;": "\u222d", | ||
1846 | "iinfin;": "\u29dc", | ||
1847 | "iiota;": "\u2129", | ||
1848 | "ijlig;": "\u0133", | ||
1849 | "imacr;": "\u012b", | ||
1850 | "image;": "\u2111", | ||
1851 | "imagline;": "\u2110", | ||
1852 | "imagpart;": "\u2111", | ||
1853 | "imath;": "\u0131", | ||
1854 | "imof;": "\u22b7", | ||
1855 | "imped;": "\u01b5", | ||
1856 | "in;": "\u2208", | ||
1857 | "incare;": "\u2105", | ||
1858 | "infin;": "\u221e", | ||
1859 | "infintie;": "\u29dd", | ||
1860 | "inodot;": "\u0131", | ||
1861 | "int;": "\u222b", | ||
1862 | "intcal;": "\u22ba", | ||
1863 | "integers;": "\u2124", | ||
1864 | "intercal;": "\u22ba", | ||
1865 | "intlarhk;": "\u2a17", | ||
1866 | "intprod;": "\u2a3c", | ||
1867 | "iocy;": "\u0451", | ||
1868 | "iogon;": "\u012f", | ||
1869 | "iopf;": "\U0001d55a", | ||
1870 | "iota;": "\u03b9", | ||
1871 | "iprod;": "\u2a3c", | ||
1872 | "iquest": "\xbf", | ||
1873 | "iquest;": "\xbf", | ||
1874 | "iscr;": "\U0001d4be", | ||
1875 | "isin;": "\u2208", | ||
1876 | "isinE;": "\u22f9", | ||
1877 | "isindot;": "\u22f5", | ||
1878 | "isins;": "\u22f4", | ||
1879 | "isinsv;": "\u22f3", | ||
1880 | "isinv;": "\u2208", | ||
1881 | "it;": "\u2062", | ||
1882 | "itilde;": "\u0129", | ||
1883 | "iukcy;": "\u0456", | ||
1884 | "iuml": "\xef", | ||
1885 | "iuml;": "\xef", | ||
1886 | "jcirc;": "\u0135", | ||
1887 | "jcy;": "\u0439", | ||
1888 | "jfr;": "\U0001d527", | ||
1889 | "jmath;": "\u0237", | ||
1890 | "jopf;": "\U0001d55b", | ||
1891 | "jscr;": "\U0001d4bf", | ||
1892 | "jsercy;": "\u0458", | ||
1893 | "jukcy;": "\u0454", | ||
1894 | "kappa;": "\u03ba", | ||
1895 | "kappav;": "\u03f0", | ||
1896 | "kcedil;": "\u0137", | ||
1897 | "kcy;": "\u043a", | ||
1898 | "kfr;": "\U0001d528", | ||
1899 | "kgreen;": "\u0138", | ||
1900 | "khcy;": "\u0445", | ||
1901 | "kjcy;": "\u045c", | ||
1902 | "kopf;": "\U0001d55c", | ||
1903 | "kscr;": "\U0001d4c0", | ||
1904 | "lAarr;": "\u21da", | ||
1905 | "lArr;": "\u21d0", | ||
1906 | "lAtail;": "\u291b", | ||
1907 | "lBarr;": "\u290e", | ||
1908 | "lE;": "\u2266", | ||
1909 | "lEg;": "\u2a8b", | ||
1910 | "lHar;": "\u2962", | ||
1911 | "lacute;": "\u013a", | ||
1912 | "laemptyv;": "\u29b4", | ||
1913 | "lagran;": "\u2112", | ||
1914 | "lambda;": "\u03bb", | ||
1915 | "lang;": "\u27e8", | ||
1916 | "langd;": "\u2991", | ||
1917 | "langle;": "\u27e8", | ||
1918 | "lap;": "\u2a85", | ||
1919 | "laquo": "\xab", | ||
1920 | "laquo;": "\xab", | ||
1921 | "larr;": "\u2190", | ||
1922 | "larrb;": "\u21e4", | ||
1923 | "larrbfs;": "\u291f", | ||
1924 | "larrfs;": "\u291d", | ||
1925 | "larrhk;": "\u21a9", | ||
1926 | "larrlp;": "\u21ab", | ||
1927 | "larrpl;": "\u2939", | ||
1928 | "larrsim;": "\u2973", | ||
1929 | "larrtl;": "\u21a2", | ||
1930 | "lat;": "\u2aab", | ||
1931 | "latail;": "\u2919", | ||
1932 | "late;": "\u2aad", | ||
1933 | "lates;": "\u2aad\ufe00", | ||
1934 | "lbarr;": "\u290c", | ||
1935 | "lbbrk;": "\u2772", | ||
1936 | "lbrace;": "{", | ||
1937 | "lbrack;": "[", | ||
1938 | "lbrke;": "\u298b", | ||
1939 | "lbrksld;": "\u298f", | ||
1940 | "lbrkslu;": "\u298d", | ||
1941 | "lcaron;": "\u013e", | ||
1942 | "lcedil;": "\u013c", | ||
1943 | "lceil;": "\u2308", | ||
1944 | "lcub;": "{", | ||
1945 | "lcy;": "\u043b", | ||
1946 | "ldca;": "\u2936", | ||
1947 | "ldquo;": "\u201c", | ||
1948 | "ldquor;": "\u201e", | ||
1949 | "ldrdhar;": "\u2967", | ||
1950 | "ldrushar;": "\u294b", | ||
1951 | "ldsh;": "\u21b2", | ||
1952 | "le;": "\u2264", | ||
1953 | "leftarrow;": "\u2190", | ||
1954 | "leftarrowtail;": "\u21a2", | ||
1955 | "leftharpoondown;": "\u21bd", | ||
1956 | "leftharpoonup;": "\u21bc", | ||
1957 | "leftleftarrows;": "\u21c7", | ||
1958 | "leftrightarrow;": "\u2194", | ||
1959 | "leftrightarrows;": "\u21c6", | ||
1960 | "leftrightharpoons;": "\u21cb", | ||
1961 | "leftrightsquigarrow;": "\u21ad", | ||
1962 | "leftthreetimes;": "\u22cb", | ||
1963 | "leg;": "\u22da", | ||
1964 | "leq;": "\u2264", | ||
1965 | "leqq;": "\u2266", | ||
1966 | "leqslant;": "\u2a7d", | ||
1967 | "les;": "\u2a7d", | ||
1968 | "lescc;": "\u2aa8", | ||
1969 | "lesdot;": "\u2a7f", | ||
1970 | "lesdoto;": "\u2a81", | ||
1971 | "lesdotor;": "\u2a83", | ||
1972 | "lesg;": "\u22da\ufe00", | ||
1973 | "lesges;": "\u2a93", | ||
1974 | "lessapprox;": "\u2a85", | ||
1975 | "lessdot;": "\u22d6", | ||
1976 | "lesseqgtr;": "\u22da", | ||
1977 | "lesseqqgtr;": "\u2a8b", | ||
1978 | "lessgtr;": "\u2276", | ||
1979 | "lesssim;": "\u2272", | ||
1980 | "lfisht;": "\u297c", | ||
1981 | "lfloor;": "\u230a", | ||
1982 | "lfr;": "\U0001d529", | ||
1983 | "lg;": "\u2276", | ||
1984 | "lgE;": "\u2a91", | ||
1985 | "lhard;": "\u21bd", | ||
1986 | "lharu;": "\u21bc", | ||
1987 | "lharul;": "\u296a", | ||
1988 | "lhblk;": "\u2584", | ||
1989 | "ljcy;": "\u0459", | ||
1990 | "ll;": "\u226a", | ||
1991 | "llarr;": "\u21c7", | ||
1992 | "llcorner;": "\u231e", | ||
1993 | "llhard;": "\u296b", | ||
1994 | "lltri;": "\u25fa", | ||
1995 | "lmidot;": "\u0140", | ||
1996 | "lmoust;": "\u23b0", | ||
1997 | "lmoustache;": "\u23b0", | ||
1998 | "lnE;": "\u2268", | ||
1999 | "lnap;": "\u2a89", | ||
2000 | "lnapprox;": "\u2a89", | ||
2001 | "lne;": "\u2a87", | ||
2002 | "lneq;": "\u2a87", | ||
2003 | "lneqq;": "\u2268", | ||
2004 | "lnsim;": "\u22e6", | ||
2005 | "loang;": "\u27ec", | ||
2006 | "loarr;": "\u21fd", | ||
2007 | "lobrk;": "\u27e6", | ||
2008 | "longleftarrow;": "\u27f5", | ||
2009 | "longleftrightarrow;": "\u27f7", | ||
2010 | "longmapsto;": "\u27fc", | ||
2011 | "longrightarrow;": "\u27f6", | ||
2012 | "looparrowleft;": "\u21ab", | ||
2013 | "looparrowright;": "\u21ac", | ||
2014 | "lopar;": "\u2985", | ||
2015 | "lopf;": "\U0001d55d", | ||
2016 | "loplus;": "\u2a2d", | ||
2017 | "lotimes;": "\u2a34", | ||
2018 | "lowast;": "\u2217", | ||
2019 | "lowbar;": "_", | ||
2020 | "loz;": "\u25ca", | ||
2021 | "lozenge;": "\u25ca", | ||
2022 | "lozf;": "\u29eb", | ||
2023 | "lpar;": "(", | ||
2024 | "lparlt;": "\u2993", | ||
2025 | "lrarr;": "\u21c6", | ||
2026 | "lrcorner;": "\u231f", | ||
2027 | "lrhar;": "\u21cb", | ||
2028 | "lrhard;": "\u296d", | ||
2029 | "lrm;": "\u200e", | ||
2030 | "lrtri;": "\u22bf", | ||
2031 | "lsaquo;": "\u2039", | ||
2032 | "lscr;": "\U0001d4c1", | ||
2033 | "lsh;": "\u21b0", | ||
2034 | "lsim;": "\u2272", | ||
2035 | "lsime;": "\u2a8d", | ||
2036 | "lsimg;": "\u2a8f", | ||
2037 | "lsqb;": "[", | ||
2038 | "lsquo;": "\u2018", | ||
2039 | "lsquor;": "\u201a", | ||
2040 | "lstrok;": "\u0142", | ||
2041 | "lt": "<", | ||
2042 | "lt;": "<", | ||
2043 | "ltcc;": "\u2aa6", | ||
2044 | "ltcir;": "\u2a79", | ||
2045 | "ltdot;": "\u22d6", | ||
2046 | "lthree;": "\u22cb", | ||
2047 | "ltimes;": "\u22c9", | ||
2048 | "ltlarr;": "\u2976", | ||
2049 | "ltquest;": "\u2a7b", | ||
2050 | "ltrPar;": "\u2996", | ||
2051 | "ltri;": "\u25c3", | ||
2052 | "ltrie;": "\u22b4", | ||
2053 | "ltrif;": "\u25c2", | ||
2054 | "lurdshar;": "\u294a", | ||
2055 | "luruhar;": "\u2966", | ||
2056 | "lvertneqq;": "\u2268\ufe00", | ||
2057 | "lvnE;": "\u2268\ufe00", | ||
2058 | "mDDot;": "\u223a", | ||
2059 | "macr": "\xaf", | ||
2060 | "macr;": "\xaf", | ||
2061 | "male;": "\u2642", | ||
2062 | "malt;": "\u2720", | ||
2063 | "maltese;": "\u2720", | ||
2064 | "map;": "\u21a6", | ||
2065 | "mapsto;": "\u21a6", | ||
2066 | "mapstodown;": "\u21a7", | ||
2067 | "mapstoleft;": "\u21a4", | ||
2068 | "mapstoup;": "\u21a5", | ||
2069 | "marker;": "\u25ae", | ||
2070 | "mcomma;": "\u2a29", | ||
2071 | "mcy;": "\u043c", | ||
2072 | "mdash;": "\u2014", | ||
2073 | "measuredangle;": "\u2221", | ||
2074 | "mfr;": "\U0001d52a", | ||
2075 | "mho;": "\u2127", | ||
2076 | "micro": "\xb5", | ||
2077 | "micro;": "\xb5", | ||
2078 | "mid;": "\u2223", | ||
2079 | "midast;": "*", | ||
2080 | "midcir;": "\u2af0", | ||
2081 | "middot": "\xb7", | ||
2082 | "middot;": "\xb7", | ||
2083 | "minus;": "\u2212", | ||
2084 | "minusb;": "\u229f", | ||
2085 | "minusd;": "\u2238", | ||
2086 | "minusdu;": "\u2a2a", | ||
2087 | "mlcp;": "\u2adb", | ||
2088 | "mldr;": "\u2026", | ||
2089 | "mnplus;": "\u2213", | ||
2090 | "models;": "\u22a7", | ||
2091 | "mopf;": "\U0001d55e", | ||
2092 | "mp;": "\u2213", | ||
2093 | "mscr;": "\U0001d4c2", | ||
2094 | "mstpos;": "\u223e", | ||
2095 | "mu;": "\u03bc", | ||
2096 | "multimap;": "\u22b8", | ||
2097 | "mumap;": "\u22b8", | ||
2098 | "nGg;": "\u22d9\u0338", | ||
2099 | "nGt;": "\u226b\u20d2", | ||
2100 | "nGtv;": "\u226b\u0338", | ||
2101 | "nLeftarrow;": "\u21cd", | ||
2102 | "nLeftrightarrow;": "\u21ce", | ||
2103 | "nLl;": "\u22d8\u0338", | ||
2104 | "nLt;": "\u226a\u20d2", | ||
2105 | "nLtv;": "\u226a\u0338", | ||
2106 | "nRightarrow;": "\u21cf", | ||
2107 | "nVDash;": "\u22af", | ||
2108 | "nVdash;": "\u22ae", | ||
2109 | "nabla;": "\u2207", | ||
2110 | "nacute;": "\u0144", | ||
2111 | "nang;": "\u2220\u20d2", | ||
2112 | "nap;": "\u2249", | ||
2113 | "napE;": "\u2a70\u0338", | ||
2114 | "napid;": "\u224b\u0338", | ||
2115 | "napos;": "\u0149", | ||
2116 | "napprox;": "\u2249", | ||
2117 | "natur;": "\u266e", | ||
2118 | "natural;": "\u266e", | ||
2119 | "naturals;": "\u2115", | ||
2120 | "nbsp": "\xa0", | ||
2121 | "nbsp;": "\xa0", | ||
2122 | "nbump;": "\u224e\u0338", | ||
2123 | "nbumpe;": "\u224f\u0338", | ||
2124 | "ncap;": "\u2a43", | ||
2125 | "ncaron;": "\u0148", | ||
2126 | "ncedil;": "\u0146", | ||
2127 | "ncong;": "\u2247", | ||
2128 | "ncongdot;": "\u2a6d\u0338", | ||
2129 | "ncup;": "\u2a42", | ||
2130 | "ncy;": "\u043d", | ||
2131 | "ndash;": "\u2013", | ||
2132 | "ne;": "\u2260", | ||
2133 | "neArr;": "\u21d7", | ||
2134 | "nearhk;": "\u2924", | ||
2135 | "nearr;": "\u2197", | ||
2136 | "nearrow;": "\u2197", | ||
2137 | "nedot;": "\u2250\u0338", | ||
2138 | "nequiv;": "\u2262", | ||
2139 | "nesear;": "\u2928", | ||
2140 | "nesim;": "\u2242\u0338", | ||
2141 | "nexist;": "\u2204", | ||
2142 | "nexists;": "\u2204", | ||
2143 | "nfr;": "\U0001d52b", | ||
2144 | "ngE;": "\u2267\u0338", | ||
2145 | "nge;": "\u2271", | ||
2146 | "ngeq;": "\u2271", | ||
2147 | "ngeqq;": "\u2267\u0338", | ||
2148 | "ngeqslant;": "\u2a7e\u0338", | ||
2149 | "nges;": "\u2a7e\u0338", | ||
2150 | "ngsim;": "\u2275", | ||
2151 | "ngt;": "\u226f", | ||
2152 | "ngtr;": "\u226f", | ||
2153 | "nhArr;": "\u21ce", | ||
2154 | "nharr;": "\u21ae", | ||
2155 | "nhpar;": "\u2af2", | ||
2156 | "ni;": "\u220b", | ||
2157 | "nis;": "\u22fc", | ||
2158 | "nisd;": "\u22fa", | ||
2159 | "niv;": "\u220b", | ||
2160 | "njcy;": "\u045a", | ||
2161 | "nlArr;": "\u21cd", | ||
2162 | "nlE;": "\u2266\u0338", | ||
2163 | "nlarr;": "\u219a", | ||
2164 | "nldr;": "\u2025", | ||
2165 | "nle;": "\u2270", | ||
2166 | "nleftarrow;": "\u219a", | ||
2167 | "nleftrightarrow;": "\u21ae", | ||
2168 | "nleq;": "\u2270", | ||
2169 | "nleqq;": "\u2266\u0338", | ||
2170 | "nleqslant;": "\u2a7d\u0338", | ||
2171 | "nles;": "\u2a7d\u0338", | ||
2172 | "nless;": "\u226e", | ||
2173 | "nlsim;": "\u2274", | ||
2174 | "nlt;": "\u226e", | ||
2175 | "nltri;": "\u22ea", | ||
2176 | "nltrie;": "\u22ec", | ||
2177 | "nmid;": "\u2224", | ||
2178 | "nopf;": "\U0001d55f", | ||
2179 | "not": "\xac", | ||
2180 | "not;": "\xac", | ||
2181 | "notin;": "\u2209", | ||
2182 | "notinE;": "\u22f9\u0338", | ||
2183 | "notindot;": "\u22f5\u0338", | ||
2184 | "notinva;": "\u2209", | ||
2185 | "notinvb;": "\u22f7", | ||
2186 | "notinvc;": "\u22f6", | ||
2187 | "notni;": "\u220c", | ||
2188 | "notniva;": "\u220c", | ||
2189 | "notnivb;": "\u22fe", | ||
2190 | "notnivc;": "\u22fd", | ||
2191 | "npar;": "\u2226", | ||
2192 | "nparallel;": "\u2226", | ||
2193 | "nparsl;": "\u2afd\u20e5", | ||
2194 | "npart;": "\u2202\u0338", | ||
2195 | "npolint;": "\u2a14", | ||
2196 | "npr;": "\u2280", | ||
2197 | "nprcue;": "\u22e0", | ||
2198 | "npre;": "\u2aaf\u0338", | ||
2199 | "nprec;": "\u2280", | ||
2200 | "npreceq;": "\u2aaf\u0338", | ||
2201 | "nrArr;": "\u21cf", | ||
2202 | "nrarr;": "\u219b", | ||
2203 | "nrarrc;": "\u2933\u0338", | ||
2204 | "nrarrw;": "\u219d\u0338", | ||
2205 | "nrightarrow;": "\u219b", | ||
2206 | "nrtri;": "\u22eb", | ||
2207 | "nrtrie;": "\u22ed", | ||
2208 | "nsc;": "\u2281", | ||
2209 | "nsccue;": "\u22e1", | ||
2210 | "nsce;": "\u2ab0\u0338", | ||
2211 | "nscr;": "\U0001d4c3", | ||
2212 | "nshortmid;": "\u2224", | ||
2213 | "nshortparallel;": "\u2226", | ||
2214 | "nsim;": "\u2241", | ||
2215 | "nsime;": "\u2244", | ||
2216 | "nsimeq;": "\u2244", | ||
2217 | "nsmid;": "\u2224", | ||
2218 | "nspar;": "\u2226", | ||
2219 | "nsqsube;": "\u22e2", | ||
2220 | "nsqsupe;": "\u22e3", | ||
2221 | "nsub;": "\u2284", | ||
2222 | "nsubE;": "\u2ac5\u0338", | ||
2223 | "nsube;": "\u2288", | ||
2224 | "nsubset;": "\u2282\u20d2", | ||
2225 | "nsubseteq;": "\u2288", | ||
2226 | "nsubseteqq;": "\u2ac5\u0338", | ||
2227 | "nsucc;": "\u2281", | ||
2228 | "nsucceq;": "\u2ab0\u0338", | ||
2229 | "nsup;": "\u2285", | ||
2230 | "nsupE;": "\u2ac6\u0338", | ||
2231 | "nsupe;": "\u2289", | ||
2232 | "nsupset;": "\u2283\u20d2", | ||
2233 | "nsupseteq;": "\u2289", | ||
2234 | "nsupseteqq;": "\u2ac6\u0338", | ||
2235 | "ntgl;": "\u2279", | ||
2236 | "ntilde": "\xf1", | ||
2237 | "ntilde;": "\xf1", | ||
2238 | "ntlg;": "\u2278", | ||
2239 | "ntriangleleft;": "\u22ea", | ||
2240 | "ntrianglelefteq;": "\u22ec", | ||
2241 | "ntriangleright;": "\u22eb", | ||
2242 | "ntrianglerighteq;": "\u22ed", | ||
2243 | "nu;": "\u03bd", | ||
2244 | "num;": "#", | ||
2245 | "numero;": "\u2116", | ||
2246 | "numsp;": "\u2007", | ||
2247 | "nvDash;": "\u22ad", | ||
2248 | "nvHarr;": "\u2904", | ||
2249 | "nvap;": "\u224d\u20d2", | ||
2250 | "nvdash;": "\u22ac", | ||
2251 | "nvge;": "\u2265\u20d2", | ||
2252 | "nvgt;": ">\u20d2", | ||
2253 | "nvinfin;": "\u29de", | ||
2254 | "nvlArr;": "\u2902", | ||
2255 | "nvle;": "\u2264\u20d2", | ||
2256 | "nvlt;": "<\u20d2", | ||
2257 | "nvltrie;": "\u22b4\u20d2", | ||
2258 | "nvrArr;": "\u2903", | ||
2259 | "nvrtrie;": "\u22b5\u20d2", | ||
2260 | "nvsim;": "\u223c\u20d2", | ||
2261 | "nwArr;": "\u21d6", | ||
2262 | "nwarhk;": "\u2923", | ||
2263 | "nwarr;": "\u2196", | ||
2264 | "nwarrow;": "\u2196", | ||
2265 | "nwnear;": "\u2927", | ||
2266 | "oS;": "\u24c8", | ||
2267 | "oacute": "\xf3", | ||
2268 | "oacute;": "\xf3", | ||
2269 | "oast;": "\u229b", | ||
2270 | "ocir;": "\u229a", | ||
2271 | "ocirc": "\xf4", | ||
2272 | "ocirc;": "\xf4", | ||
2273 | "ocy;": "\u043e", | ||
2274 | "odash;": "\u229d", | ||
2275 | "odblac;": "\u0151", | ||
2276 | "odiv;": "\u2a38", | ||
2277 | "odot;": "\u2299", | ||
2278 | "odsold;": "\u29bc", | ||
2279 | "oelig;": "\u0153", | ||
2280 | "ofcir;": "\u29bf", | ||
2281 | "ofr;": "\U0001d52c", | ||
2282 | "ogon;": "\u02db", | ||
2283 | "ograve": "\xf2", | ||
2284 | "ograve;": "\xf2", | ||
2285 | "ogt;": "\u29c1", | ||
2286 | "ohbar;": "\u29b5", | ||
2287 | "ohm;": "\u03a9", | ||
2288 | "oint;": "\u222e", | ||
2289 | "olarr;": "\u21ba", | ||
2290 | "olcir;": "\u29be", | ||
2291 | "olcross;": "\u29bb", | ||
2292 | "oline;": "\u203e", | ||
2293 | "olt;": "\u29c0", | ||
2294 | "omacr;": "\u014d", | ||
2295 | "omega;": "\u03c9", | ||
2296 | "omicron;": "\u03bf", | ||
2297 | "omid;": "\u29b6", | ||
2298 | "ominus;": "\u2296", | ||
2299 | "oopf;": "\U0001d560", | ||
2300 | "opar;": "\u29b7", | ||
2301 | "operp;": "\u29b9", | ||
2302 | "oplus;": "\u2295", | ||
2303 | "or;": "\u2228", | ||
2304 | "orarr;": "\u21bb", | ||
2305 | "ord;": "\u2a5d", | ||
2306 | "order;": "\u2134", | ||
2307 | "orderof;": "\u2134", | ||
2308 | "ordf": "\xaa", | ||
2309 | "ordf;": "\xaa", | ||
2310 | "ordm": "\xba", | ||
2311 | "ordm;": "\xba", | ||
2312 | "origof;": "\u22b6", | ||
2313 | "oror;": "\u2a56", | ||
2314 | "orslope;": "\u2a57", | ||
2315 | "orv;": "\u2a5b", | ||
2316 | "oscr;": "\u2134", | ||
2317 | "oslash": "\xf8", | ||
2318 | "oslash;": "\xf8", | ||
2319 | "osol;": "\u2298", | ||
2320 | "otilde": "\xf5", | ||
2321 | "otilde;": "\xf5", | ||
2322 | "otimes;": "\u2297", | ||
2323 | "otimesas;": "\u2a36", | ||
2324 | "ouml": "\xf6", | ||
2325 | "ouml;": "\xf6", | ||
2326 | "ovbar;": "\u233d", | ||
2327 | "par;": "\u2225", | ||
2328 | "para": "\xb6", | ||
2329 | "para;": "\xb6", | ||
2330 | "parallel;": "\u2225", | ||
2331 | "parsim;": "\u2af3", | ||
2332 | "parsl;": "\u2afd", | ||
2333 | "part;": "\u2202", | ||
2334 | "pcy;": "\u043f", | ||
2335 | "percnt;": "%", | ||
2336 | "period;": ".", | ||
2337 | "permil;": "\u2030", | ||
2338 | "perp;": "\u22a5", | ||
2339 | "pertenk;": "\u2031", | ||
2340 | "pfr;": "\U0001d52d", | ||
2341 | "phi;": "\u03c6", | ||
2342 | "phiv;": "\u03d5", | ||
2343 | "phmmat;": "\u2133", | ||
2344 | "phone;": "\u260e", | ||
2345 | "pi;": "\u03c0", | ||
2346 | "pitchfork;": "\u22d4", | ||
2347 | "piv;": "\u03d6", | ||
2348 | "planck;": "\u210f", | ||
2349 | "planckh;": "\u210e", | ||
2350 | "plankv;": "\u210f", | ||
2351 | "plus;": "+", | ||
2352 | "plusacir;": "\u2a23", | ||
2353 | "plusb;": "\u229e", | ||
2354 | "pluscir;": "\u2a22", | ||
2355 | "plusdo;": "\u2214", | ||
2356 | "plusdu;": "\u2a25", | ||
2357 | "pluse;": "\u2a72", | ||
2358 | "plusmn": "\xb1", | ||
2359 | "plusmn;": "\xb1", | ||
2360 | "plussim;": "\u2a26", | ||
2361 | "plustwo;": "\u2a27", | ||
2362 | "pm;": "\xb1", | ||
2363 | "pointint;": "\u2a15", | ||
2364 | "popf;": "\U0001d561", | ||
2365 | "pound": "\xa3", | ||
2366 | "pound;": "\xa3", | ||
2367 | "pr;": "\u227a", | ||
2368 | "prE;": "\u2ab3", | ||
2369 | "prap;": "\u2ab7", | ||
2370 | "prcue;": "\u227c", | ||
2371 | "pre;": "\u2aaf", | ||
2372 | "prec;": "\u227a", | ||
2373 | "precapprox;": "\u2ab7", | ||
2374 | "preccurlyeq;": "\u227c", | ||
2375 | "preceq;": "\u2aaf", | ||
2376 | "precnapprox;": "\u2ab9", | ||
2377 | "precneqq;": "\u2ab5", | ||
2378 | "precnsim;": "\u22e8", | ||
2379 | "precsim;": "\u227e", | ||
2380 | "prime;": "\u2032", | ||
2381 | "primes;": "\u2119", | ||
2382 | "prnE;": "\u2ab5", | ||
2383 | "prnap;": "\u2ab9", | ||
2384 | "prnsim;": "\u22e8", | ||
2385 | "prod;": "\u220f", | ||
2386 | "profalar;": "\u232e", | ||
2387 | "profline;": "\u2312", | ||
2388 | "profsurf;": "\u2313", | ||
2389 | "prop;": "\u221d", | ||
2390 | "propto;": "\u221d", | ||
2391 | "prsim;": "\u227e", | ||
2392 | "prurel;": "\u22b0", | ||
2393 | "pscr;": "\U0001d4c5", | ||
2394 | "psi;": "\u03c8", | ||
2395 | "puncsp;": "\u2008", | ||
2396 | "qfr;": "\U0001d52e", | ||
2397 | "qint;": "\u2a0c", | ||
2398 | "qopf;": "\U0001d562", | ||
2399 | "qprime;": "\u2057", | ||
2400 | "qscr;": "\U0001d4c6", | ||
2401 | "quaternions;": "\u210d", | ||
2402 | "quatint;": "\u2a16", | ||
2403 | "quest;": "?", | ||
2404 | "questeq;": "\u225f", | ||
2405 | "quot": "\"", | ||
2406 | "quot;": "\"", | ||
2407 | "rAarr;": "\u21db", | ||
2408 | "rArr;": "\u21d2", | ||
2409 | "rAtail;": "\u291c", | ||
2410 | "rBarr;": "\u290f", | ||
2411 | "rHar;": "\u2964", | ||
2412 | "race;": "\u223d\u0331", | ||
2413 | "racute;": "\u0155", | ||
2414 | "radic;": "\u221a", | ||
2415 | "raemptyv;": "\u29b3", | ||
2416 | "rang;": "\u27e9", | ||
2417 | "rangd;": "\u2992", | ||
2418 | "range;": "\u29a5", | ||
2419 | "rangle;": "\u27e9", | ||
2420 | "raquo": "\xbb", | ||
2421 | "raquo;": "\xbb", | ||
2422 | "rarr;": "\u2192", | ||
2423 | "rarrap;": "\u2975", | ||
2424 | "rarrb;": "\u21e5", | ||
2425 | "rarrbfs;": "\u2920", | ||
2426 | "rarrc;": "\u2933", | ||
2427 | "rarrfs;": "\u291e", | ||
2428 | "rarrhk;": "\u21aa", | ||
2429 | "rarrlp;": "\u21ac", | ||
2430 | "rarrpl;": "\u2945", | ||
2431 | "rarrsim;": "\u2974", | ||
2432 | "rarrtl;": "\u21a3", | ||
2433 | "rarrw;": "\u219d", | ||
2434 | "ratail;": "\u291a", | ||
2435 | "ratio;": "\u2236", | ||
2436 | "rationals;": "\u211a", | ||
2437 | "rbarr;": "\u290d", | ||
2438 | "rbbrk;": "\u2773", | ||
2439 | "rbrace;": "}", | ||
2440 | "rbrack;": "]", | ||
2441 | "rbrke;": "\u298c", | ||
2442 | "rbrksld;": "\u298e", | ||
2443 | "rbrkslu;": "\u2990", | ||
2444 | "rcaron;": "\u0159", | ||
2445 | "rcedil;": "\u0157", | ||
2446 | "rceil;": "\u2309", | ||
2447 | "rcub;": "}", | ||
2448 | "rcy;": "\u0440", | ||
2449 | "rdca;": "\u2937", | ||
2450 | "rdldhar;": "\u2969", | ||
2451 | "rdquo;": "\u201d", | ||
2452 | "rdquor;": "\u201d", | ||
2453 | "rdsh;": "\u21b3", | ||
2454 | "real;": "\u211c", | ||
2455 | "realine;": "\u211b", | ||
2456 | "realpart;": "\u211c", | ||
2457 | "reals;": "\u211d", | ||
2458 | "rect;": "\u25ad", | ||
2459 | "reg": "\xae", | ||
2460 | "reg;": "\xae", | ||
2461 | "rfisht;": "\u297d", | ||
2462 | "rfloor;": "\u230b", | ||
2463 | "rfr;": "\U0001d52f", | ||
2464 | "rhard;": "\u21c1", | ||
2465 | "rharu;": "\u21c0", | ||
2466 | "rharul;": "\u296c", | ||
2467 | "rho;": "\u03c1", | ||
2468 | "rhov;": "\u03f1", | ||
2469 | "rightarrow;": "\u2192", | ||
2470 | "rightarrowtail;": "\u21a3", | ||
2471 | "rightharpoondown;": "\u21c1", | ||
2472 | "rightharpoonup;": "\u21c0", | ||
2473 | "rightleftarrows;": "\u21c4", | ||
2474 | "rightleftharpoons;": "\u21cc", | ||
2475 | "rightrightarrows;": "\u21c9", | ||
2476 | "rightsquigarrow;": "\u219d", | ||
2477 | "rightthreetimes;": "\u22cc", | ||
2478 | "ring;": "\u02da", | ||
2479 | "risingdotseq;": "\u2253", | ||
2480 | "rlarr;": "\u21c4", | ||
2481 | "rlhar;": "\u21cc", | ||
2482 | "rlm;": "\u200f", | ||
2483 | "rmoust;": "\u23b1", | ||
2484 | "rmoustache;": "\u23b1", | ||
2485 | "rnmid;": "\u2aee", | ||
2486 | "roang;": "\u27ed", | ||
2487 | "roarr;": "\u21fe", | ||
2488 | "robrk;": "\u27e7", | ||
2489 | "ropar;": "\u2986", | ||
2490 | "ropf;": "\U0001d563", | ||
2491 | "roplus;": "\u2a2e", | ||
2492 | "rotimes;": "\u2a35", | ||
2493 | "rpar;": ")", | ||
2494 | "rpargt;": "\u2994", | ||
2495 | "rppolint;": "\u2a12", | ||
2496 | "rrarr;": "\u21c9", | ||
2497 | "rsaquo;": "\u203a", | ||
2498 | "rscr;": "\U0001d4c7", | ||
2499 | "rsh;": "\u21b1", | ||
2500 | "rsqb;": "]", | ||
2501 | "rsquo;": "\u2019", | ||
2502 | "rsquor;": "\u2019", | ||
2503 | "rthree;": "\u22cc", | ||
2504 | "rtimes;": "\u22ca", | ||
2505 | "rtri;": "\u25b9", | ||
2506 | "rtrie;": "\u22b5", | ||
2507 | "rtrif;": "\u25b8", | ||
2508 | "rtriltri;": "\u29ce", | ||
2509 | "ruluhar;": "\u2968", | ||
2510 | "rx;": "\u211e", | ||
2511 | "sacute;": "\u015b", | ||
2512 | "sbquo;": "\u201a", | ||
2513 | "sc;": "\u227b", | ||
2514 | "scE;": "\u2ab4", | ||
2515 | "scap;": "\u2ab8", | ||
2516 | "scaron;": "\u0161", | ||
2517 | "sccue;": "\u227d", | ||
2518 | "sce;": "\u2ab0", | ||
2519 | "scedil;": "\u015f", | ||
2520 | "scirc;": "\u015d", | ||
2521 | "scnE;": "\u2ab6", | ||
2522 | "scnap;": "\u2aba", | ||
2523 | "scnsim;": "\u22e9", | ||
2524 | "scpolint;": "\u2a13", | ||
2525 | "scsim;": "\u227f", | ||
2526 | "scy;": "\u0441", | ||
2527 | "sdot;": "\u22c5", | ||
2528 | "sdotb;": "\u22a1", | ||
2529 | "sdote;": "\u2a66", | ||
2530 | "seArr;": "\u21d8", | ||
2531 | "searhk;": "\u2925", | ||
2532 | "searr;": "\u2198", | ||
2533 | "searrow;": "\u2198", | ||
2534 | "sect": "\xa7", | ||
2535 | "sect;": "\xa7", | ||
2536 | "semi;": ";", | ||
2537 | "seswar;": "\u2929", | ||
2538 | "setminus;": "\u2216", | ||
2539 | "setmn;": "\u2216", | ||
2540 | "sext;": "\u2736", | ||
2541 | "sfr;": "\U0001d530", | ||
2542 | "sfrown;": "\u2322", | ||
2543 | "sharp;": "\u266f", | ||
2544 | "shchcy;": "\u0449", | ||
2545 | "shcy;": "\u0448", | ||
2546 | "shortmid;": "\u2223", | ||
2547 | "shortparallel;": "\u2225", | ||
2548 | "shy": "\xad", | ||
2549 | "shy;": "\xad", | ||
2550 | "sigma;": "\u03c3", | ||
2551 | "sigmaf;": "\u03c2", | ||
2552 | "sigmav;": "\u03c2", | ||
2553 | "sim;": "\u223c", | ||
2554 | "simdot;": "\u2a6a", | ||
2555 | "sime;": "\u2243", | ||
2556 | "simeq;": "\u2243", | ||
2557 | "simg;": "\u2a9e", | ||
2558 | "simgE;": "\u2aa0", | ||
2559 | "siml;": "\u2a9d", | ||
2560 | "simlE;": "\u2a9f", | ||
2561 | "simne;": "\u2246", | ||
2562 | "simplus;": "\u2a24", | ||
2563 | "simrarr;": "\u2972", | ||
2564 | "slarr;": "\u2190", | ||
2565 | "smallsetminus;": "\u2216", | ||
2566 | "smashp;": "\u2a33", | ||
2567 | "smeparsl;": "\u29e4", | ||
2568 | "smid;": "\u2223", | ||
2569 | "smile;": "\u2323", | ||
2570 | "smt;": "\u2aaa", | ||
2571 | "smte;": "\u2aac", | ||
2572 | "smtes;": "\u2aac\ufe00", | ||
2573 | "softcy;": "\u044c", | ||
2574 | "sol;": "/", | ||
2575 | "solb;": "\u29c4", | ||
2576 | "solbar;": "\u233f", | ||
2577 | "sopf;": "\U0001d564", | ||
2578 | "spades;": "\u2660", | ||
2579 | "spadesuit;": "\u2660", | ||
2580 | "spar;": "\u2225", | ||
2581 | "sqcap;": "\u2293", | ||
2582 | "sqcaps;": "\u2293\ufe00", | ||
2583 | "sqcup;": "\u2294", | ||
2584 | "sqcups;": "\u2294\ufe00", | ||
2585 | "sqsub;": "\u228f", | ||
2586 | "sqsube;": "\u2291", | ||
2587 | "sqsubset;": "\u228f", | ||
2588 | "sqsubseteq;": "\u2291", | ||
2589 | "sqsup;": "\u2290", | ||
2590 | "sqsupe;": "\u2292", | ||
2591 | "sqsupset;": "\u2290", | ||
2592 | "sqsupseteq;": "\u2292", | ||
2593 | "squ;": "\u25a1", | ||
2594 | "square;": "\u25a1", | ||
2595 | "squarf;": "\u25aa", | ||
2596 | "squf;": "\u25aa", | ||
2597 | "srarr;": "\u2192", | ||
2598 | "sscr;": "\U0001d4c8", | ||
2599 | "ssetmn;": "\u2216", | ||
2600 | "ssmile;": "\u2323", | ||
2601 | "sstarf;": "\u22c6", | ||
2602 | "star;": "\u2606", | ||
2603 | "starf;": "\u2605", | ||
2604 | "straightepsilon;": "\u03f5", | ||
2605 | "straightphi;": "\u03d5", | ||
2606 | "strns;": "\xaf", | ||
2607 | "sub;": "\u2282", | ||
2608 | "subE;": "\u2ac5", | ||
2609 | "subdot;": "\u2abd", | ||
2610 | "sube;": "\u2286", | ||
2611 | "subedot;": "\u2ac3", | ||
2612 | "submult;": "\u2ac1", | ||
2613 | "subnE;": "\u2acb", | ||
2614 | "subne;": "\u228a", | ||
2615 | "subplus;": "\u2abf", | ||
2616 | "subrarr;": "\u2979", | ||
2617 | "subset;": "\u2282", | ||
2618 | "subseteq;": "\u2286", | ||
2619 | "subseteqq;": "\u2ac5", | ||
2620 | "subsetneq;": "\u228a", | ||
2621 | "subsetneqq;": "\u2acb", | ||
2622 | "subsim;": "\u2ac7", | ||
2623 | "subsub;": "\u2ad5", | ||
2624 | "subsup;": "\u2ad3", | ||
2625 | "succ;": "\u227b", | ||
2626 | "succapprox;": "\u2ab8", | ||
2627 | "succcurlyeq;": "\u227d", | ||
2628 | "succeq;": "\u2ab0", | ||
2629 | "succnapprox;": "\u2aba", | ||
2630 | "succneqq;": "\u2ab6", | ||
2631 | "succnsim;": "\u22e9", | ||
2632 | "succsim;": "\u227f", | ||
2633 | "sum;": "\u2211", | ||
2634 | "sung;": "\u266a", | ||
2635 | "sup1": "\xb9", | ||
2636 | "sup1;": "\xb9", | ||
2637 | "sup2": "\xb2", | ||
2638 | "sup2;": "\xb2", | ||
2639 | "sup3": "\xb3", | ||
2640 | "sup3;": "\xb3", | ||
2641 | "sup;": "\u2283", | ||
2642 | "supE;": "\u2ac6", | ||
2643 | "supdot;": "\u2abe", | ||
2644 | "supdsub;": "\u2ad8", | ||
2645 | "supe;": "\u2287", | ||
2646 | "supedot;": "\u2ac4", | ||
2647 | "suphsol;": "\u27c9", | ||
2648 | "suphsub;": "\u2ad7", | ||
2649 | "suplarr;": "\u297b", | ||
2650 | "supmult;": "\u2ac2", | ||
2651 | "supnE;": "\u2acc", | ||
2652 | "supne;": "\u228b", | ||
2653 | "supplus;": "\u2ac0", | ||
2654 | "supset;": "\u2283", | ||
2655 | "supseteq;": "\u2287", | ||
2656 | "supseteqq;": "\u2ac6", | ||
2657 | "supsetneq;": "\u228b", | ||
2658 | "supsetneqq;": "\u2acc", | ||
2659 | "supsim;": "\u2ac8", | ||
2660 | "supsub;": "\u2ad4", | ||
2661 | "supsup;": "\u2ad6", | ||
2662 | "swArr;": "\u21d9", | ||
2663 | "swarhk;": "\u2926", | ||
2664 | "swarr;": "\u2199", | ||
2665 | "swarrow;": "\u2199", | ||
2666 | "swnwar;": "\u292a", | ||
2667 | "szlig": "\xdf", | ||
2668 | "szlig;": "\xdf", | ||
2669 | "target;": "\u2316", | ||
2670 | "tau;": "\u03c4", | ||
2671 | "tbrk;": "\u23b4", | ||
2672 | "tcaron;": "\u0165", | ||
2673 | "tcedil;": "\u0163", | ||
2674 | "tcy;": "\u0442", | ||
2675 | "tdot;": "\u20db", | ||
2676 | "telrec;": "\u2315", | ||
2677 | "tfr;": "\U0001d531", | ||
2678 | "there4;": "\u2234", | ||
2679 | "therefore;": "\u2234", | ||
2680 | "theta;": "\u03b8", | ||
2681 | "thetasym;": "\u03d1", | ||
2682 | "thetav;": "\u03d1", | ||
2683 | "thickapprox;": "\u2248", | ||
2684 | "thicksim;": "\u223c", | ||
2685 | "thinsp;": "\u2009", | ||
2686 | "thkap;": "\u2248", | ||
2687 | "thksim;": "\u223c", | ||
2688 | "thorn": "\xfe", | ||
2689 | "thorn;": "\xfe", | ||
2690 | "tilde;": "\u02dc", | ||
2691 | "times": "\xd7", | ||
2692 | "times;": "\xd7", | ||
2693 | "timesb;": "\u22a0", | ||
2694 | "timesbar;": "\u2a31", | ||
2695 | "timesd;": "\u2a30", | ||
2696 | "tint;": "\u222d", | ||
2697 | "toea;": "\u2928", | ||
2698 | "top;": "\u22a4", | ||
2699 | "topbot;": "\u2336", | ||
2700 | "topcir;": "\u2af1", | ||
2701 | "topf;": "\U0001d565", | ||
2702 | "topfork;": "\u2ada", | ||
2703 | "tosa;": "\u2929", | ||
2704 | "tprime;": "\u2034", | ||
2705 | "trade;": "\u2122", | ||
2706 | "triangle;": "\u25b5", | ||
2707 | "triangledown;": "\u25bf", | ||
2708 | "triangleleft;": "\u25c3", | ||
2709 | "trianglelefteq;": "\u22b4", | ||
2710 | "triangleq;": "\u225c", | ||
2711 | "triangleright;": "\u25b9", | ||
2712 | "trianglerighteq;": "\u22b5", | ||
2713 | "tridot;": "\u25ec", | ||
2714 | "trie;": "\u225c", | ||
2715 | "triminus;": "\u2a3a", | ||
2716 | "triplus;": "\u2a39", | ||
2717 | "trisb;": "\u29cd", | ||
2718 | "tritime;": "\u2a3b", | ||
2719 | "trpezium;": "\u23e2", | ||
2720 | "tscr;": "\U0001d4c9", | ||
2721 | "tscy;": "\u0446", | ||
2722 | "tshcy;": "\u045b", | ||
2723 | "tstrok;": "\u0167", | ||
2724 | "twixt;": "\u226c", | ||
2725 | "twoheadleftarrow;": "\u219e", | ||
2726 | "twoheadrightarrow;": "\u21a0", | ||
2727 | "uArr;": "\u21d1", | ||
2728 | "uHar;": "\u2963", | ||
2729 | "uacute": "\xfa", | ||
2730 | "uacute;": "\xfa", | ||
2731 | "uarr;": "\u2191", | ||
2732 | "ubrcy;": "\u045e", | ||
2733 | "ubreve;": "\u016d", | ||
2734 | "ucirc": "\xfb", | ||
2735 | "ucirc;": "\xfb", | ||
2736 | "ucy;": "\u0443", | ||
2737 | "udarr;": "\u21c5", | ||
2738 | "udblac;": "\u0171", | ||
2739 | "udhar;": "\u296e", | ||
2740 | "ufisht;": "\u297e", | ||
2741 | "ufr;": "\U0001d532", | ||
2742 | "ugrave": "\xf9", | ||
2743 | "ugrave;": "\xf9", | ||
2744 | "uharl;": "\u21bf", | ||
2745 | "uharr;": "\u21be", | ||
2746 | "uhblk;": "\u2580", | ||
2747 | "ulcorn;": "\u231c", | ||
2748 | "ulcorner;": "\u231c", | ||
2749 | "ulcrop;": "\u230f", | ||
2750 | "ultri;": "\u25f8", | ||
2751 | "umacr;": "\u016b", | ||
2752 | "uml": "\xa8", | ||
2753 | "uml;": "\xa8", | ||
2754 | "uogon;": "\u0173", | ||
2755 | "uopf;": "\U0001d566", | ||
2756 | "uparrow;": "\u2191", | ||
2757 | "updownarrow;": "\u2195", | ||
2758 | "upharpoonleft;": "\u21bf", | ||
2759 | "upharpoonright;": "\u21be", | ||
2760 | "uplus;": "\u228e", | ||
2761 | "upsi;": "\u03c5", | ||
2762 | "upsih;": "\u03d2", | ||
2763 | "upsilon;": "\u03c5", | ||
2764 | "upuparrows;": "\u21c8", | ||
2765 | "urcorn;": "\u231d", | ||
2766 | "urcorner;": "\u231d", | ||
2767 | "urcrop;": "\u230e", | ||
2768 | "uring;": "\u016f", | ||
2769 | "urtri;": "\u25f9", | ||
2770 | "uscr;": "\U0001d4ca", | ||
2771 | "utdot;": "\u22f0", | ||
2772 | "utilde;": "\u0169", | ||
2773 | "utri;": "\u25b5", | ||
2774 | "utrif;": "\u25b4", | ||
2775 | "uuarr;": "\u21c8", | ||
2776 | "uuml": "\xfc", | ||
2777 | "uuml;": "\xfc", | ||
2778 | "uwangle;": "\u29a7", | ||
2779 | "vArr;": "\u21d5", | ||
2780 | "vBar;": "\u2ae8", | ||
2781 | "vBarv;": "\u2ae9", | ||
2782 | "vDash;": "\u22a8", | ||
2783 | "vangrt;": "\u299c", | ||
2784 | "varepsilon;": "\u03f5", | ||
2785 | "varkappa;": "\u03f0", | ||
2786 | "varnothing;": "\u2205", | ||
2787 | "varphi;": "\u03d5", | ||
2788 | "varpi;": "\u03d6", | ||
2789 | "varpropto;": "\u221d", | ||
2790 | "varr;": "\u2195", | ||
2791 | "varrho;": "\u03f1", | ||
2792 | "varsigma;": "\u03c2", | ||
2793 | "varsubsetneq;": "\u228a\ufe00", | ||
2794 | "varsubsetneqq;": "\u2acb\ufe00", | ||
2795 | "varsupsetneq;": "\u228b\ufe00", | ||
2796 | "varsupsetneqq;": "\u2acc\ufe00", | ||
2797 | "vartheta;": "\u03d1", | ||
2798 | "vartriangleleft;": "\u22b2", | ||
2799 | "vartriangleright;": "\u22b3", | ||
2800 | "vcy;": "\u0432", | ||
2801 | "vdash;": "\u22a2", | ||
2802 | "vee;": "\u2228", | ||
2803 | "veebar;": "\u22bb", | ||
2804 | "veeeq;": "\u225a", | ||
2805 | "vellip;": "\u22ee", | ||
2806 | "verbar;": "|", | ||
2807 | "vert;": "|", | ||
2808 | "vfr;": "\U0001d533", | ||
2809 | "vltri;": "\u22b2", | ||
2810 | "vnsub;": "\u2282\u20d2", | ||
2811 | "vnsup;": "\u2283\u20d2", | ||
2812 | "vopf;": "\U0001d567", | ||
2813 | "vprop;": "\u221d", | ||
2814 | "vrtri;": "\u22b3", | ||
2815 | "vscr;": "\U0001d4cb", | ||
2816 | "vsubnE;": "\u2acb\ufe00", | ||
2817 | "vsubne;": "\u228a\ufe00", | ||
2818 | "vsupnE;": "\u2acc\ufe00", | ||
2819 | "vsupne;": "\u228b\ufe00", | ||
2820 | "vzigzag;": "\u299a", | ||
2821 | "wcirc;": "\u0175", | ||
2822 | "wedbar;": "\u2a5f", | ||
2823 | "wedge;": "\u2227", | ||
2824 | "wedgeq;": "\u2259", | ||
2825 | "weierp;": "\u2118", | ||
2826 | "wfr;": "\U0001d534", | ||
2827 | "wopf;": "\U0001d568", | ||
2828 | "wp;": "\u2118", | ||
2829 | "wr;": "\u2240", | ||
2830 | "wreath;": "\u2240", | ||
2831 | "wscr;": "\U0001d4cc", | ||
2832 | "xcap;": "\u22c2", | ||
2833 | "xcirc;": "\u25ef", | ||
2834 | "xcup;": "\u22c3", | ||
2835 | "xdtri;": "\u25bd", | ||
2836 | "xfr;": "\U0001d535", | ||
2837 | "xhArr;": "\u27fa", | ||
2838 | "xharr;": "\u27f7", | ||
2839 | "xi;": "\u03be", | ||
2840 | "xlArr;": "\u27f8", | ||
2841 | "xlarr;": "\u27f5", | ||
2842 | "xmap;": "\u27fc", | ||
2843 | "xnis;": "\u22fb", | ||
2844 | "xodot;": "\u2a00", | ||
2845 | "xopf;": "\U0001d569", | ||
2846 | "xoplus;": "\u2a01", | ||
2847 | "xotime;": "\u2a02", | ||
2848 | "xrArr;": "\u27f9", | ||
2849 | "xrarr;": "\u27f6", | ||
2850 | "xscr;": "\U0001d4cd", | ||
2851 | "xsqcup;": "\u2a06", | ||
2852 | "xuplus;": "\u2a04", | ||
2853 | "xutri;": "\u25b3", | ||
2854 | "xvee;": "\u22c1", | ||
2855 | "xwedge;": "\u22c0", | ||
2856 | "yacute": "\xfd", | ||
2857 | "yacute;": "\xfd", | ||
2858 | "yacy;": "\u044f", | ||
2859 | "ycirc;": "\u0177", | ||
2860 | "ycy;": "\u044b", | ||
2861 | "yen": "\xa5", | ||
2862 | "yen;": "\xa5", | ||
2863 | "yfr;": "\U0001d536", | ||
2864 | "yicy;": "\u0457", | ||
2865 | "yopf;": "\U0001d56a", | ||
2866 | "yscr;": "\U0001d4ce", | ||
2867 | "yucy;": "\u044e", | ||
2868 | "yuml": "\xff", | ||
2869 | "yuml;": "\xff", | ||
2870 | "zacute;": "\u017a", | ||
2871 | "zcaron;": "\u017e", | ||
2872 | "zcy;": "\u0437", | ||
2873 | "zdot;": "\u017c", | ||
2874 | "zeetrf;": "\u2128", | ||
2875 | "zeta;": "\u03b6", | ||
2876 | "zfr;": "\U0001d537", | ||
2877 | "zhcy;": "\u0436", | ||
2878 | "zigrarr;": "\u21dd", | ||
2879 | "zopf;": "\U0001d56b", | ||
2880 | "zscr;": "\U0001d4cf", | ||
2881 | "zwj;": "\u200d", | ||
2882 | "zwnj;": "\u200c", | ||
2883 | } | ||
2884 | |||
# Replacements applied when a numeric character reference names one of these
# code points: 0x00 maps to U+FFFD REPLACEMENT CHARACTER, and the C1 range
# 0x80-0x9F is reinterpreted as the characters Windows-1252 assigns to those
# bytes (per the HTML spec's character-reference error handling).
replacementCharacters = {
    0x0: "\uFFFD",
    0x0d: "\u000D",
    0x80: "\u20AC",
    0x81: "\u0081",
    0x82: "\u201A",
    0x83: "\u0192",
    0x84: "\u201E",
    0x85: "\u2026",
    0x86: "\u2020",
    0x87: "\u2021",
    0x88: "\u02C6",
    0x89: "\u2030",
    0x8A: "\u0160",
    0x8B: "\u2039",
    0x8C: "\u0152",
    0x8D: "\u008D",
    0x8E: "\u017D",
    0x8F: "\u008F",
    0x90: "\u0090",
    0x91: "\u2018",
    0x92: "\u2019",
    0x93: "\u201C",
    0x94: "\u201D",
    0x95: "\u2022",
    0x96: "\u2013",
    0x97: "\u2014",
    0x98: "\u02DC",
    0x99: "\u2122",
    0x9A: "\u0161",
    0x9B: "\u203A",
    0x9C: "\u0153",
    0x9D: "\u009D",
    0x9E: "\u017E",
    0x9F: "\u0178",
}
2921 | |||
# Integer codes for the token types produced by the tokenizer; the codes are
# simply the position of each name in this canonical ordering.
tokenTypes = {
    kind: code
    for code, kind in enumerate(["Doctype", "Characters", "SpaceCharacters",
                                 "StartTag", "EndTag", "EmptyTag", "Comment",
                                 "ParseError"])
}

# The subset of token-type codes that represent tags of any flavour.
tagTokenTypes = frozenset(tokenTypes[kind]
                          for kind in ("StartTag", "EndTag", "EmptyTag"))
2935 | |||
2936 | |||
# Reverse mapping of ``namespaces``: namespace URI -> prefix.
# Use a dict comprehension rather than dict() over a throwaway list of pairs.
prefixes = {url: prefix for prefix, url in namespaces.items()}
# Duplicate URIs collapse arbitrarily during the inversion above, so pin the
# MathML namespace to the canonical "math" prefix explicitly.
prefixes["http://www.w3.org/1998/Math/MathML"] = "math"
2939 | |||
2940 | |||
class DataLossWarning(UserWarning):
    """Warning issued when the current tree cannot represent the input data."""
2944 | |||
2945 | |||
2946 | class _ReparseException(Exception): | ||
2947 | pass | ||
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/__init__.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/__init__.py | |||
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/alphabeticalattributes.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/alphabeticalattributes.py new file mode 100644 index 0000000..d9e234a --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/alphabeticalattributes.py | |||
@@ -0,0 +1,29 @@ | |||
1 | from __future__ import absolute_import, division, unicode_literals | ||
2 | |||
3 | from . import base | ||
4 | |||
5 | from collections import OrderedDict | ||
6 | |||
7 | |||
8 | def _attr_key(attr): | ||
9 | """Return an appropriate key for an attribute for sorting | ||
10 | |||
11 | Attributes have a namespace that can be either ``None`` or a string. We | ||
12 | can't compare the two because they're different types, so we convert | ||
13 | ``None`` to an empty string first. | ||
14 | |||
15 | """ | ||
16 | return (attr[0][0] or ''), attr[0][1] | ||
17 | |||
18 | |||
class Filter(base.Filter):
    """Sorts each tag token's attributes into alphabetical order"""
    def __iter__(self):
        for token in base.Filter.__iter__(self):
            if token["type"] in ("StartTag", "EmptyTag"):
                # Rebuild the attribute mapping in sorted order; OrderedDict
                # preserves that order for downstream consumers.
                token["data"] = OrderedDict(
                    sorted(token["data"].items(), key=_attr_key))
            yield token
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/base.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/base.py new file mode 100644 index 0000000..f5aa523 --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/base.py | |||
@@ -0,0 +1,12 @@ | |||
1 | from __future__ import absolute_import, division, unicode_literals | ||
2 | |||
3 | |||
class Filter(object):
    """Base class for token-stream filters.

    Wraps a source token iterable; iterating the filter iterates the source,
    and unknown attribute lookups are delegated to the source so a chain of
    filters exposes the underlying stream's API.
    """
    def __init__(self, source):
        # source: the wrapped token iterable.
        self.source = source

    def __iter__(self):
        return iter(self.source)

    def __getattr__(self, name):
        # Only called when normal lookup fails; proxy to the wrapped source.
        return getattr(self.source, name)
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/inject_meta_charset.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/inject_meta_charset.py new file mode 100644 index 0000000..2f8ec4f --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/inject_meta_charset.py | |||
@@ -0,0 +1,73 @@ | |||
1 | from __future__ import absolute_import, division, unicode_literals | ||
2 | |||
3 | from . import base | ||
4 | |||
5 | |||
class Filter(base.Filter):
    """Injects ``<meta charset=ENCODING>`` tag into head of document"""
    def __init__(self, source, encoding):
        """Creates a Filter

        :arg source: the source token stream

        :arg encoding: the encoding to set

        """
        base.Filter.__init__(self, source)
        self.encoding = encoding

    def __iter__(self):
        # Three-state machine: "pre_head" (before <head>), "in_head" (head
        # contents buffered in ``pending``), "post_head" (pass-through).
        state = "pre_head"
        # With no target encoding there is nothing to inject, so behave as
        # if a suitable meta had already been found.
        meta_found = (self.encoding is None)
        pending = []

        for token in base.Filter.__iter__(self):
            type = token["type"]
            if type == "StartTag":
                if token["name"].lower() == "head":
                    state = "in_head"

            elif type == "EmptyTag":
                if token["name"].lower() == "meta":
                    # replace charset with actual encoding
                    has_http_equiv_content_type = False
                    for (namespace, name), value in token["data"].items():
                        if namespace is not None:
                            continue
                        elif name.lower() == 'charset':
                            token["data"][(namespace, name)] = self.encoding
                            meta_found = True
                            break
                        elif name == 'http-equiv' and value.lower() == 'content-type':
                            has_http_equiv_content_type = True
                    else:
                        # for/else: no charset attribute found; rewrite an
                        # http-equiv=content-type meta's content instead.
                        if has_http_equiv_content_type and (None, "content") in token["data"]:
                            token["data"][(None, "content")] = 'text/html; charset=%s' % self.encoding
                            meta_found = True

                elif token["name"].lower() == "head" and not meta_found:
                    # insert meta into empty head
                    yield {"type": "StartTag", "name": "head",
                           "data": token["data"]}
                    yield {"type": "EmptyTag", "name": "meta",
                           "data": {(None, "charset"): self.encoding}}
                    yield {"type": "EndTag", "name": "head"}
                    meta_found = True
                    continue

            elif type == "EndTag":
                if token["name"].lower() == "head" and pending:
                    # insert meta into head (if necessary) and flush pending queue
                    yield pending.pop(0)
                    if not meta_found:
                        yield {"type": "EmptyTag", "name": "meta",
                               "data": {(None, "charset"): self.encoding}}
                    while pending:
                        yield pending.pop(0)
                    meta_found = True
                    state = "post_head"

            if state == "in_head":
                # Buffer head contents until we know whether a meta must be
                # injected before </head>.
                pending.append(token)
            else:
                yield token
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/lint.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/lint.py new file mode 100644 index 0000000..b5bbd97 --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/lint.py | |||
@@ -0,0 +1,93 @@ | |||
1 | from __future__ import absolute_import, division, unicode_literals | ||
2 | |||
3 | from pip._vendor.six import text_type | ||
4 | |||
5 | from . import base | ||
6 | from ..constants import namespaces, voidElements | ||
7 | |||
8 | from ..constants import spaceCharacters | ||
9 | spaceCharacters = "".join(spaceCharacters) | ||
10 | |||
11 | |||
class Filter(base.Filter):
    """Lints the token stream for errors

    If it finds any errors, it'll raise an ``AssertionError``.

    """
    def __init__(self, source, require_matching_tags=True):
        """Creates a Filter

        :arg source: the source token stream

        :arg require_matching_tags: whether or not to require matching tags

        """
        super(Filter, self).__init__(source)
        self.require_matching_tags = require_matching_tags

    def __iter__(self):
        # Stack of (namespace, name) pairs for currently-open elements; used
        # to verify end tags match when require_matching_tags is set.
        open_elements = []
        for token in base.Filter.__iter__(self):
            type = token["type"]
            if type in ("StartTag", "EmptyTag"):
                namespace = token["namespace"]
                name = token["name"]
                assert namespace is None or isinstance(namespace, text_type)
                assert namespace != ""
                assert isinstance(name, text_type)
                assert name != ""
                assert isinstance(token["data"], dict)
                # HTML void elements (br, img, ...) must be emitted as
                # EmptyTag tokens; everything else as StartTag.
                if (not namespace or namespace == namespaces["html"]) and name in voidElements:
                    assert type == "EmptyTag"
                else:
                    assert type == "StartTag"
                if type == "StartTag" and self.require_matching_tags:
                    open_elements.append((namespace, name))
                for (namespace, name), value in token["data"].items():
                    assert namespace is None or isinstance(namespace, text_type)
                    assert namespace != ""
                    assert isinstance(name, text_type)
                    assert name != ""
                    assert isinstance(value, text_type)

            elif type == "EndTag":
                namespace = token["namespace"]
                name = token["name"]
                assert namespace is None or isinstance(namespace, text_type)
                assert namespace != ""
                assert isinstance(name, text_type)
                assert name != ""
                if (not namespace or namespace == namespaces["html"]) and name in voidElements:
                    assert False, "Void element reported as EndTag token: %(tag)s" % {"tag": name}
                elif self.require_matching_tags:
                    start = open_elements.pop()
                    assert start == (namespace, name)

            elif type == "Comment":
                data = token["data"]
                assert isinstance(data, text_type)

            elif type in ("Characters", "SpaceCharacters"):
                data = token["data"]
                assert isinstance(data, text_type)
                assert data != ""
                if type == "SpaceCharacters":
                    assert data.strip(spaceCharacters) == ""

            elif type == "Doctype":
                name = token["name"]
                assert name is None or isinstance(name, text_type)
                # Bug fix: these two checks previously re-tested ``name``
                # instead of the publicId/systemId values themselves, so a
                # non-string publicId/systemId was never caught.
                assert token["publicId"] is None or isinstance(token["publicId"], text_type)
                assert token["systemId"] is None or isinstance(token["systemId"], text_type)

            elif type == "Entity":
                assert isinstance(token["name"], text_type)

            elif type == "SerializerError":
                assert isinstance(token["data"], text_type)

            else:
                assert False, "Unknown token type: %(type)s" % {"type": type}

            yield token
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/optionaltags.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/optionaltags.py new file mode 100644 index 0000000..c8d5e54 --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/optionaltags.py | |||
@@ -0,0 +1,207 @@ | |||
1 | from __future__ import absolute_import, division, unicode_literals | ||
2 | |||
3 | from . import base | ||
4 | |||
5 | |||
class Filter(base.Filter):
    """Removes optional tags from the token stream.

    Implements the tag-omission rules of the HTML specification: start
    and end tags that the spec allows to be omitted are dropped from the
    stream, using a one-token look-behind and look-ahead for context.
    """

    def slider(self):
        """Yield ``(previous, current, next)`` triples over the source.

        ``previous`` is ``None`` for the first token and ``next`` is
        ``None`` after the last one, giving the omission rules their
        one-token context in each direction.
        """
        previous1 = previous2 = None
        for token in self.source:
            if previous1 is not None:
                yield previous2, previous1, token
            previous2 = previous1
            previous1 = token
        if previous1 is not None:
            yield previous2, previous1, None

    def __iter__(self):
        for previous, token, next in self.slider():
            type = token["type"]
            if type == "StartTag":
                # A start tag may only be omitted when it carries no
                # attributes (token["data"] holds the attribute dict).
                if (token["data"] or
                        not self.is_optional_start(token["name"], previous, next)):
                    yield token
            elif type == "EndTag":
                if not self.is_optional_end(token["name"], next):
                    yield token
            else:
                yield token

    def is_optional_start(self, tagname, previous, next):
        """Return True if the start tag *tagname* may be omitted here."""
        type = next and next["type"] or None
        # BUG FIX: this originally read ``tagname in 'html'``, which is a
        # substring test on the string 'html' and therefore also matched
        # tag names such as 'm', 'h', 'ht' or the empty string. An
        # equality comparison is what was intended.
        if tagname == 'html':
            # An html element's start tag may be omitted if the first thing
            # inside the html element is not a space character or a comment.
            return type not in ("Comment", "SpaceCharacters")
        elif tagname == 'head':
            # A head element's start tag may be omitted if the first thing
            # inside the head element is an element.
            # XXX: we also omit the start tag if the head element is empty
            if type in ("StartTag", "EmptyTag"):
                return True
            elif type == "EndTag":
                return next["name"] == "head"
        elif tagname == 'body':
            # A body element's start tag may be omitted if the first thing
            # inside the body element is not a space character or a comment,
            # except if the first thing inside the body element is a script
            # or style element and the node immediately preceding the body
            # element is a head element whose end tag has been omitted.
            if type in ("Comment", "SpaceCharacters"):
                return False
            elif type == "StartTag":
                # XXX: we do not look at the preceding event, so we never omit
                # the body element's start tag if it's followed by a script or
                # a style element.
                return next["name"] not in ('script', 'style')
            else:
                return True
        elif tagname == 'colgroup':
            # A colgroup element's start tag may be omitted if the first thing
            # inside the colgroup element is a col element, and if the element
            # is not immediately preceded by another colgroup element whose
            # end tag has been omitted.
            if type in ("StartTag", "EmptyTag"):
                # XXX: we do not look at the preceding event, so instead we never
                # omit the colgroup element's end tag when it is immediately
                # followed by another colgroup element. See is_optional_end.
                return next["name"] == "col"
            else:
                return False
        elif tagname == 'tbody':
            # A tbody element's start tag may be omitted if the first thing
            # inside the tbody element is a tr element, and if the element is
            # not immediately preceded by a tbody, thead, or tfoot element
            # whose end tag has been omitted.
            if type == "StartTag":
                # omit the thead and tfoot elements' end tag when they are
                # immediately followed by a tbody element. See is_optional_end.
                if previous and previous['type'] == 'EndTag' and \
                        previous['name'] in ('tbody', 'thead', 'tfoot'):
                    return False
                return next["name"] == 'tr'
            else:
                return False
        return False

    def is_optional_end(self, tagname, next):
        """Return True if the end tag *tagname* may be omitted here."""
        type = next and next["type"] or None
        if tagname in ('html', 'head', 'body'):
            # An html element's end tag may be omitted if the html element
            # is not immediately followed by a space character or a comment.
            return type not in ("Comment", "SpaceCharacters")
        elif tagname in ('li', 'optgroup', 'tr'):
            # A li element's end tag may be omitted if the li element is
            # immediately followed by another li element or if there is
            # no more content in the parent element.
            # An optgroup element's end tag may be omitted if the optgroup
            # element is immediately followed by another optgroup element,
            # or if there is no more content in the parent element.
            # A tr element's end tag may be omitted if the tr element is
            # immediately followed by another tr element, or if there is
            # no more content in the parent element.
            if type == "StartTag":
                return next["name"] == tagname
            else:
                return type == "EndTag" or type is None
        elif tagname in ('dt', 'dd'):
            # A dt element's end tag may be omitted if the dt element is
            # immediately followed by another dt element or a dd element.
            # A dd element's end tag may be omitted if the dd element is
            # immediately followed by another dd element or a dt element,
            # or if there is no more content in the parent element.
            if type == "StartTag":
                return next["name"] in ('dt', 'dd')
            elif tagname == 'dd':
                return type == "EndTag" or type is None
            else:
                return False
        elif tagname == 'p':
            # A p element's end tag may be omitted if the p element is
            # immediately followed by an address, article, aside,
            # blockquote, datagrid, dialog, dir, div, dl, fieldset,
            # footer, form, h1, h2, h3, h4, h5, h6, header, hr, menu,
            # nav, ol, p, pre, section, table, or ul, element, or if
            # there is no more content in the parent element.
            if type in ("StartTag", "EmptyTag"):
                return next["name"] in ('address', 'article', 'aside',
                                        'blockquote', 'datagrid', 'dialog',
                                        'dir', 'div', 'dl', 'fieldset', 'footer',
                                        'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
                                        'header', 'hr', 'menu', 'nav', 'ol',
                                        'p', 'pre', 'section', 'table', 'ul')
            else:
                return type == "EndTag" or type is None
        elif tagname == 'option':
            # An option element's end tag may be omitted if the option
            # element is immediately followed by another option element,
            # or if it is immediately followed by an <code>optgroup</code>
            # element, or if there is no more content in the parent
            # element.
            if type == "StartTag":
                return next["name"] in ('option', 'optgroup')
            else:
                return type == "EndTag" or type is None
        elif tagname in ('rt', 'rp'):
            # An rt element's end tag may be omitted if the rt element is
            # immediately followed by an rt or rp element, or if there is
            # no more content in the parent element.
            # An rp element's end tag may be omitted if the rp element is
            # immediately followed by an rt or rp element, or if there is
            # no more content in the parent element.
            if type == "StartTag":
                return next["name"] in ('rt', 'rp')
            else:
                return type == "EndTag" or type is None
        elif tagname == 'colgroup':
            # A colgroup element's end tag may be omitted if the colgroup
            # element is not immediately followed by a space character or
            # a comment.
            if type in ("Comment", "SpaceCharacters"):
                return False
            elif type == "StartTag":
                # XXX: we also look for an immediately following colgroup
                # element. See is_optional_start.
                return next["name"] != 'colgroup'
            else:
                return True
        elif tagname in ('thead', 'tbody'):
            # A thead element's end tag may be omitted if the thead element
            # is immediately followed by a tbody or tfoot element.
            # A tbody element's end tag may be omitted if the tbody element
            # is immediately followed by a tbody or tfoot element, or if
            # there is no more content in the parent element.
            # A tfoot element's end tag may be omitted if the tfoot element
            # is immediately followed by a tbody element, or if there is no
            # more content in the parent element.
            # XXX: we never omit the end tag when the following element is
            # a tbody. See is_optional_start.
            if type == "StartTag":
                return next["name"] in ['tbody', 'tfoot']
            elif tagname == 'tbody':
                return type == "EndTag" or type is None
            else:
                return False
        elif tagname == 'tfoot':
            # A tfoot element's end tag may be omitted if the tfoot element
            # is immediately followed by a tbody element, or if there is no
            # more content in the parent element.
            # XXX: we never omit the end tag when the following element is
            # a tbody. See is_optional_start.
            if type == "StartTag":
                return next["name"] == 'tbody'
            else:
                return type == "EndTag" or type is None
        elif tagname in ('td', 'th'):
            # A td element's end tag may be omitted if the td element is
            # immediately followed by a td or th element, or if there is
            # no more content in the parent element.
            # A th element's end tag may be omitted if the th element is
            # immediately followed by a td or th element, or if there is
            # no more content in the parent element.
            if type == "StartTag":
                return next["name"] in ('td', 'th')
            else:
                return type == "EndTag" or type is None
        return False
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/sanitizer.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/sanitizer.py new file mode 100644 index 0000000..c3199a5 --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/sanitizer.py | |||
@@ -0,0 +1,896 @@ | |||
1 | from __future__ import absolute_import, division, unicode_literals | ||
2 | |||
3 | import re | ||
4 | from xml.sax.saxutils import escape, unescape | ||
5 | |||
6 | from pip._vendor.six.moves import urllib_parse as urlparse | ||
7 | |||
8 | from . import base | ||
9 | from ..constants import namespaces, prefixes | ||
10 | |||
11 | __all__ = ["Filter"] | ||
12 | |||
13 | |||
# Default set of (namespace, tag-name) pairs the sanitizer allows; per the
# Filter docstring later in this module, everything else will be escaped.
allowed_elements = frozenset((
    # HTML elements
    (namespaces['html'], 'a'),
    (namespaces['html'], 'abbr'),
    (namespaces['html'], 'acronym'),
    (namespaces['html'], 'address'),
    (namespaces['html'], 'area'),
    (namespaces['html'], 'article'),
    (namespaces['html'], 'aside'),
    (namespaces['html'], 'audio'),
    (namespaces['html'], 'b'),
    (namespaces['html'], 'big'),
    (namespaces['html'], 'blockquote'),
    (namespaces['html'], 'br'),
    (namespaces['html'], 'button'),
    (namespaces['html'], 'canvas'),
    (namespaces['html'], 'caption'),
    (namespaces['html'], 'center'),
    (namespaces['html'], 'cite'),
    (namespaces['html'], 'code'),
    (namespaces['html'], 'col'),
    (namespaces['html'], 'colgroup'),
    (namespaces['html'], 'command'),
    (namespaces['html'], 'datagrid'),
    (namespaces['html'], 'datalist'),
    (namespaces['html'], 'dd'),
    (namespaces['html'], 'del'),
    (namespaces['html'], 'details'),
    (namespaces['html'], 'dfn'),
    (namespaces['html'], 'dialog'),
    (namespaces['html'], 'dir'),
    (namespaces['html'], 'div'),
    (namespaces['html'], 'dl'),
    (namespaces['html'], 'dt'),
    (namespaces['html'], 'em'),
    (namespaces['html'], 'event-source'),
    (namespaces['html'], 'fieldset'),
    (namespaces['html'], 'figcaption'),
    (namespaces['html'], 'figure'),
    (namespaces['html'], 'footer'),
    (namespaces['html'], 'font'),
    (namespaces['html'], 'form'),
    (namespaces['html'], 'header'),
    (namespaces['html'], 'h1'),
    (namespaces['html'], 'h2'),
    (namespaces['html'], 'h3'),
    (namespaces['html'], 'h4'),
    (namespaces['html'], 'h5'),
    (namespaces['html'], 'h6'),
    (namespaces['html'], 'hr'),
    (namespaces['html'], 'i'),
    (namespaces['html'], 'img'),
    (namespaces['html'], 'input'),
    (namespaces['html'], 'ins'),
    (namespaces['html'], 'keygen'),
    (namespaces['html'], 'kbd'),
    (namespaces['html'], 'label'),
    (namespaces['html'], 'legend'),
    (namespaces['html'], 'li'),
    (namespaces['html'], 'm'),
    (namespaces['html'], 'map'),
    (namespaces['html'], 'menu'),
    (namespaces['html'], 'meter'),
    (namespaces['html'], 'multicol'),
    (namespaces['html'], 'nav'),
    (namespaces['html'], 'nextid'),
    (namespaces['html'], 'ol'),
    (namespaces['html'], 'output'),
    (namespaces['html'], 'optgroup'),
    (namespaces['html'], 'option'),
    (namespaces['html'], 'p'),
    (namespaces['html'], 'pre'),
    (namespaces['html'], 'progress'),
    (namespaces['html'], 'q'),
    (namespaces['html'], 's'),
    (namespaces['html'], 'samp'),
    (namespaces['html'], 'section'),
    (namespaces['html'], 'select'),
    (namespaces['html'], 'small'),
    (namespaces['html'], 'sound'),
    (namespaces['html'], 'source'),
    (namespaces['html'], 'spacer'),
    (namespaces['html'], 'span'),
    (namespaces['html'], 'strike'),
    (namespaces['html'], 'strong'),
    (namespaces['html'], 'sub'),
    (namespaces['html'], 'sup'),
    (namespaces['html'], 'table'),
    (namespaces['html'], 'tbody'),
    (namespaces['html'], 'td'),
    (namespaces['html'], 'textarea'),
    (namespaces['html'], 'time'),
    (namespaces['html'], 'tfoot'),
    (namespaces['html'], 'th'),
    (namespaces['html'], 'thead'),
    (namespaces['html'], 'tr'),
    (namespaces['html'], 'tt'),
    (namespaces['html'], 'u'),
    (namespaces['html'], 'ul'),
    (namespaces['html'], 'var'),
    (namespaces['html'], 'video'),
    # MathML elements
    (namespaces['mathml'], 'maction'),
    (namespaces['mathml'], 'math'),
    (namespaces['mathml'], 'merror'),
    (namespaces['mathml'], 'mfrac'),
    (namespaces['mathml'], 'mi'),
    (namespaces['mathml'], 'mmultiscripts'),
    (namespaces['mathml'], 'mn'),
    (namespaces['mathml'], 'mo'),
    (namespaces['mathml'], 'mover'),
    (namespaces['mathml'], 'mpadded'),
    (namespaces['mathml'], 'mphantom'),
    (namespaces['mathml'], 'mprescripts'),
    (namespaces['mathml'], 'mroot'),
    (namespaces['mathml'], 'mrow'),
    (namespaces['mathml'], 'mspace'),
    (namespaces['mathml'], 'msqrt'),
    (namespaces['mathml'], 'mstyle'),
    (namespaces['mathml'], 'msub'),
    (namespaces['mathml'], 'msubsup'),
    (namespaces['mathml'], 'msup'),
    (namespaces['mathml'], 'mtable'),
    (namespaces['mathml'], 'mtd'),
    (namespaces['mathml'], 'mtext'),
    (namespaces['mathml'], 'mtr'),
    (namespaces['mathml'], 'munder'),
    (namespaces['mathml'], 'munderover'),
    (namespaces['mathml'], 'none'),
    # SVG elements
    (namespaces['svg'], 'a'),
    (namespaces['svg'], 'animate'),
    (namespaces['svg'], 'animateColor'),
    (namespaces['svg'], 'animateMotion'),
    (namespaces['svg'], 'animateTransform'),
    (namespaces['svg'], 'clipPath'),
    (namespaces['svg'], 'circle'),
    (namespaces['svg'], 'defs'),
    (namespaces['svg'], 'desc'),
    (namespaces['svg'], 'ellipse'),
    (namespaces['svg'], 'font-face'),
    (namespaces['svg'], 'font-face-name'),
    (namespaces['svg'], 'font-face-src'),
    (namespaces['svg'], 'g'),
    (namespaces['svg'], 'glyph'),
    (namespaces['svg'], 'hkern'),
    (namespaces['svg'], 'linearGradient'),
    (namespaces['svg'], 'line'),
    (namespaces['svg'], 'marker'),
    (namespaces['svg'], 'metadata'),
    (namespaces['svg'], 'missing-glyph'),
    (namespaces['svg'], 'mpath'),
    (namespaces['svg'], 'path'),
    (namespaces['svg'], 'polygon'),
    (namespaces['svg'], 'polyline'),
    (namespaces['svg'], 'radialGradient'),
    (namespaces['svg'], 'rect'),
    (namespaces['svg'], 'set'),
    (namespaces['svg'], 'stop'),
    (namespaces['svg'], 'svg'),
    (namespaces['svg'], 'switch'),
    (namespaces['svg'], 'text'),
    (namespaces['svg'], 'title'),
    (namespaces['svg'], 'tspan'),
    (namespaces['svg'], 'use'),
))
177 | |||
# Default attribute allow-list: (namespace, attribute-name) pairs that the
# sanitizer keeps on allowed elements; per the Filter docstring later in this
# module, everything else will be stripped. A ``None`` namespace matches
# un-namespaced attributes. Redundant tuples that were repeated on consecutive
# lines in the MathML section (columnalign x3, rowalign x3, mathvariant x2,
# width x2) have been collapsed to one occurrence each -- duplicates in a
# frozenset literal have no effect.
allowed_attributes = frozenset((
    # HTML attributes
    (None, 'abbr'),
    (None, 'accept'),
    (None, 'accept-charset'),
    (None, 'accesskey'),
    (None, 'action'),
    (None, 'align'),
    (None, 'alt'),
    (None, 'autocomplete'),
    (None, 'autofocus'),
    (None, 'axis'),
    (None, 'background'),
    (None, 'balance'),
    (None, 'bgcolor'),
    (None, 'bgproperties'),
    (None, 'border'),
    (None, 'bordercolor'),
    (None, 'bordercolordark'),
    (None, 'bordercolorlight'),
    (None, 'bottompadding'),
    (None, 'cellpadding'),
    (None, 'cellspacing'),
    (None, 'ch'),
    (None, 'challenge'),
    (None, 'char'),
    (None, 'charoff'),
    (None, 'choff'),
    (None, 'charset'),
    (None, 'checked'),
    (None, 'cite'),
    (None, 'class'),
    (None, 'clear'),
    (None, 'color'),
    (None, 'cols'),
    (None, 'colspan'),
    (None, 'compact'),
    (None, 'contenteditable'),
    (None, 'controls'),
    (None, 'coords'),
    (None, 'data'),
    (None, 'datafld'),
    (None, 'datapagesize'),
    (None, 'datasrc'),
    (None, 'datetime'),
    (None, 'default'),
    (None, 'delay'),
    (None, 'dir'),
    (None, 'disabled'),
    (None, 'draggable'),
    (None, 'dynsrc'),
    (None, 'enctype'),
    (None, 'end'),
    (None, 'face'),
    (None, 'for'),
    (None, 'form'),
    (None, 'frame'),
    (None, 'galleryimg'),
    (None, 'gutter'),
    (None, 'headers'),
    (None, 'height'),
    (None, 'hidefocus'),
    (None, 'hidden'),
    (None, 'high'),
    (None, 'href'),
    (None, 'hreflang'),
    (None, 'hspace'),
    (None, 'icon'),
    (None, 'id'),
    (None, 'inputmode'),
    (None, 'ismap'),
    (None, 'keytype'),
    (None, 'label'),
    (None, 'leftspacing'),
    (None, 'lang'),
    (None, 'list'),
    (None, 'longdesc'),
    (None, 'loop'),
    (None, 'loopcount'),
    (None, 'loopend'),
    (None, 'loopstart'),
    (None, 'low'),
    (None, 'lowsrc'),
    (None, 'max'),
    (None, 'maxlength'),
    (None, 'media'),
    (None, 'method'),
    (None, 'min'),
    (None, 'multiple'),
    (None, 'name'),
    (None, 'nohref'),
    (None, 'noshade'),
    (None, 'nowrap'),
    (None, 'open'),
    (None, 'optimum'),
    (None, 'pattern'),
    (None, 'ping'),
    (None, 'point-size'),
    (None, 'poster'),
    (None, 'pqg'),
    (None, 'preload'),
    (None, 'prompt'),
    (None, 'radiogroup'),
    (None, 'readonly'),
    (None, 'rel'),
    (None, 'repeat-max'),
    (None, 'repeat-min'),
    (None, 'replace'),
    (None, 'required'),
    (None, 'rev'),
    (None, 'rightspacing'),
    (None, 'rows'),
    (None, 'rowspan'),
    (None, 'rules'),
    (None, 'scope'),
    (None, 'selected'),
    (None, 'shape'),
    (None, 'size'),
    (None, 'span'),
    (None, 'src'),
    (None, 'start'),
    (None, 'step'),
    (None, 'style'),
    (None, 'summary'),
    (None, 'suppress'),
    (None, 'tabindex'),
    (None, 'target'),
    (None, 'template'),
    (None, 'title'),
    (None, 'toppadding'),
    (None, 'type'),
    (None, 'unselectable'),
    (None, 'usemap'),
    (None, 'urn'),
    (None, 'valign'),
    (None, 'value'),
    (None, 'variable'),
    (None, 'volume'),
    (None, 'vspace'),
    (None, 'vrml'),
    (None, 'width'),
    (None, 'wrap'),
    (namespaces['xml'], 'lang'),
    # MathML attributes
    (None, 'actiontype'),
    (None, 'align'),
    (None, 'columnalign'),
    (None, 'columnlines'),
    (None, 'columnspacing'),
    (None, 'columnspan'),
    (None, 'depth'),
    (None, 'display'),
    (None, 'displaystyle'),
    (None, 'equalcolumns'),
    (None, 'equalrows'),
    (None, 'fence'),
    (None, 'fontstyle'),
    (None, 'fontweight'),
    (None, 'frame'),
    (None, 'height'),
    (None, 'linethickness'),
    (None, 'lspace'),
    (None, 'mathbackground'),
    (None, 'mathcolor'),
    (None, 'mathvariant'),
    (None, 'maxsize'),
    (None, 'minsize'),
    (None, 'other'),
    (None, 'rowalign'),
    (None, 'rowlines'),
    (None, 'rowspacing'),
    (None, 'rowspan'),
    (None, 'rspace'),
    (None, 'scriptlevel'),
    (None, 'selection'),
    (None, 'separator'),
    (None, 'stretchy'),
    (None, 'width'),
    (namespaces['xlink'], 'href'),
    (namespaces['xlink'], 'show'),
    (namespaces['xlink'], 'type'),
    # SVG attributes
    (None, 'accent-height'),
    (None, 'accumulate'),
    (None, 'additive'),
    (None, 'alphabetic'),
    (None, 'arabic-form'),
    (None, 'ascent'),
    (None, 'attributeName'),
    (None, 'attributeType'),
    (None, 'baseProfile'),
    (None, 'bbox'),
    (None, 'begin'),
    (None, 'by'),
    (None, 'calcMode'),
    (None, 'cap-height'),
    (None, 'class'),
    (None, 'clip-path'),
    (None, 'color'),
    (None, 'color-rendering'),
    (None, 'content'),
    (None, 'cx'),
    (None, 'cy'),
    (None, 'd'),
    (None, 'dx'),
    (None, 'dy'),
    (None, 'descent'),
    (None, 'display'),
    (None, 'dur'),
    (None, 'end'),
    (None, 'fill'),
    (None, 'fill-opacity'),
    (None, 'fill-rule'),
    (None, 'font-family'),
    (None, 'font-size'),
    (None, 'font-stretch'),
    (None, 'font-style'),
    (None, 'font-variant'),
    (None, 'font-weight'),
    (None, 'from'),
    (None, 'fx'),
    (None, 'fy'),
    (None, 'g1'),
    (None, 'g2'),
    (None, 'glyph-name'),
    (None, 'gradientUnits'),
    (None, 'hanging'),
    (None, 'height'),
    (None, 'horiz-adv-x'),
    (None, 'horiz-origin-x'),
    (None, 'id'),
    (None, 'ideographic'),
    (None, 'k'),
    (None, 'keyPoints'),
    (None, 'keySplines'),
    (None, 'keyTimes'),
    (None, 'lang'),
    (None, 'marker-end'),
    (None, 'marker-mid'),
    (None, 'marker-start'),
    (None, 'markerHeight'),
    (None, 'markerUnits'),
    (None, 'markerWidth'),
    (None, 'mathematical'),
    (None, 'max'),
    (None, 'min'),
    (None, 'name'),
    (None, 'offset'),
    (None, 'opacity'),
    (None, 'orient'),
    (None, 'origin'),
    (None, 'overline-position'),
    (None, 'overline-thickness'),
    (None, 'panose-1'),
    (None, 'path'),
    (None, 'pathLength'),
    (None, 'points'),
    (None, 'preserveAspectRatio'),
    (None, 'r'),
    (None, 'refX'),
    (None, 'refY'),
    (None, 'repeatCount'),
    (None, 'repeatDur'),
    (None, 'requiredExtensions'),
    (None, 'requiredFeatures'),
    (None, 'restart'),
    (None, 'rotate'),
    (None, 'rx'),
    (None, 'ry'),
    (None, 'slope'),
    (None, 'stemh'),
    (None, 'stemv'),
    (None, 'stop-color'),
    (None, 'stop-opacity'),
    (None, 'strikethrough-position'),
    (None, 'strikethrough-thickness'),
    (None, 'stroke'),
    (None, 'stroke-dasharray'),
    (None, 'stroke-dashoffset'),
    (None, 'stroke-linecap'),
    (None, 'stroke-linejoin'),
    (None, 'stroke-miterlimit'),
    (None, 'stroke-opacity'),
    (None, 'stroke-width'),
    (None, 'systemLanguage'),
    (None, 'target'),
    (None, 'text-anchor'),
    (None, 'to'),
    (None, 'transform'),
    (None, 'type'),
    (None, 'u1'),
    (None, 'u2'),
    (None, 'underline-position'),
    (None, 'underline-thickness'),
    (None, 'unicode'),
    (None, 'unicode-range'),
    (None, 'units-per-em'),
    (None, 'values'),
    (None, 'version'),
    (None, 'viewBox'),
    (None, 'visibility'),
    (None, 'width'),
    (None, 'widths'),
    (None, 'x'),
    (None, 'x-height'),
    (None, 'x1'),
    (None, 'x2'),
    (namespaces['xlink'], 'actuate'),
    (namespaces['xlink'], 'arcrole'),
    (namespaces['xlink'], 'href'),
    (namespaces['xlink'], 'role'),
    (namespaces['xlink'], 'show'),
    (namespaces['xlink'], 'title'),
    (namespaces['xlink'], 'type'),
    (namespaces['xml'], 'base'),
    (namespaces['xml'], 'lang'),
    (namespaces['xml'], 'space'),
    (None, 'y'),
    (None, 'y1'),
    (None, 'y2'),
    (None, 'zoomAndPan'),
))
506 | |||
# Attributes whose values are URIs; judging by the names below and
# allowed_protocols later in this module, these presumably get scheme
# checking in the Filter -- the checking code is outside this view.
attr_val_is_uri = frozenset((
    (None, 'href'),
    (None, 'src'),
    (None, 'cite'),
    (None, 'action'),
    (None, 'longdesc'),
    (None, 'poster'),
    (None, 'background'),
    (None, 'datasrc'),
    (None, 'dynsrc'),
    (None, 'lowsrc'),
    (None, 'ping'),
    (namespaces['xlink'], 'href'),
    (namespaces['xml'], 'base'),
))

# SVG presentation attributes whose value may carry a reference
# (e.g. ``fill="url(#gradient)"``) rather than a plain value.
svg_attr_val_allows_ref = frozenset((
    (None, 'clip-path'),
    (None, 'color-profile'),
    (None, 'cursor'),
    (None, 'fill'),
    (None, 'filter'),
    (None, 'marker'),
    (None, 'marker-start'),
    (None, 'marker-mid'),
    (None, 'marker-end'),
    (None, 'mask'),
    (None, 'stroke'),
))

# SVG elements on which a same-document (fragment-only) href is permitted.
svg_allow_local_href = frozenset((
    (None, 'altGlyph'),
    (None, 'animate'),
    (None, 'animateColor'),
    (None, 'animateMotion'),
    (None, 'animateTransform'),
    (None, 'cursor'),
    (None, 'feImage'),
    (None, 'filter'),
    (None, 'linearGradient'),
    (None, 'pattern'),
    (None, 'radialGradient'),
    (None, 'textpath'),
    (None, 'tref'),
    (None, 'set'),
    (None, 'use')
))
554 | |||
# CSS properties permitted in inline ``style`` attributes; per the Filter
# docstring later in this module, everything else will be stripped.
allowed_css_properties = frozenset((
    'azimuth',
    'background-color',
    'border-bottom-color',
    'border-collapse',
    'border-color',
    'border-left-color',
    'border-right-color',
    'border-top-color',
    'clear',
    'color',
    'cursor',
    'direction',
    'display',
    'elevation',
    'float',
    'font',
    'font-family',
    'font-size',
    'font-style',
    'font-variant',
    'font-weight',
    'height',
    'letter-spacing',
    'line-height',
    'overflow',
    'pause',
    'pause-after',
    'pause-before',
    'pitch',
    'pitch-range',
    'richness',
    'speak',
    'speak-header',
    'speak-numeral',
    'speak-punctuation',
    'speech-rate',
    'stress',
    'text-align',
    'text-decoration',
    'text-indent',
    'unicode-bidi',
    'vertical-align',
    'voice-family',
    'volume',
    'white-space',
    'width',
))

# CSS keyword values permitted in inline style declarations.
allowed_css_keywords = frozenset((
    'auto',
    'aqua',
    'black',
    'block',
    'blue',
    'bold',
    'both',
    'bottom',
    'brown',
    'center',
    'collapse',
    'dashed',
    'dotted',
    'fuchsia',
    'gray',
    'green',
    '!important',
    'italic',
    'left',
    'lime',
    'maroon',
    'medium',
    'none',
    'navy',
    'normal',
    'nowrap',
    'olive',
    'pointer',
    'purple',
    'red',
    'right',
    'solid',
    'silver',
    'teal',
    'top',
    'transparent',
    'underline',
    'white',
    'yellow',
))

# SVG presentation properties permitted when sanitizing inline styles
# on SVG content.
allowed_svg_properties = frozenset((
    'fill',
    'fill-opacity',
    'fill-rule',
    'stroke',
    'stroke-width',
    'stroke-linecap',
    'stroke-linejoin',
    'stroke-opacity',
))
656 | |||
# URI schemes permitted in attribute values that carry URIs; any other
# scheme causes the attribute to be removed. Note 'javascript' is absent.
allowed_protocols = frozenset({
    'afs',
    'aim',
    'callto',
    'data',
    'ed2k',
    'feed',
    'ftp',
    'gopher',
    'http',
    'https',
    'irc',
    'mailto',
    'news',
    'nntp',
    'rsync',
    'rtsp',
    'sftp',
    'ssh',
    'tag',
    'telnet',
    'urn',
    'webcal',
    'xmpp',
})
682 | |||
# MIME types permitted inside ``data:`` URIs; anything else strips the attribute.
allowed_content_types = frozenset({
    'image/bmp', 'image/gif', 'image/jpeg',
    'image/png', 'image/webp', 'text/plain',
})
691 | |||
692 | |||
# Parses the header of a ``data:`` URI payload, e.g.
# "image/png;base64,iVBOR..." -> content_type group "image/png".
# Used by Filter.allowed_token to validate data URIs against
# allowed_content_types.
data_content_type = re.compile(r'''
                                ^
                                # Match a content type <application>/<type>
                                (?P<content_type>[-a-zA-Z0-9.]+/[-a-zA-Z0-9.]+)
                                # Match any character set and encoding
                                (?:(?:;charset=(?:[-a-zA-Z0-9]+)(?:;(?:base64))?)
                                 |(?:;(?:base64))?(?:;charset=(?:[-a-zA-Z0-9]+))?)
                                # Assume the rest is data
                                ,.*
                                $
                                ''',
                               re.VERBOSE)
705 | |||
706 | |||
class Filter(base.Filter):
    """Sanitizes token stream of XHTML+MathML+SVG and of inline style attributes"""
    def __init__(self,
                 source,
                 allowed_elements=allowed_elements,
                 allowed_attributes=allowed_attributes,
                 allowed_css_properties=allowed_css_properties,
                 allowed_css_keywords=allowed_css_keywords,
                 allowed_svg_properties=allowed_svg_properties,
                 allowed_protocols=allowed_protocols,
                 allowed_content_types=allowed_content_types,
                 attr_val_is_uri=attr_val_is_uri,
                 svg_attr_val_allows_ref=svg_attr_val_allows_ref,
                 svg_allow_local_href=svg_allow_local_href):
        """Creates a Filter

        :arg allowed_elements: set of elements to allow--everything else will
            be escaped

        :arg allowed_attributes: set of attributes to allow in
            elements--everything else will be stripped

        :arg allowed_css_properties: set of CSS properties to allow--everything
            else will be stripped

        :arg allowed_css_keywords: set of CSS keywords to allow--everything
            else will be stripped

        :arg allowed_svg_properties: set of SVG properties to allow--everything
            else will be removed

        :arg allowed_protocols: set of allowed protocols for URIs

        :arg allowed_content_types: set of allowed content types for ``data`` URIs.

        :arg attr_val_is_uri: set of attributes that have URI values--values
            that have a scheme not listed in ``allowed_protocols`` are removed

        :arg svg_attr_val_allows_ref: set of SVG attributes that can have
            references

        :arg svg_allow_local_href: set of SVG elements that can have local
            hrefs--these are removed

        """
        super(Filter, self).__init__(source)
        self.allowed_elements = allowed_elements
        self.allowed_attributes = allowed_attributes
        self.allowed_css_properties = allowed_css_properties
        self.allowed_css_keywords = allowed_css_keywords
        self.allowed_svg_properties = allowed_svg_properties
        self.allowed_protocols = allowed_protocols
        self.allowed_content_types = allowed_content_types
        self.attr_val_is_uri = attr_val_is_uri
        self.svg_attr_val_allows_ref = svg_attr_val_allows_ref
        self.svg_allow_local_href = svg_allow_local_href

    def __iter__(self):
        # sanitize_token may return None (e.g. for comments); such tokens are
        # dropped from the output stream entirely.
        for token in base.Filter.__iter__(self):
            token = self.sanitize_token(token)
            if token:
                yield token

    # Sanitize the +html+, escaping all elements not in ALLOWED_ELEMENTS, and
    # stripping out all attributes not in ALLOWED_ATTRIBUTES. Style attributes
    # are parsed, and a restricted set, specified by ALLOWED_CSS_PROPERTIES and
    # ALLOWED_CSS_KEYWORDS, are allowed through. attributes in ATTR_VAL_IS_URI
    # are scanned, and only URI schemes specified in ALLOWED_PROTOCOLS are
    # allowed.
    #
    # sanitize_html('<script> do_nasty_stuff() </script>')
    #  => <script> do_nasty_stuff() </script>
    # sanitize_html('<a href="javascript: sucker();">Click here for $100</a>')
    #  => <a>Click here for $100</a>
    def sanitize_token(self, token):

        # accommodate filters which use token_type differently
        token_type = token["type"]
        if token_type in ("StartTag", "EndTag", "EmptyTag"):
            name = token["name"]
            namespace = token["namespace"]
            # Elements are whitelisted by (namespace, name); a token with no
            # namespace is also checked against the HTML namespace.
            if ((namespace, name) in self.allowed_elements or
                (namespace is None and
                 (namespaces["html"], name) in self.allowed_elements)):
                return self.allowed_token(token)
            else:
                return self.disallowed_token(token)
        elif token_type == "Comment":
            # Comments are dropped: returning None removes the token upstream.
            pass
        else:
            # Characters / SpaceCharacters / Doctype etc. pass through as-is.
            return token

    def allowed_token(self, token):
        # Tag is whitelisted; now scrub its attributes in place.
        if "data" in token:
            attrs = token["data"]
            attr_names = set(attrs.keys())

            # Remove forbidden attributes
            for to_remove in (attr_names - self.allowed_attributes):
                del token["data"][to_remove]
                attr_names.remove(to_remove)

            # Remove attributes with disallowed URL values
            for attr in (attr_names & self.attr_val_is_uri):
                assert attr in attrs
                # I don't have a clue where this regexp comes from or why it matches those
                # characters, nor why we call unescape. I just know it's always been here.
                # Should you be worried by this comment in a sanitizer? Yes. On the other hand, all
                # this will do is remove *more* than it otherwise would.
                val_unescaped = re.sub("[`\x00-\x20\x7f-\xa0\\s]+", '',
                                       unescape(attrs[attr])).lower()
                # remove replacement characters from unescaped characters
                val_unescaped = val_unescaped.replace("\ufffd", "")
                try:
                    uri = urlparse.urlparse(val_unescaped)
                except ValueError:
                    uri = None
                    del attrs[attr]
                # Drop the attribute when its scheme is not whitelisted, and
                # additionally validate the payload type of data: URIs.
                if uri and uri.scheme:
                    if uri.scheme not in self.allowed_protocols:
                        del attrs[attr]
                    if uri.scheme == 'data':
                        m = data_content_type.match(uri.path)
                        if not m:
                            del attrs[attr]
                        elif m.group('content_type') not in self.allowed_content_types:
                            del attrs[attr]

            # Strip non-local url(...) references from SVG attributes that
            # allow references (e.g. clip-path, fill, filter).
            for attr in self.svg_attr_val_allows_ref:
                if attr in attrs:
                    attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)',
                                         ' ',
                                         unescape(attrs[attr]))
            # Remove non-fragment xlink:href on SVG elements that may only
            # carry local (same-document, '#...') references.
            if (token["name"] in self.svg_allow_local_href and
                (namespaces['xlink'], 'href') in attrs and re.search(r'^\s*[^#\s].*',
                                                                     attrs[(namespaces['xlink'], 'href')])):
                del attrs[(namespaces['xlink'], 'href')]
            if (None, 'style') in attrs:
                attrs[(None, 'style')] = self.sanitize_css(attrs[(None, 'style')])
            token["data"] = attrs
        return token

    def disallowed_token(self, token):
        # Re-serialize the disallowed tag as escaped literal text so the
        # markup renders inert instead of being interpreted.
        token_type = token["type"]
        if token_type == "EndTag":
            token["data"] = "</%s>" % token["name"]
        elif token["data"]:
            assert token_type in ("StartTag", "EmptyTag")
            attrs = []
            for (ns, name), v in token["data"].items():
                attrs.append(' %s="%s"' % (name if ns is None else "%s:%s" % (prefixes[ns], name), escape(v)))
            token["data"] = "<%s%s>" % (token["name"], ''.join(attrs))
        else:
            token["data"] = "<%s>" % token["name"]
        if token.get("selfClosing"):
            token["data"] = token["data"][:-1] + "/>"

        token["type"] = "Characters"

        del token["name"]
        return token

    def sanitize_css(self, style):
        # disallow urls
        style = re.compile(r'url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ', style)

        # gauntlet: reject the whole declaration block outright when it
        # contains characters outside a conservative whitelist, or when it is
        # not a sequence of "property: value;" declarations.
        if not re.match(r"""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style):
            return ''
        if not re.match(r"^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style):
            return ''

        clean = []
        for prop, value in re.findall(r"([-\w]+)\s*:\s*([^:;]*)", style):
            if not value:
                continue
            if prop.lower() in self.allowed_css_properties:
                clean.append(prop + ': ' + value + ';')
            elif prop.split('-')[0].lower() in ['background', 'border', 'margin',
                                                'padding']:
                # Shorthand properties: keep only if every keyword is either
                # whitelisted or looks like a color/length/percentage literal.
                for keyword in value.split():
                    if keyword not in self.allowed_css_keywords and \
                            not re.match(r"^(#[0-9a-fA-F]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword):  # noqa
                        break
                else:
                    clean.append(prop + ': ' + value + ';')
            elif prop.lower() in self.allowed_svg_properties:
                clean.append(prop + ': ' + value + ';')

        return ' '.join(clean)
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/whitespace.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/whitespace.py new file mode 100644 index 0000000..24bb0de --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/whitespace.py | |||
@@ -0,0 +1,38 @@ | |||
1 | from __future__ import absolute_import, division, unicode_literals | ||
2 | |||
3 | import re | ||
4 | |||
5 | from . import base | ||
6 | from ..constants import rcdataElements, spaceCharacters | ||
# ``spaceCharacters`` is imported as a collection of individual characters;
# join it into a single string usable inside a regex character class.
spaceCharacters = "".join(spaceCharacters)

# Matches one or more consecutive HTML space characters.
SPACES_REGEX = re.compile("[%s]+" % spaceCharacters)
10 | |||
11 | |||
class Filter(base.Filter):
    """Collapses runs of whitespace, except inside space-preserving elements
    (pre, textarea and the raw-text elements such as script)."""

    spacePreserveElements = frozenset(["pre", "textarea"] + list(rcdataElements))

    def __iter__(self):
        # Depth of nesting inside a space-preserving subtree; any start tag
        # encountered while already inside one increments it, so the matching
        # end tags balance the counter correctly.
        depth = 0
        for token in base.Filter.__iter__(self):
            kind = token["type"]
            if kind == "StartTag" and (
                    depth or token["name"] in self.spacePreserveElements):
                depth += 1
            elif kind == "EndTag" and depth:
                depth -= 1
            elif not depth and kind == "SpaceCharacters" and token["data"]:
                # Only rewrite non-empty runs so no new spaces are introduced.
                token["data"] = " "
            elif not depth and kind == "Characters":
                token["data"] = collapse_spaces(token["data"])
            yield token
35 | |||
36 | |||
def collapse_spaces(text):
    """Collapse every run of space characters in ``text`` to a single space."""
    return SPACES_REGEX.sub(' ', text)
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/html5parser.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/html5parser.py new file mode 100644 index 0000000..b185971 --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/html5parser.py | |||
@@ -0,0 +1,2791 @@ | |||
1 | from __future__ import absolute_import, division, unicode_literals | ||
2 | from pip._vendor.six import with_metaclass, viewkeys | ||
3 | |||
4 | import types | ||
5 | from collections import OrderedDict | ||
6 | |||
7 | from . import _inputstream | ||
8 | from . import _tokenizer | ||
9 | |||
10 | from . import treebuilders | ||
11 | from .treebuilders.base import Marker | ||
12 | |||
13 | from . import _utils | ||
14 | from .constants import ( | ||
15 | spaceCharacters, asciiUpper2Lower, | ||
16 | specialElements, headingElements, cdataElements, rcdataElements, | ||
17 | tokenTypes, tagTokenTypes, | ||
18 | namespaces, | ||
19 | htmlIntegrationPointElements, mathmlTextIntegrationPointElements, | ||
20 | adjustForeignAttributes as adjustForeignAttributesMap, | ||
21 | adjustMathMLAttributes, adjustSVGAttributes, | ||
22 | E, | ||
23 | _ReparseException | ||
24 | ) | ||
25 | |||
26 | |||
def parse(doc, treebuilder="etree", namespaceHTMLElements=True, **kwargs):
    """Parse an HTML document given as a string or file-like object.

    :arg doc: the document to parse as a string or file-like object

    :arg treebuilder: name of the treebuilder backend to use when parsing

    :arg namespaceHTMLElements: whether or not to namespace HTML elements

    :returns: the parsed tree

    Example:

    >>> from html5lib.html5parser import parse
    >>> parse('<html><body><p>This is a doc</p></body></html>')
    <Element u'{http://www.w3.org/1999/xhtml}html' at 0x7feac4909db0>

    """
    builder = treebuilders.getTreeBuilder(treebuilder)
    parser = HTMLParser(builder, namespaceHTMLElements=namespaceHTMLElements)
    return parser.parse(doc, **kwargs)
48 | |||
49 | |||
def parseFragment(doc, container="div", treebuilder="etree", namespaceHTMLElements=True, **kwargs):
    """Parse an HTML fragment given as a string or file-like object.

    :arg doc: the fragment to parse as a string or file-like object

    :arg container: the container element context to parse the fragment in

    :arg treebuilder: name of the treebuilder backend to use when parsing

    :arg namespaceHTMLElements: whether or not to namespace HTML elements

    :returns: the parsed tree fragment

    Example:

    >>> from html5lib.html5parser import parseFragment
    >>> parseFragment('<b>this is a fragment</b>')
    <Element u'DOCUMENT_FRAGMENT' at 0x7feac484b090>

    """
    builder = treebuilders.getTreeBuilder(treebuilder)
    parser = HTMLParser(builder, namespaceHTMLElements=namespaceHTMLElements)
    return parser.parseFragment(doc, container=container, **kwargs)
73 | |||
74 | |||
def method_decorator_metaclass(function):
    """Build a metaclass that wraps every plain-function attribute of a class
    with *function* at class-creation time (used for debug logging)."""
    class Decorated(type):
        def __new__(meta, classname, bases, classDict):
            # Non-function attributes (constants, properties, ...) pass
            # through untouched; only plain functions are decorated.
            decorated = {
                name: function(attr) if isinstance(attr, types.FunctionType) else attr
                for name, attr in classDict.items()
            }
            return type.__new__(meta, classname, bases, decorated)
    return Decorated
85 | |||
86 | |||
class HTMLParser(object):
    """HTML parser

    Generates a tree structure from a stream of (possibly malformed) HTML.

    """

    def __init__(self, tree=None, strict=False, namespaceHTMLElements=True, debug=False):
        """
        :arg tree: a treebuilder class controlling the type of tree that will be
            returned. Built in treebuilders can be accessed through
            html5lib.treebuilders.getTreeBuilder(treeType)

        :arg strict: raise an exception when a parse error is encountered

        :arg namespaceHTMLElements: whether or not to namespace HTML elements

        :arg debug: whether or not to enable debug mode which logs things

        Example:

        >>> from html5lib.html5parser import HTMLParser
        >>> parser = HTMLParser()                     # generates parser with etree builder
        >>> parser = HTMLParser('lxml', strict=True)  # generates parser with lxml builder which is strict

        """

        # Raise an exception on the first error encountered
        self.strict = strict

        if tree is None:
            tree = treebuilders.getTreeBuilder("etree")
        self.tree = tree(namespaceHTMLElements)
        self.errors = []

        # Map of phase name -> phase instance; built once per parser.
        # getPhases is memoized, so the phase classes are shared per debug flag.
        self.phases = dict([(name, cls(self, self.tree)) for name, cls in
                            getPhases(debug).items()])

    def _parse(self, stream, innerHTML=False, container="div", scripting=False, **kwargs):
        # Shared driver behind parse() and parseFragment().

        self.innerHTMLMode = innerHTML
        self.container = container
        self.scripting = scripting
        self.tokenizer = _tokenizer.HTMLTokenizer(stream, parser=self, **kwargs)
        self.reset()

        try:
            self.mainLoop()
        except _ReparseException:
            # The input stream requested a restart (e.g. a late <meta charset>
            # changed the detected encoding); reset state and parse again.
            self.reset()
            self.mainLoop()

    def reset(self):
        # Restore the parser to its initial state so the same instance can be
        # reused; also invoked when a reparse is triggered.
        self.tree.reset()
        self.firstStartTag = False
        self.errors = []
        self.log = []  # only used with debug mode
        # "quirks" / "limited quirks" / "no quirks"
        self.compatMode = "no quirks"

        if self.innerHTMLMode:
            self.innerHTML = self.container.lower()

            # Select the tokenizer state implied by the fragment's container.
            if self.innerHTML in cdataElements:
                self.tokenizer.state = self.tokenizer.rcdataState
            elif self.innerHTML in rcdataElements:
                self.tokenizer.state = self.tokenizer.rawtextState
            elif self.innerHTML == 'plaintext':
                self.tokenizer.state = self.tokenizer.plaintextState
            else:
                # state already is data state
                # self.tokenizer.state = self.tokenizer.dataState
                pass
            self.phase = self.phases["beforeHtml"]
            self.phase.insertHtmlElement()
            self.resetInsertionMode()
        else:
            self.innerHTML = False  # pylint:disable=redefined-variable-type
            self.phase = self.phases["initial"]

        self.lastPhase = None

        self.beforeRCDataPhase = None

        self.framesetOK = True

    @property
    def documentEncoding(self):
        """Name of the character encoding that was used to decode the input stream, or
        :obj:`None` if that is not determined yet

        """
        if not hasattr(self, 'tokenizer'):
            return None
        return self.tokenizer.stream.charEncoding[0].name

    def isHTMLIntegrationPoint(self, element):
        # <annotation-xml> in MathML is an integration point only when its
        # "encoding" attribute declares (X)HTML content.
        if (element.name == "annotation-xml" and
                element.namespace == namespaces["mathml"]):
            return ("encoding" in element.attributes and
                    element.attributes["encoding"].translate(
                        asciiUpper2Lower) in
                    ("text/html", "application/xhtml+xml"))
        else:
            return (element.namespace, element.name) in htmlIntegrationPointElements

    def isMathMLTextIntegrationPoint(self, element):
        return (element.namespace, element.name) in mathmlTextIntegrationPointElements

    def mainLoop(self):
        # Hoist token-type constants out of the loop for cheap comparisons.
        CharactersToken = tokenTypes["Characters"]
        SpaceCharactersToken = tokenTypes["SpaceCharacters"]
        StartTagToken = tokenTypes["StartTag"]
        EndTagToken = tokenTypes["EndTag"]
        CommentToken = tokenTypes["Comment"]
        DoctypeToken = tokenTypes["Doctype"]
        ParseErrorToken = tokenTypes["ParseError"]

        for token in self.normalizedTokens():
            # A phase handler may return a replacement token to reprocess;
            # loop until the token is fully consumed (returns None).
            prev_token = None
            new_token = token
            while new_token is not None:
                prev_token = new_token
                currentNode = self.tree.openElements[-1] if self.tree.openElements else None
                currentNodeNamespace = currentNode.namespace if currentNode else None
                currentNodeName = currentNode.name if currentNode else None

                type = new_token["type"]

                if type == ParseErrorToken:
                    self.parseError(new_token["data"], new_token.get("datavars", {}))
                    new_token = None
                else:
                    # Dispatch per the spec's "tree construction dispatcher":
                    # use the current phase for HTML content and integration
                    # points, otherwise the foreign-content rules.
                    if (len(self.tree.openElements) == 0 or
                        currentNodeNamespace == self.tree.defaultNamespace or
                        (self.isMathMLTextIntegrationPoint(currentNode) and
                         ((type == StartTagToken and
                           token["name"] not in frozenset(["mglyph", "malignmark"])) or
                          type in (CharactersToken, SpaceCharactersToken))) or
                        (currentNodeNamespace == namespaces["mathml"] and
                         currentNodeName == "annotation-xml" and
                         type == StartTagToken and
                         token["name"] == "svg") or
                        (self.isHTMLIntegrationPoint(currentNode) and
                         type in (StartTagToken, CharactersToken, SpaceCharactersToken))):
                        phase = self.phase
                    else:
                        phase = self.phases["inForeignContent"]

                    if type == CharactersToken:
                        new_token = phase.processCharacters(new_token)
                    elif type == SpaceCharactersToken:
                        new_token = phase.processSpaceCharacters(new_token)
                    elif type == StartTagToken:
                        new_token = phase.processStartTag(new_token)
                    elif type == EndTagToken:
                        new_token = phase.processEndTag(new_token)
                    elif type == CommentToken:
                        new_token = phase.processComment(new_token)
                    elif type == DoctypeToken:
                        new_token = phase.processDoctype(new_token)

            # A self-closing slash on a non-void element is a parse error.
            if (type == StartTagToken and prev_token["selfClosing"] and
                    not prev_token["selfClosingAcknowledged"]):
                self.parseError("non-void-element-with-trailing-solidus",
                                {"name": prev_token["name"]})

        # When the loop finishes it's EOF
        reprocess = True
        phases = []
        while reprocess:
            phases.append(self.phase)
            reprocess = self.phase.processEOF()
            if reprocess:
                # Guard against phases bouncing EOF between each other forever.
                assert self.phase not in phases

    def normalizedTokens(self):
        # Generator applying HTML5 token normalization to the raw token stream.
        for token in self.tokenizer:
            yield self.normalizeToken(token)

    def parse(self, stream, *args, **kwargs):
        """Parse a HTML document into a well-formed tree

        :arg stream: a file-like object or string containing the HTML to be parsed

        The optional encoding parameter must be a string that indicates
        the encoding. If specified, that encoding will be used,
        regardless of any BOM or later declaration (such as in a meta
        element).

        :arg scripting: treat noscript elements as if JavaScript was turned on

        :returns: parsed tree

        Example:

        >>> from html5lib.html5parser import HTMLParser
        >>> parser = HTMLParser()
        >>> parser.parse('<html><body><p>This is a doc</p></body></html>')
        <Element u'{http://www.w3.org/1999/xhtml}html' at 0x7feac4909db0>

        """
        # container is irrelevant outside fragment parsing, hence None here.
        self._parse(stream, False, None, *args, **kwargs)
        return self.tree.getDocument()

    def parseFragment(self, stream, *args, **kwargs):
        """Parse a HTML fragment into a well-formed tree fragment

        :arg container: name of the element we're setting the innerHTML
            property if set to None, default to 'div'

        :arg stream: a file-like object or string containing the HTML to be parsed

        The optional encoding parameter must be a string that indicates
        the encoding. If specified, that encoding will be used,
        regardless of any BOM or later declaration (such as in a meta
        element)

        :arg scripting: treat noscript elements as if JavaScript was turned on

        :returns: parsed tree

        Example:

        >>> from html5lib.html5parser import HTMLParser
        >>> parser = HTMLParser()
        >>> parser.parseFragment('<b>this is a fragment</b>')
        <Element u'DOCUMENT_FRAGMENT' at 0x7feac484b090>

        """
        self._parse(stream, True, *args, **kwargs)
        return self.tree.getFragment()

    def parseError(self, errorcode="XXX-undefined-error", datavars=None):
        # XXX The idea is to make errorcode mandatory.
        if datavars is None:
            datavars = {}
        self.errors.append((self.tokenizer.stream.position(), errorcode, datavars))
        if self.strict:
            raise ParseError(E[errorcode] % datavars)

    def normalizeToken(self, token):
        # HTML5 specific normalizations to the token stream
        if token["type"] == tokenTypes["StartTag"]:
            # The tokenizer emits attributes as a list of (name, value) pairs.
            raw = token["data"]
            token["data"] = OrderedDict(raw)
            if len(raw) > len(token["data"]):
                # we had some duplicated attribute, fix so first wins
                token["data"].update(raw[::-1])

        return token

    def adjustMathMLAttributes(self, token):
        adjust_attributes(token, adjustMathMLAttributes)

    def adjustSVGAttributes(self, token):
        adjust_attributes(token, adjustSVGAttributes)

    def adjustForeignAttributes(self, token):
        adjust_attributes(token, adjustForeignAttributesMap)

    def reparseTokenNormal(self, token):
        # pylint:disable=unused-argument
        # NOTE(review): HTMLParser has no ``parser`` attribute, so this would
        # raise AttributeError if ever called -- looks like dead code; confirm.
        self.parser.phase()

    def resetInsertionMode(self):
        # The name of this method is mostly historical. (It's also used in the
        # specification.)
        last = False
        newModes = {
            "select": "inSelect",
            "td": "inCell",
            "th": "inCell",
            "tr": "inRow",
            "tbody": "inTableBody",
            "thead": "inTableBody",
            "tfoot": "inTableBody",
            "caption": "inCaption",
            "colgroup": "inColumnGroup",
            "table": "inTable",
            "head": "inBody",
            "body": "inBody",
            "frameset": "inFrameset",
            "html": "beforeHead"
        }
        # Walk the open-elements stack from the innermost node outwards and
        # pick the phase appropriate for the first recognized element.
        for node in self.tree.openElements[::-1]:
            nodeName = node.name
            new_phase = None
            if node == self.tree.openElements[0]:
                assert self.innerHTML
                last = True
                nodeName = self.innerHTML
            # Check for conditions that should only happen in the innerHTML
            # case
            if nodeName in ("select", "colgroup", "head", "html"):
                assert self.innerHTML

            if not last and node.namespace != self.tree.defaultNamespace:
                continue

            if nodeName in newModes:
                new_phase = self.phases[newModes[nodeName]]
                break
            elif last:
                new_phase = self.phases["inBody"]
                break

        self.phase = new_phase

    def parseRCDataRawtext(self, token, contentType):
        # Generic RCDATA/RAWTEXT Parsing algorithm
        assert contentType in ("RAWTEXT", "RCDATA")

        self.tree.insertElement(token)

        if contentType == "RAWTEXT":
            self.tokenizer.state = self.tokenizer.rawtextState
        else:
            self.tokenizer.state = self.tokenizer.rcdataState

        # Remember where to return after the closing tag switches us back.
        self.originalPhase = self.phase

        self.phase = self.phases["text"]
410 | |||
411 | |||
412 | @_utils.memoize | ||
413 | def getPhases(debug): | ||
414 | def log(function): | ||
415 | """Logger that records which phase processes each token""" | ||
416 | type_names = dict((value, key) for key, value in | ||
417 | tokenTypes.items()) | ||
418 | |||
419 | def wrapped(self, *args, **kwargs): | ||
420 | if function.__name__.startswith("process") and len(args) > 0: | ||
421 | token = args[0] | ||
422 | try: | ||
423 | info = {"type": type_names[token['type']]} | ||
424 | except: | ||
425 | raise | ||
426 | if token['type'] in tagTokenTypes: | ||
427 | info["name"] = token['name'] | ||
428 | |||
429 | self.parser.log.append((self.parser.tokenizer.state.__name__, | ||
430 | self.parser.phase.__class__.__name__, | ||
431 | self.__class__.__name__, | ||
432 | function.__name__, | ||
433 | info)) | ||
434 | return function(self, *args, **kwargs) | ||
435 | else: | ||
436 | return function(self, *args, **kwargs) | ||
437 | return wrapped | ||
438 | |||
439 | def getMetaclass(use_metaclass, metaclass_func): | ||
440 | if use_metaclass: | ||
441 | return method_decorator_metaclass(metaclass_func) | ||
442 | else: | ||
443 | return type | ||
444 | |||
445 | # pylint:disable=unused-argument | ||
446 | class Phase(with_metaclass(getMetaclass(debug, log))): | ||
447 | """Base class for helper object that implements each phase of processing | ||
448 | """ | ||
449 | |||
450 | def __init__(self, parser, tree): | ||
451 | self.parser = parser | ||
452 | self.tree = tree | ||
453 | |||
454 | def processEOF(self): | ||
455 | raise NotImplementedError | ||
456 | |||
457 | def processComment(self, token): | ||
458 | # For most phases the following is correct. Where it's not it will be | ||
459 | # overridden. | ||
460 | self.tree.insertComment(token, self.tree.openElements[-1]) | ||
461 | |||
462 | def processDoctype(self, token): | ||
463 | self.parser.parseError("unexpected-doctype") | ||
464 | |||
465 | def processCharacters(self, token): | ||
466 | self.tree.insertText(token["data"]) | ||
467 | |||
468 | def processSpaceCharacters(self, token): | ||
469 | self.tree.insertText(token["data"]) | ||
470 | |||
471 | def processStartTag(self, token): | ||
472 | return self.startTagHandler[token["name"]](token) | ||
473 | |||
474 | def startTagHtml(self, token): | ||
475 | if not self.parser.firstStartTag and token["name"] == "html": | ||
476 | self.parser.parseError("non-html-root") | ||
477 | # XXX Need a check here to see if the first start tag token emitted is | ||
478 | # this token... If it's not, invoke self.parser.parseError(). | ||
479 | for attr, value in token["data"].items(): | ||
480 | if attr not in self.tree.openElements[0].attributes: | ||
481 | self.tree.openElements[0].attributes[attr] = value | ||
482 | self.parser.firstStartTag = False | ||
483 | |||
484 | def processEndTag(self, token): | ||
485 | return self.endTagHandler[token["name"]](token) | ||
486 | |||
    class InitialPhase(Phase):
        # The "initial" insertion mode: handles everything up to and
        # including the DOCTYPE and selects the document compatibility
        # mode ("quirks" / "limited quirks" / default).
        def processSpaceCharacters(self, token):
            # Whitespace before the DOCTYPE is dropped.
            pass

        def processComment(self, token):
            # Comments before the DOCTYPE become children of the Document.
            self.tree.insertComment(token, self.tree.document)

        def processDoctype(self, token):
            # Examine the DOCTYPE token, emit a parse error for anything
            # other than a plain `<!DOCTYPE html>` (optionally with the
            # about:legacy-compat system id), choose the compat mode, and
            # move on to the "beforeHtml" phase.
            name = token["name"]
            publicId = token["publicId"]
            systemId = token["systemId"]
            correct = token["correct"]

            if (name != "html" or publicId is not None or
                    systemId is not None and systemId != "about:legacy-compat"):
                self.parser.parseError("unknown-doctype")

            if publicId is None:
                publicId = ""

            self.tree.insertDoctype(token)

            # Public ids are matched case-insensitively below.
            if publicId != "":
                publicId = publicId.translate(asciiUpper2Lower)

            # Known legacy public/system ids force full quirks mode.
            # NOTE(review): this condition deliberately mixes `and`/`or`
            # and relies on Python operator precedence (`and` binds
            # tighter) — do not re-parenthesize casually.
            if (not correct or token["name"] != "html" or
                    publicId.startswith(
                        ("+//silmaril//dtd html pro v0r11 19970101//",
                         "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
                         "-//as//dtd html 3.0 aswedit + extensions//",
                         "-//ietf//dtd html 2.0 level 1//",
                         "-//ietf//dtd html 2.0 level 2//",
                         "-//ietf//dtd html 2.0 strict level 1//",
                         "-//ietf//dtd html 2.0 strict level 2//",
                         "-//ietf//dtd html 2.0 strict//",
                         "-//ietf//dtd html 2.0//",
                         "-//ietf//dtd html 2.1e//",
                         "-//ietf//dtd html 3.0//",
                         "-//ietf//dtd html 3.2 final//",
                         "-//ietf//dtd html 3.2//",
                         "-//ietf//dtd html 3//",
                         "-//ietf//dtd html level 0//",
                         "-//ietf//dtd html level 1//",
                         "-//ietf//dtd html level 2//",
                         "-//ietf//dtd html level 3//",
                         "-//ietf//dtd html strict level 0//",
                         "-//ietf//dtd html strict level 1//",
                         "-//ietf//dtd html strict level 2//",
                         "-//ietf//dtd html strict level 3//",
                         "-//ietf//dtd html strict//",
                         "-//ietf//dtd html//",
                         "-//metrius//dtd metrius presentational//",
                         "-//microsoft//dtd internet explorer 2.0 html strict//",
                         "-//microsoft//dtd internet explorer 2.0 html//",
                         "-//microsoft//dtd internet explorer 2.0 tables//",
                         "-//microsoft//dtd internet explorer 3.0 html strict//",
                         "-//microsoft//dtd internet explorer 3.0 html//",
                         "-//microsoft//dtd internet explorer 3.0 tables//",
                         "-//netscape comm. corp.//dtd html//",
                         "-//netscape comm. corp.//dtd strict html//",
                         "-//o'reilly and associates//dtd html 2.0//",
                         "-//o'reilly and associates//dtd html extended 1.0//",
                         "-//o'reilly and associates//dtd html extended relaxed 1.0//",
                         "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
                         "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
                         "-//spyglass//dtd html 2.0 extended//",
                         "-//sq//dtd html 2.0 hotmetal + extensions//",
                         "-//sun microsystems corp.//dtd hotjava html//",
                         "-//sun microsystems corp.//dtd hotjava strict html//",
                         "-//w3c//dtd html 3 1995-03-24//",
                         "-//w3c//dtd html 3.2 draft//",
                         "-//w3c//dtd html 3.2 final//",
                         "-//w3c//dtd html 3.2//",
                         "-//w3c//dtd html 3.2s draft//",
                         "-//w3c//dtd html 4.0 frameset//",
                         "-//w3c//dtd html 4.0 transitional//",
                         "-//w3c//dtd html experimental 19960712//",
                         "-//w3c//dtd html experimental 970421//",
                         "-//w3c//dtd w3 html//",
                         "-//w3o//dtd w3 html 3.0//",
                         "-//webtechs//dtd mozilla html 2.0//",
                         "-//webtechs//dtd mozilla html//")) or
                    publicId in ("-//w3o//dtd w3 html strict 3.0//en//",
                                 "-/w3c/dtd html 4.0 transitional/en",
                                 "html") or
                    publicId.startswith(
                        ("-//w3c//dtd html 4.01 frameset//",
                         "-//w3c//dtd html 4.01 transitional//")) and
                    systemId is None or
                    systemId and systemId.lower() == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"):
                self.parser.compatMode = "quirks"
            elif (publicId.startswith(
                    ("-//w3c//dtd xhtml 1.0 frameset//",
                     "-//w3c//dtd xhtml 1.0 transitional//")) or
                    publicId.startswith(
                        ("-//w3c//dtd html 4.01 frameset//",
                         "-//w3c//dtd html 4.01 transitional//")) and
                    systemId is not None):
                self.parser.compatMode = "limited quirks"

            self.parser.phase = self.parser.phases["beforeHtml"]

        def anythingElse(self):
            # No usable DOCTYPE was seen: the document is quirks mode.
            self.parser.compatMode = "quirks"
            self.parser.phase = self.parser.phases["beforeHtml"]

        def processCharacters(self, token):
            self.parser.parseError("expected-doctype-but-got-chars")
            self.anythingElse()
            # Returning the token reprocesses it in the new phase.
            return token

        def processStartTag(self, token):
            self.parser.parseError("expected-doctype-but-got-start-tag",
                                   {"name": token["name"]})
            self.anythingElse()
            return token

        def processEndTag(self, token):
            self.parser.parseError("expected-doctype-but-got-end-tag",
                                   {"name": token["name"]})
            self.anythingElse()
            return token

        def processEOF(self):
            self.parser.parseError("expected-doctype-but-got-eof")
            self.anythingElse()
            # True signals the caller to reprocess the EOF in the new phase.
            return True
614 | |||
615 | class BeforeHtmlPhase(Phase): | ||
616 | # helper methods | ||
617 | def insertHtmlElement(self): | ||
618 | self.tree.insertRoot(impliedTagToken("html", "StartTag")) | ||
619 | self.parser.phase = self.parser.phases["beforeHead"] | ||
620 | |||
621 | # other | ||
622 | def processEOF(self): | ||
623 | self.insertHtmlElement() | ||
624 | return True | ||
625 | |||
626 | def processComment(self, token): | ||
627 | self.tree.insertComment(token, self.tree.document) | ||
628 | |||
629 | def processSpaceCharacters(self, token): | ||
630 | pass | ||
631 | |||
632 | def processCharacters(self, token): | ||
633 | self.insertHtmlElement() | ||
634 | return token | ||
635 | |||
636 | def processStartTag(self, token): | ||
637 | if token["name"] == "html": | ||
638 | self.parser.firstStartTag = True | ||
639 | self.insertHtmlElement() | ||
640 | return token | ||
641 | |||
642 | def processEndTag(self, token): | ||
643 | if token["name"] not in ("head", "body", "html", "br"): | ||
644 | self.parser.parseError("unexpected-end-tag-before-html", | ||
645 | {"name": token["name"]}) | ||
646 | else: | ||
647 | self.insertHtmlElement() | ||
648 | return token | ||
649 | |||
650 | class BeforeHeadPhase(Phase): | ||
651 | def __init__(self, parser, tree): | ||
652 | Phase.__init__(self, parser, tree) | ||
653 | |||
654 | self.startTagHandler = _utils.MethodDispatcher([ | ||
655 | ("html", self.startTagHtml), | ||
656 | ("head", self.startTagHead) | ||
657 | ]) | ||
658 | self.startTagHandler.default = self.startTagOther | ||
659 | |||
660 | self.endTagHandler = _utils.MethodDispatcher([ | ||
661 | (("head", "body", "html", "br"), self.endTagImplyHead) | ||
662 | ]) | ||
663 | self.endTagHandler.default = self.endTagOther | ||
664 | |||
665 | def processEOF(self): | ||
666 | self.startTagHead(impliedTagToken("head", "StartTag")) | ||
667 | return True | ||
668 | |||
669 | def processSpaceCharacters(self, token): | ||
670 | pass | ||
671 | |||
672 | def processCharacters(self, token): | ||
673 | self.startTagHead(impliedTagToken("head", "StartTag")) | ||
674 | return token | ||
675 | |||
676 | def startTagHtml(self, token): | ||
677 | return self.parser.phases["inBody"].processStartTag(token) | ||
678 | |||
679 | def startTagHead(self, token): | ||
680 | self.tree.insertElement(token) | ||
681 | self.tree.headPointer = self.tree.openElements[-1] | ||
682 | self.parser.phase = self.parser.phases["inHead"] | ||
683 | |||
684 | def startTagOther(self, token): | ||
685 | self.startTagHead(impliedTagToken("head", "StartTag")) | ||
686 | return token | ||
687 | |||
688 | def endTagImplyHead(self, token): | ||
689 | self.startTagHead(impliedTagToken("head", "StartTag")) | ||
690 | return token | ||
691 | |||
692 | def endTagOther(self, token): | ||
693 | self.parser.parseError("end-tag-after-implied-root", | ||
694 | {"name": token["name"]}) | ||
695 | |||
    class InHeadPhase(Phase):
        # The "in head" insertion mode: metadata elements (title, meta,
        # link, script, style, ...) live here; anything else pops the head
        # and is reprocessed in "afterHead".
        def __init__(self, parser, tree):
            Phase.__init__(self, parser, tree)

            self.startTagHandler = _utils.MethodDispatcher([
                ("html", self.startTagHtml),
                ("title", self.startTagTitle),
                (("noframes", "style"), self.startTagNoFramesStyle),
                ("noscript", self.startTagNoscript),
                ("script", self.startTagScript),
                (("base", "basefont", "bgsound", "command", "link"),
                 self.startTagBaseLinkCommand),
                ("meta", self.startTagMeta),
                ("head", self.startTagHead)
            ])
            self.startTagHandler.default = self.startTagOther

            self.endTagHandler = _utils.MethodDispatcher([
                ("head", self.endTagHead),
                (("br", "html", "body"), self.endTagHtmlBodyBr)
            ])
            self.endTagHandler.default = self.endTagOther

        # the real thing
        def processEOF(self):
            # Implicitly close the head, then reprocess EOF in "afterHead".
            self.anythingElse()
            return True

        def processCharacters(self, token):
            # Non-space text does not belong in <head>: close it and
            # reprocess the token.
            self.anythingElse()
            return token

        def startTagHtml(self, token):
            return self.parser.phases["inBody"].processStartTag(token)

        def startTagHead(self, token):
            self.parser.parseError("two-heads-are-not-better-than-one")

        def startTagBaseLinkCommand(self, token):
            # Void metadata elements: insert and immediately pop.
            self.tree.insertElement(token)
            self.tree.openElements.pop()
            token["selfClosingAcknowledged"] = True

        def startTagMeta(self, token):
            self.tree.insertElement(token)
            self.tree.openElements.pop()
            token["selfClosingAcknowledged"] = True

            # A <meta> may declare the document encoding; only act on it
            # while the input stream's encoding is still tentative.
            attributes = token["data"]
            if self.parser.tokenizer.stream.charEncoding[1] == "tentative":
                if "charset" in attributes:
                    self.parser.tokenizer.stream.changeEncoding(attributes["charset"])
                elif ("content" in attributes and
                      "http-equiv" in attributes and
                      attributes["http-equiv"].lower() == "content-type"):
                    # Encoding it as UTF-8 here is a hack, as really we should pass
                    # the abstract Unicode string, and just use the
                    # ContentAttrParser on that, but using UTF-8 allows all chars
                    # to be encoded and as a ASCII-superset works.
                    data = _inputstream.EncodingBytes(attributes["content"].encode("utf-8"))
                    parser = _inputstream.ContentAttrParser(data)
                    codec = parser.parse()
                    self.parser.tokenizer.stream.changeEncoding(codec)

        def startTagTitle(self, token):
            # <title> content is RCDATA (entities decoded, no markup).
            self.parser.parseRCDataRawtext(token, "RCDATA")

        def startTagNoFramesStyle(self, token):
            # Need to decide whether to implement the scripting-disabled case
            self.parser.parseRCDataRawtext(token, "RAWTEXT")

        def startTagNoscript(self, token):
            # With scripting on, <noscript> content is opaque raw text;
            # otherwise it is parsed in its own insertion mode.
            if self.parser.scripting:
                self.parser.parseRCDataRawtext(token, "RAWTEXT")
            else:
                self.tree.insertElement(token)
                self.parser.phase = self.parser.phases["inHeadNoscript"]

        def startTagScript(self, token):
            # Switch the tokenizer to script-data state and remember the
            # current phase so the "text" phase can return to it.
            self.tree.insertElement(token)
            self.parser.tokenizer.state = self.parser.tokenizer.scriptDataState
            self.parser.originalPhase = self.parser.phase
            self.parser.phase = self.parser.phases["text"]

        def startTagOther(self, token):
            self.anythingElse()
            return token

        def endTagHead(self, token):
            # Pop the <head> element and move to the "afterHead" phase.
            node = self.parser.tree.openElements.pop()
            assert node.name == "head", "Expected head got %s" % node.name
            self.parser.phase = self.parser.phases["afterHead"]

        def endTagHtmlBodyBr(self, token):
            self.anythingElse()
            return token

        def endTagOther(self, token):
            self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

        def anythingElse(self):
            # Default action: act as though </head> was seen.
            self.endTagHead(impliedTagToken("head"))
798 | |||
    class InHeadNoscriptPhase(Phase):
        # The "in head noscript" insertion mode: inside <noscript> in
        # <head> when scripting is disabled.  Most tokens delegate to the
        # "inHead" rules; disallowed content closes the <noscript> and is
        # reprocessed.
        def __init__(self, parser, tree):
            Phase.__init__(self, parser, tree)

            self.startTagHandler = _utils.MethodDispatcher([
                ("html", self.startTagHtml),
                (("basefont", "bgsound", "link", "meta", "noframes", "style"), self.startTagBaseLinkCommand),
                (("head", "noscript"), self.startTagHeadNoscript),
            ])
            self.startTagHandler.default = self.startTagOther

            self.endTagHandler = _utils.MethodDispatcher([
                ("noscript", self.endTagNoscript),
                ("br", self.endTagBr),
            ])
            self.endTagHandler.default = self.endTagOther

        def processEOF(self):
            # Error first, then close the <noscript> and reprocess EOF.
            self.parser.parseError("eof-in-head-noscript")
            self.anythingElse()
            return True

        def processComment(self, token):
            return self.parser.phases["inHead"].processComment(token)

        def processCharacters(self, token):
            self.parser.parseError("char-in-head-noscript")
            self.anythingElse()
            return token

        def processSpaceCharacters(self, token):
            return self.parser.phases["inHead"].processSpaceCharacters(token)

        def startTagHtml(self, token):
            return self.parser.phases["inBody"].processStartTag(token)

        def startTagBaseLinkCommand(self, token):
            # Metadata elements are still handled by the "inHead" rules.
            return self.parser.phases["inHead"].processStartTag(token)

        def startTagHeadNoscript(self, token):
            # Nested <head>/<noscript> is simply an error; token dropped.
            self.parser.parseError("unexpected-start-tag", {"name": token["name"]})

        def startTagOther(self, token):
            self.parser.parseError("unexpected-inhead-noscript-tag", {"name": token["name"]})
            self.anythingElse()
            return token

        def endTagNoscript(self, token):
            # Pop the <noscript> element and return to the "inHead" phase.
            node = self.parser.tree.openElements.pop()
            assert node.name == "noscript", "Expected noscript got %s" % node.name
            self.parser.phase = self.parser.phases["inHead"]

        def endTagBr(self, token):
            self.parser.parseError("unexpected-inhead-noscript-tag", {"name": token["name"]})
            self.anythingElse()
            return token

        def endTagOther(self, token):
            self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

        def anythingElse(self):
            # Caller must raise parse error first!
            self.endTagNoscript(impliedTagToken("noscript"))
862 | |||
    class AfterHeadPhase(Phase):
        # The "after head" insertion mode: between </head> and <body>.
        # Stray head-only elements are routed back into the head; anything
        # else implies <body>.
        def __init__(self, parser, tree):
            Phase.__init__(self, parser, tree)

            self.startTagHandler = _utils.MethodDispatcher([
                ("html", self.startTagHtml),
                ("body", self.startTagBody),
                ("frameset", self.startTagFrameset),
                (("base", "basefont", "bgsound", "link", "meta", "noframes", "script",
                  "style", "title"),
                 self.startTagFromHead),
                ("head", self.startTagHead)
            ])
            self.startTagHandler.default = self.startTagOther
            self.endTagHandler = _utils.MethodDispatcher([(("body", "html", "br"),
                                                           self.endTagHtmlBodyBr)])
            self.endTagHandler.default = self.endTagOther

        def processEOF(self):
            # Imply <body>, then reprocess EOF there.
            self.anythingElse()
            return True

        def processCharacters(self, token):
            self.anythingElse()
            return token

        def startTagHtml(self, token):
            return self.parser.phases["inBody"].processStartTag(token)

        def startTagBody(self, token):
            # An explicit <body> disables frameset switching.
            self.parser.framesetOK = False
            self.tree.insertElement(token)
            self.parser.phase = self.parser.phases["inBody"]

        def startTagFrameset(self, token):
            self.tree.insertElement(token)
            self.parser.phase = self.parser.phases["inFrameset"]

        def startTagFromHead(self, token):
            # Head-only element after </head>: temporarily push the head
            # back onto the open-elements stack, process the token with the
            # "inHead" rules, then remove the head node again.
            self.parser.parseError("unexpected-start-tag-out-of-my-head",
                                   {"name": token["name"]})
            self.tree.openElements.append(self.tree.headPointer)
            self.parser.phases["inHead"].processStartTag(token)
            for node in self.tree.openElements[::-1]:
                if node.name == "head":
                    self.tree.openElements.remove(node)
                    break

        def startTagHead(self, token):
            self.parser.parseError("unexpected-start-tag", {"name": token["name"]})

        def startTagOther(self, token):
            self.anythingElse()
            return token

        def endTagHtmlBodyBr(self, token):
            self.anythingElse()
            return token

        def endTagOther(self, token):
            self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

        def anythingElse(self):
            # Synthesize <body> and resume there; framesetOK is reset so a
            # following <frameset> can still replace the implied body.
            self.tree.insertElement(impliedTagToken("body", "StartTag"))
            self.parser.phase = self.parser.phases["inBody"]
            self.parser.framesetOK = True
929 | |||
930 | class InBodyPhase(Phase): | ||
931 | # http://www.whatwg.org/specs/web-apps/current-work/#parsing-main-inbody | ||
932 | # the really-really-really-very crazy mode | ||
        def __init__(self, parser, tree):
            # Register the per-tag-name start/end tag handlers.  The
            # MethodDispatcher maps names (or tuples of names) to bound
            # methods; unmatched names fall through to the default.
            Phase.__init__(self, parser, tree)

            # Set this to the default handler
            self.processSpaceCharacters = self.processSpaceCharactersNonPre

            self.startTagHandler = _utils.MethodDispatcher([
                ("html", self.startTagHtml),
                (("base", "basefont", "bgsound", "command", "link", "meta",
                  "script", "style", "title"),
                 self.startTagProcessInHead),
                ("body", self.startTagBody),
                ("frameset", self.startTagFrameset),
                (("address", "article", "aside", "blockquote", "center", "details",
                  "dir", "div", "dl", "fieldset", "figcaption", "figure",
                  "footer", "header", "hgroup", "main", "menu", "nav", "ol", "p",
                  "section", "summary", "ul"),
                 self.startTagCloseP),
                (headingElements, self.startTagHeading),
                (("pre", "listing"), self.startTagPreListing),
                ("form", self.startTagForm),
                (("li", "dd", "dt"), self.startTagListItem),
                ("plaintext", self.startTagPlaintext),
                ("a", self.startTagA),
                (("b", "big", "code", "em", "font", "i", "s", "small", "strike",
                  "strong", "tt", "u"), self.startTagFormatting),
                ("nobr", self.startTagNobr),
                ("button", self.startTagButton),
                (("applet", "marquee", "object"), self.startTagAppletMarqueeObject),
                ("xmp", self.startTagXmp),
                ("table", self.startTagTable),
                (("area", "br", "embed", "img", "keygen", "wbr"),
                 self.startTagVoidFormatting),
                (("param", "source", "track"), self.startTagParamSource),
                ("input", self.startTagInput),
                ("hr", self.startTagHr),
                ("image", self.startTagImage),
                ("isindex", self.startTagIsIndex),
                ("textarea", self.startTagTextarea),
                ("iframe", self.startTagIFrame),
                ("noscript", self.startTagNoscript),
                (("noembed", "noframes"), self.startTagRawtext),
                ("select", self.startTagSelect),
                (("rp", "rt"), self.startTagRpRt),
                (("option", "optgroup"), self.startTagOpt),
                (("math"), self.startTagMath),
                (("svg"), self.startTagSvg),
                (("caption", "col", "colgroup", "frame", "head",
                  "tbody", "td", "tfoot", "th", "thead",
                  "tr"), self.startTagMisplaced)
            ])
            self.startTagHandler.default = self.startTagOther

            self.endTagHandler = _utils.MethodDispatcher([
                ("body", self.endTagBody),
                ("html", self.endTagHtml),
                (("address", "article", "aside", "blockquote", "button", "center",
                  "details", "dialog", "dir", "div", "dl", "fieldset", "figcaption", "figure",
                  "footer", "header", "hgroup", "listing", "main", "menu", "nav", "ol", "pre",
                  "section", "summary", "ul"), self.endTagBlock),
                ("form", self.endTagForm),
                ("p", self.endTagP),
                (("dd", "dt", "li"), self.endTagListItem),
                (headingElements, self.endTagHeading),
                (("a", "b", "big", "code", "em", "font", "i", "nobr", "s", "small",
                  "strike", "strong", "tt", "u"), self.endTagFormatting),
                (("applet", "marquee", "object"), self.endTagAppletMarqueeObject),
                ("br", self.endTagBr),
            ])
            self.endTagHandler.default = self.endTagOther
1003 | |||
1004 | def isMatchingFormattingElement(self, node1, node2): | ||
1005 | return (node1.name == node2.name and | ||
1006 | node1.namespace == node2.namespace and | ||
1007 | node1.attributes == node2.attributes) | ||
1008 | |||
1009 | # helper | ||
        def addFormattingElement(self, token):
            # Insert the element and append it to the list of active
            # formatting elements, enforcing the "Noah's Ark" clause: at
            # most three identical entries since the last Marker.
            self.tree.insertElement(token)
            element = self.tree.openElements[-1]

            matchingElements = []
            for node in self.tree.activeFormattingElements[::-1]:
                if node is Marker:
                    break
                elif self.isMatchingFormattingElement(node, element):
                    matchingElements.append(node)

            assert len(matchingElements) <= 3
            if len(matchingElements) == 3:
                # matchingElements was collected back-to-front, so [-1] is
                # the earliest matching entry — that one is dropped.
                self.tree.activeFormattingElements.remove(matchingElements[-1])
            self.tree.activeFormattingElements.append(element)
1025 | |||
1026 | # the real deal | ||
        def processEOF(self):
            # EOF in body: elements that may be implicitly closed are fine;
            # anything else still open is a parse error (reported once).
            allowed_elements = frozenset(("dd", "dt", "li", "p", "tbody", "td",
                                          "tfoot", "th", "thead", "tr", "body",
                                          "html"))
            for node in self.tree.openElements[::-1]:
                if node.name not in allowed_elements:
                    self.parser.parseError("expected-closing-tag-but-got-eof")
                    break
            # Stop parsing
1036 | |||
        def processSpaceCharactersDropNewline(self, token):
            # Sometimes (start of <pre>, <listing>, and <textarea> blocks) we
            # want to drop leading newlines
            data = token["data"]
            # One-shot behavior: immediately restore the normal handler.
            self.processSpaceCharacters = self.processSpaceCharactersNonPre
            if (data.startswith("\n") and
                    self.tree.openElements[-1].name in ("pre", "listing", "textarea") and
                    not self.tree.openElements[-1].hasContent()):
                data = data[1:]
            if data:
                self.tree.reconstructActiveFormattingElements()
                self.tree.insertText(data)
1049 | |||
1050 | def processCharacters(self, token): | ||
1051 | if token["data"] == "\u0000": | ||
1052 | # The tokenizer should always emit null on its own | ||
1053 | return | ||
1054 | self.tree.reconstructActiveFormattingElements() | ||
1055 | self.tree.insertText(token["data"]) | ||
1056 | # This must be bad for performance | ||
1057 | if (self.parser.framesetOK and | ||
1058 | any([char not in spaceCharacters | ||
1059 | for char in token["data"]])): | ||
1060 | self.parser.framesetOK = False | ||
1061 | |||
        def processSpaceCharactersNonPre(self, token):
            # Default whitespace handling: insert the text as-is.
            self.tree.reconstructActiveFormattingElements()
            self.tree.insertText(token["data"])
1065 | |||
        def startTagProcessInHead(self, token):
            # Metadata start tags in body are handled by the "inHead" rules.
            return self.parser.phases["inHead"].processStartTag(token)
1068 | |||
        def startTagBody(self, token):
            # A second <body> is an error; its attributes are merged onto
            # the existing body element (existing attributes win).
            self.parser.parseError("unexpected-start-tag", {"name": "body"})
            if (len(self.tree.openElements) == 1 or
                    self.tree.openElements[1].name != "body"):
                # Only reachable when parsing a fragment (innerHTML mode).
                assert self.parser.innerHTML
            else:
                self.parser.framesetOK = False
                for attr, value in token["data"].items():
                    if attr not in self.tree.openElements[1].attributes:
                        self.tree.openElements[1].attributes[attr] = value
1079 | |||
        def startTagFrameset(self, token):
            # <frameset> in body: only honored while framesetOK is still
            # set, in which case the existing body is removed and replaced.
            self.parser.parseError("unexpected-start-tag", {"name": "frameset"})
            if (len(self.tree.openElements) == 1 or self.tree.openElements[1].name != "body"):
                # Only reachable when parsing a fragment (innerHTML mode).
                assert self.parser.innerHTML
            elif not self.parser.framesetOK:
                pass
            else:
                if self.tree.openElements[1].parent:
                    self.tree.openElements[1].parent.removeChild(self.tree.openElements[1])
                while self.tree.openElements[-1].name != "html":
                    self.tree.openElements.pop()
                self.tree.insertElement(token)
                self.parser.phase = self.parser.phases["inFrameset"]
1093 | |||
1094 | def startTagCloseP(self, token): | ||
1095 | if self.tree.elementInScope("p", variant="button"): | ||
1096 | self.endTagP(impliedTagToken("p")) | ||
1097 | self.tree.insertElement(token) | ||
1098 | |||
1099 | def startTagPreListing(self, token): | ||
1100 | if self.tree.elementInScope("p", variant="button"): | ||
1101 | self.endTagP(impliedTagToken("p")) | ||
1102 | self.tree.insertElement(token) | ||
1103 | self.parser.framesetOK = False | ||
1104 | self.processSpaceCharacters = self.processSpaceCharactersDropNewline | ||
1105 | |||
        def startTagForm(self, token):
            # Only one <form> may be open at a time; a nested one while
            # formPointer is set is an error and the token is ignored.
            if self.tree.formPointer:
                self.parser.parseError("unexpected-start-tag", {"name": "form"})
            else:
                if self.tree.elementInScope("p", variant="button"):
                    self.endTagP(impliedTagToken("p"))
                self.tree.insertElement(token)
                self.tree.formPointer = self.tree.openElements[-1]
1114 | |||
        def startTagListItem(self, token):
            # <li>/<dd>/<dt>: walk the open-elements stack from the top,
            # implicitly closing a sibling item of the same kind, but stop
            # at any "special" element other than address/div/p.
            self.parser.framesetOK = False

            stopNamesMap = {"li": ["li"],
                            "dt": ["dt", "dd"],
                            "dd": ["dt", "dd"]}
            stopNames = stopNamesMap[token["name"]]
            for node in reversed(self.tree.openElements):
                if node.name in stopNames:
                    self.parser.phase.processEndTag(
                        impliedTagToken(node.name, "EndTag"))
                    break
                if (node.nameTuple in specialElements and
                        node.name not in ("address", "div", "p")):
                    break

            if self.tree.elementInScope("p", variant="button"):
                self.parser.phase.processEndTag(
                    impliedTagToken("p", "EndTag"))

            self.tree.insertElement(token)
1136 | |||
1137 | def startTagPlaintext(self, token): | ||
1138 | if self.tree.elementInScope("p", variant="button"): | ||
1139 | self.endTagP(impliedTagToken("p")) | ||
1140 | self.tree.insertElement(token) | ||
1141 | self.parser.tokenizer.state = self.parser.tokenizer.plaintextState | ||
1142 | |||
        def startTagHeading(self, token):
            # h1-h6: close any open <p>; a heading directly inside another
            # heading is an error and implicitly closes it.
            if self.tree.elementInScope("p", variant="button"):
                self.endTagP(impliedTagToken("p"))
            if self.tree.openElements[-1].name in headingElements:
                self.parser.parseError("unexpected-start-tag", {"name": token["name"]})
                self.tree.openElements.pop()
            self.tree.insertElement(token)
1150 | |||
        def startTagA(self, token):
            # An <a> while another <a> is in the active formatting list is
            # an error: the old one is closed (adoption agency via
            # endTagFormatting) and fully removed before the new one opens.
            afeAElement = self.tree.elementInActiveFormattingElements("a")
            if afeAElement:
                self.parser.parseError("unexpected-start-tag-implies-end-tag",
                                       {"startName": "a", "endName": "a"})
                self.endTagFormatting(impliedTagToken("a"))
                if afeAElement in self.tree.openElements:
                    self.tree.openElements.remove(afeAElement)
                if afeAElement in self.tree.activeFormattingElements:
                    self.tree.activeFormattingElements.remove(afeAElement)
            self.tree.reconstructActiveFormattingElements()
            self.addFormattingElement(token)
1163 | |||
        def startTagFormatting(self, token):
            # b/big/code/em/font/i/s/small/strike/strong/tt/u: plain
            # formatting elements; insert and track in the active list.
            self.tree.reconstructActiveFormattingElements()
            self.addFormattingElement(token)
1167 | |||
        def startTagNobr(self, token):
            # <nobr> nested inside <nobr> is an error and implicitly closes
            # the outer one before the new one is opened.
            self.tree.reconstructActiveFormattingElements()
            if self.tree.elementInScope("nobr"):
                self.parser.parseError("unexpected-start-tag-implies-end-tag",
                                       {"startName": "nobr", "endName": "nobr"})
                self.processEndTag(impliedTagToken("nobr"))
                # XXX Need tests that trigger the following
                self.tree.reconstructActiveFormattingElements()
            self.addFormattingElement(token)
1177 | |||
        def startTagButton(self, token):
            # A <button> inside an open <button> is an error: close the old
            # one and reprocess this token (hence the return).
            if self.tree.elementInScope("button"):
                self.parser.parseError("unexpected-start-tag-implies-end-tag",
                                       {"startName": "button", "endName": "button"})
                self.processEndTag(impliedTagToken("button"))
                return token
            else:
                self.tree.reconstructActiveFormattingElements()
                self.tree.insertElement(token)
                self.parser.framesetOK = False
1188 | |||
1189 | def startTagAppletMarqueeObject(self, token): | ||
1190 | self.tree.reconstructActiveFormattingElements() | ||
1191 | self.tree.insertElement(token) | ||
1192 | self.tree.activeFormattingElements.append(Marker) | ||
1193 | self.parser.framesetOK = False | ||
1194 | |||
1195 | def startTagXmp(self, token): | ||
1196 | if self.tree.elementInScope("p", variant="button"): | ||
1197 | self.endTagP(impliedTagToken("p")) | ||
1198 | self.tree.reconstructActiveFormattingElements() | ||
1199 | self.parser.framesetOK = False | ||
1200 | self.parser.parseRCDataRawtext(token, "RAWTEXT") | ||
1201 | |||
        def startTagTable(self, token):
            # <table>: in standards mode an open <p> is closed first (in
            # quirks mode the table nests inside the paragraph).
            if self.parser.compatMode != "quirks":
                if self.tree.elementInScope("p", variant="button"):
                    self.processEndTag(impliedTagToken("p"))
            self.tree.insertElement(token)
            self.parser.framesetOK = False
            self.parser.phase = self.parser.phases["inTable"]
1209 | |||
1210 | def startTagVoidFormatting(self, token): | ||
1211 | self.tree.reconstructActiveFormattingElements() | ||
1212 | self.tree.insertElement(token) | ||
1213 | self.tree.openElements.pop() | ||
1214 | token["selfClosingAcknowledged"] = True | ||
1215 | self.parser.framesetOK = False | ||
1216 | |||
        def startTagInput(self, token):
            # <input> is handled as a void formatting element, except that
            # type=hidden must not change framesetOK — save and restore it.
            framesetOK = self.parser.framesetOK
            self.startTagVoidFormatting(token)
            if ("type" in token["data"] and
                    token["data"]["type"].translate(asciiUpper2Lower) == "hidden"):
                # input type=hidden doesn't change framesetOK
                self.parser.framesetOK = framesetOK
1224 | |||
1225 | def startTagParamSource(self, token): | ||
1226 | self.tree.insertElement(token) | ||
1227 | self.tree.openElements.pop() | ||
1228 | token["selfClosingAcknowledged"] = True | ||
1229 | |||
1230 | def startTagHr(self, token): | ||
1231 | if self.tree.elementInScope("p", variant="button"): | ||
1232 | self.endTagP(impliedTagToken("p")) | ||
1233 | self.tree.insertElement(token) | ||
1234 | self.tree.openElements.pop() | ||
1235 | token["selfClosingAcknowledged"] = True | ||
1236 | self.parser.framesetOK = False | ||
1237 | |||
        def startTagImage(self, token):
            # No really...
            # <image> is rewritten as <img> with the same attributes and
            # reprocessed through the normal dispatch.
            self.parser.parseError("unexpected-start-tag-treated-as",
                                   {"originalName": "image", "newName": "img"})
            self.processStartTag(impliedTagToken("img", "StartTag",
                                                 attributes=token["data"],
                                                 selfClosing=token["selfClosing"]))
1245 | |||
        def startTagIsIndex(self, token):
            # <isindex> is expanded into the equivalent legacy markup:
            # <form><hr><label>prompt<input name=isindex></label><hr></form>.
            # The synthesized tokens are fed back through normal processing,
            # so their order here matters.
            self.parser.parseError("deprecated-tag", {"name": "isindex"})
            if self.tree.formPointer:
                # Ignored entirely while another form is open.
                return
            form_attrs = {}
            if "action" in token["data"]:
                form_attrs["action"] = token["data"]["action"]
            self.processStartTag(impliedTagToken("form", "StartTag",
                                                 attributes=form_attrs))
            self.processStartTag(impliedTagToken("hr", "StartTag"))
            self.processStartTag(impliedTagToken("label", "StartTag"))
            # XXX Localization ...
            if "prompt" in token["data"]:
                prompt = token["data"]["prompt"]
            else:
                prompt = "This is a searchable index. Enter search keywords: "
            self.processCharacters(
                {"type": tokenTypes["Characters"], "data": prompt})
            # The original attributes (minus action/prompt) carry over to
            # the synthesized <input>.
            attributes = token["data"].copy()
            if "action" in attributes:
                del attributes["action"]
            if "prompt" in attributes:
                del attributes["prompt"]
            attributes["name"] = "isindex"
            self.processStartTag(impliedTagToken("input", "StartTag",
                                                 attributes=attributes,
                                                 selfClosing=token["selfClosing"]))
            self.processEndTag(impliedTagToken("label"))
            self.processStartTag(impliedTagToken("hr", "StartTag"))
            self.processEndTag(impliedTagToken("form"))
1276 | |||
1277 | def startTagTextarea(self, token): | ||
1278 | self.tree.insertElement(token) | ||
1279 | self.parser.tokenizer.state = self.parser.tokenizer.rcdataState | ||
1280 | self.processSpaceCharacters = self.processSpaceCharactersDropNewline | ||
1281 | self.parser.framesetOK = False | ||
1282 | |||
        def startTagIFrame(self, token):
            # An <iframe> disallows a subsequent <frameset> and its content
            # is parsed as raw text.
            self.parser.framesetOK = False
            self.startTagRawtext(token)
1286 | |||
1287 | def startTagNoscript(self, token): | ||
1288 | if self.parser.scripting: | ||
1289 | self.startTagRawtext(token) | ||
1290 | else: | ||
1291 | self.startTagOther(token) | ||
1292 | |||
        def startTagRawtext(self, token):
            """iframe, noembed noframes, noscript(if scripting enabled)

            Delegates to the parser so the tokenizer is switched into
            RAWTEXT mode for the element's content.
            """
            self.parser.parseRCDataRawtext(token, "RAWTEXT")
1296 | |||
1297 | def startTagOpt(self, token): | ||
1298 | if self.tree.openElements[-1].name == "option": | ||
1299 | self.parser.phase.processEndTag(impliedTagToken("option")) | ||
1300 | self.tree.reconstructActiveFormattingElements() | ||
1301 | self.parser.tree.insertElement(token) | ||
1302 | |||
1303 | def startTagSelect(self, token): | ||
1304 | self.tree.reconstructActiveFormattingElements() | ||
1305 | self.tree.insertElement(token) | ||
1306 | self.parser.framesetOK = False | ||
1307 | if self.parser.phase in (self.parser.phases["inTable"], | ||
1308 | self.parser.phases["inCaption"], | ||
1309 | self.parser.phases["inColumnGroup"], | ||
1310 | self.parser.phases["inTableBody"], | ||
1311 | self.parser.phases["inRow"], | ||
1312 | self.parser.phases["inCell"]): | ||
1313 | self.parser.phase = self.parser.phases["inSelectInTable"] | ||
1314 | else: | ||
1315 | self.parser.phase = self.parser.phases["inSelect"] | ||
1316 | |||
1317 | def startTagRpRt(self, token): | ||
1318 | if self.tree.elementInScope("ruby"): | ||
1319 | self.tree.generateImpliedEndTags() | ||
1320 | if self.tree.openElements[-1].name != "ruby": | ||
1321 | self.parser.parseError() | ||
1322 | self.tree.insertElement(token) | ||
1323 | |||
1324 | def startTagMath(self, token): | ||
1325 | self.tree.reconstructActiveFormattingElements() | ||
1326 | self.parser.adjustMathMLAttributes(token) | ||
1327 | self.parser.adjustForeignAttributes(token) | ||
1328 | token["namespace"] = namespaces["mathml"] | ||
1329 | self.tree.insertElement(token) | ||
1330 | # Need to get the parse error right for the case where the token | ||
1331 | # has a namespace not equal to the xmlns attribute | ||
1332 | if token["selfClosing"]: | ||
1333 | self.tree.openElements.pop() | ||
1334 | token["selfClosingAcknowledged"] = True | ||
1335 | |||
1336 | def startTagSvg(self, token): | ||
1337 | self.tree.reconstructActiveFormattingElements() | ||
1338 | self.parser.adjustSVGAttributes(token) | ||
1339 | self.parser.adjustForeignAttributes(token) | ||
1340 | token["namespace"] = namespaces["svg"] | ||
1341 | self.tree.insertElement(token) | ||
1342 | # Need to get the parse error right for the case where the token | ||
1343 | # has a namespace not equal to the xmlns attribute | ||
1344 | if token["selfClosing"]: | ||
1345 | self.tree.openElements.pop() | ||
1346 | token["selfClosingAcknowledged"] = True | ||
1347 | |||
        def startTagMisplaced(self, token):
            """ Elements that should be children of other elements that have a
            different insertion mode; here they are ignored
            "caption", "col", "colgroup", "frame", "frameset", "head",
            "option", "optgroup", "tbody", "td", "tfoot", "th", "thead",
            "tr", "noscript"

            Only a parse error is reported; the token itself is dropped.
            """
            self.parser.parseError("unexpected-start-tag-ignored", {"name": token["name"]})
1356 | |||
1357 | def startTagOther(self, token): | ||
1358 | self.tree.reconstructActiveFormattingElements() | ||
1359 | self.tree.insertElement(token) | ||
1360 | |||
1361 | def endTagP(self, token): | ||
1362 | if not self.tree.elementInScope("p", variant="button"): | ||
1363 | self.startTagCloseP(impliedTagToken("p", "StartTag")) | ||
1364 | self.parser.parseError("unexpected-end-tag", {"name": "p"}) | ||
1365 | self.endTagP(impliedTagToken("p", "EndTag")) | ||
1366 | else: | ||
1367 | self.tree.generateImpliedEndTags("p") | ||
1368 | if self.tree.openElements[-1].name != "p": | ||
1369 | self.parser.parseError("unexpected-end-tag", {"name": "p"}) | ||
1370 | node = self.tree.openElements.pop() | ||
1371 | while node.name != "p": | ||
1372 | node = self.tree.openElements.pop() | ||
1373 | |||
        def endTagBody(self, token):
            # Ignore </body> entirely when no <body> is in scope (innerHTML
            # style cases); otherwise report at most one error for any
            # unexpected still-open element and move to the afterBody phase.
            if not self.tree.elementInScope("body"):
                self.parser.parseError()
                return
            elif self.tree.openElements[-1].name != "body":
                # Skip the first two entries (html, body) when scanning for
                # elements that should have been closed already.
                for node in self.tree.openElements[2:]:
                    if node.name not in frozenset(("dd", "dt", "li", "optgroup",
                                                   "option", "p", "rp", "rt",
                                                   "tbody", "td", "tfoot",
                                                   "th", "thead", "tr", "body",
                                                   "html")):
                        # Not sure this is the correct name for the parse error
                        self.parser.parseError(
                            "expected-one-end-tag-but-got-another",
                            {"gotName": "body", "expectedName": node.name})
                        break
            self.parser.phase = self.parser.phases["afterBody"]
1391 | |||
        def endTagHtml(self, token):
            # We repeat the test for the body end tag token being ignored here
            # </html> in body: act as an implied </body> first, then return
            # the token so it is reprocessed in the afterBody phase.
            if self.tree.elementInScope("body"):
                self.endTagBody(impliedTagToken("body"))
                return token
1397 | |||
        def endTagBlock(self, token):
            # Generic end-tag handling for block-level elements: when the
            # element is in scope, generate implied end tags and pop up to
            # and including it; report an error if other elements intervene.
            # Put us back in the right whitespace handling mode
            if token["name"] == "pre":
                self.processSpaceCharacters = self.processSpaceCharactersNonPre
            inScope = self.tree.elementInScope(token["name"])
            if inScope:
                self.tree.generateImpliedEndTags()
            # This check runs even when the element is not in scope, so the
            # parse error is still reported in that case.
            if self.tree.openElements[-1].name != token["name"]:
                self.parser.parseError("end-tag-too-early", {"name": token["name"]})
            if inScope:
                node = self.tree.openElements.pop()
                while node.name != token["name"]:
                    node = self.tree.openElements.pop()
1411 | |||
        def endTagForm(self, token):
            # </form>: the form pointer is always cleared; the form element
            # is removed from the open-elements stack (not popped to — other
            # open elements above it stay open) only when it is in scope.
            node = self.tree.formPointer
            self.tree.formPointer = None
            if node is None or not self.tree.elementInScope(node):
                self.parser.parseError("unexpected-end-tag",
                                       {"name": "form"})
            else:
                self.tree.generateImpliedEndTags()
                if self.tree.openElements[-1] != node:
                    self.parser.parseError("end-tag-too-early-ignored",
                                           {"name": "form"})
                self.tree.openElements.remove(node)
1424 | |||
1425 | def endTagListItem(self, token): | ||
1426 | if token["name"] == "li": | ||
1427 | variant = "list" | ||
1428 | else: | ||
1429 | variant = None | ||
1430 | if not self.tree.elementInScope(token["name"], variant=variant): | ||
1431 | self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) | ||
1432 | else: | ||
1433 | self.tree.generateImpliedEndTags(exclude=token["name"]) | ||
1434 | if self.tree.openElements[-1].name != token["name"]: | ||
1435 | self.parser.parseError( | ||
1436 | "end-tag-too-early", | ||
1437 | {"name": token["name"]}) | ||
1438 | node = self.tree.openElements.pop() | ||
1439 | while node.name != token["name"]: | ||
1440 | node = self.tree.openElements.pop() | ||
1441 | |||
        def endTagHeading(self, token):
            # Any h1-h6 end tag closes whichever heading element is open:
            # first generate implied end tags if some heading is in scope,
            # then pop until a heading element (not necessarily the named
            # one) has been removed.
            for item in headingElements:
                if self.tree.elementInScope(item):
                    self.tree.generateImpliedEndTags()
                    break
            if self.tree.openElements[-1].name != token["name"]:
                self.parser.parseError("end-tag-too-early", {"name": token["name"]})

            for item in headingElements:
                if self.tree.elementInScope(item):
                    item = self.tree.openElements.pop()
                    while item.name not in headingElements:
                        item = self.tree.openElements.pop()
                    break
1456 | |||
        def endTagFormatting(self, token):
            """The much-feared adoption agency algorithm

            Handles mis-nested formatting elements (e.g. <b><p>x</b>) by
            running up to eight passes; each pass either aborts via return
            or restructures the tree around the "furthest block".
            """
            # http://svn.whatwg.org/webapps/complete.html#adoptionAgency revision 7867
            # XXX Better parseError messages appreciated.

            # Step 1
            outerLoopCounter = 0

            # Step 2
            while outerLoopCounter < 8:

                # Step 3
                outerLoopCounter += 1

                # Step 4:

                # Let the formatting element be the last element in
                # the list of active formatting elements that:
                # - is between the end of the list and the last scope
                # marker in the list, if any, or the start of the list
                # otherwise, and
                # - has the same tag name as the token.
                formattingElement = self.tree.elementInActiveFormattingElements(
                    token["name"])
                if (not formattingElement or
                    (formattingElement in self.tree.openElements and
                     not self.tree.elementInScope(formattingElement.name))):
                    # If there is no such node, then abort these steps
                    # and instead act as described in the "any other
                    # end tag" entry below.
                    self.endTagOther(token)
                    return

                # Otherwise, if there is such a node, but that node is
                # not in the stack of open elements, then this is a
                # parse error; remove the element from the list, and
                # abort these steps.
                elif formattingElement not in self.tree.openElements:
                    self.parser.parseError("adoption-agency-1.2", {"name": token["name"]})
                    self.tree.activeFormattingElements.remove(formattingElement)
                    return

                # Otherwise, if there is such a node, and that node is
                # also in the stack of open elements, but the element
                # is not in scope, then this is a parse error; ignore
                # the token, and abort these steps.
                elif not self.tree.elementInScope(formattingElement.name):
                    self.parser.parseError("adoption-agency-4.4", {"name": token["name"]})
                    return

                # Otherwise, there is a formatting element and that
                # element is in the stack and is in scope. If the
                # element is not the current node, this is a parse
                # error. In any case, proceed with the algorithm as
                # written in the following steps.
                else:
                    if formattingElement != self.tree.openElements[-1]:
                        self.parser.parseError("adoption-agency-1.3", {"name": token["name"]})

                # Step 5:

                # Let the furthest block be the topmost node in the
                # stack of open elements that is lower in the stack
                # than the formatting element, and is an element in
                # the special category. There might not be one.
                afeIndex = self.tree.openElements.index(formattingElement)
                furthestBlock = None
                for element in self.tree.openElements[afeIndex:]:
                    if element.nameTuple in specialElements:
                        furthestBlock = element
                        break

                # Step 6:

                # If there is no furthest block, then the UA must
                # first pop all the nodes from the bottom of the stack
                # of open elements, from the current node up to and
                # including the formatting element, then remove the
                # formatting element from the list of active
                # formatting elements, and finally abort these steps.
                if furthestBlock is None:
                    element = self.tree.openElements.pop()
                    while element != formattingElement:
                        element = self.tree.openElements.pop()
                    self.tree.activeFormattingElements.remove(element)
                    return

                # Step 7
                commonAncestor = self.tree.openElements[afeIndex - 1]

                # Step 8:
                # The bookmark is supposed to help us identify where to reinsert
                # nodes in step 15. We have to ensure that we reinsert nodes after
                # the node before the active formatting element. Note the bookmark
                # can move in step 9.7
                bookmark = self.tree.activeFormattingElements.index(formattingElement)

                # Step 9
                lastNode = node = furthestBlock
                innerLoopCounter = 0

                index = self.tree.openElements.index(node)
                while innerLoopCounter < 3:
                    innerLoopCounter += 1
                    # Node is element before node in open elements
                    index -= 1
                    node = self.tree.openElements[index]
                    if node not in self.tree.activeFormattingElements:
                        self.tree.openElements.remove(node)
                        continue
                    # Step 9.6
                    if node == formattingElement:
                        break
                    # Step 9.7
                    if lastNode == furthestBlock:
                        bookmark = self.tree.activeFormattingElements.index(node) + 1
                    # Step 9.8
                    clone = node.cloneNode()
                    # Replace node with clone
                    self.tree.activeFormattingElements[
                        self.tree.activeFormattingElements.index(node)] = clone
                    self.tree.openElements[
                        self.tree.openElements.index(node)] = clone
                    node = clone
                    # Step 9.9
                    # Remove lastNode from its parents, if any
                    if lastNode.parent:
                        lastNode.parent.removeChild(lastNode)
                    node.appendChild(lastNode)
                    # Step 9.10
                    lastNode = node

                # Step 10
                # Foster parent lastNode if commonAncestor is a
                # table, tbody, tfoot, thead, or tr we need to foster
                # parent the lastNode
                if lastNode.parent:
                    lastNode.parent.removeChild(lastNode)

                if commonAncestor.name in frozenset(("table", "tbody", "tfoot", "thead", "tr")):
                    parent, insertBefore = self.tree.getTableMisnestedNodePosition()
                    parent.insertBefore(lastNode, insertBefore)
                else:
                    commonAncestor.appendChild(lastNode)

                # Step 11
                clone = formattingElement.cloneNode()

                # Step 12
                furthestBlock.reparentChildren(clone)

                # Step 13
                furthestBlock.appendChild(clone)

                # Step 14
                self.tree.activeFormattingElements.remove(formattingElement)
                self.tree.activeFormattingElements.insert(bookmark, clone)

                # Step 15
                self.tree.openElements.remove(formattingElement)
                self.tree.openElements.insert(
                    self.tree.openElements.index(furthestBlock) + 1, clone)
1619 | |||
1620 | def endTagAppletMarqueeObject(self, token): | ||
1621 | if self.tree.elementInScope(token["name"]): | ||
1622 | self.tree.generateImpliedEndTags() | ||
1623 | if self.tree.openElements[-1].name != token["name"]: | ||
1624 | self.parser.parseError("end-tag-too-early", {"name": token["name"]}) | ||
1625 | |||
1626 | if self.tree.elementInScope(token["name"]): | ||
1627 | element = self.tree.openElements.pop() | ||
1628 | while element.name != token["name"]: | ||
1629 | element = self.tree.openElements.pop() | ||
1630 | self.tree.clearActiveFormattingElements() | ||
1631 | |||
1632 | def endTagBr(self, token): | ||
1633 | self.parser.parseError("unexpected-end-tag-treated-as", | ||
1634 | {"originalName": "br", "newName": "br element"}) | ||
1635 | self.tree.reconstructActiveFormattingElements() | ||
1636 | self.tree.insertElement(impliedTagToken("br", "StartTag")) | ||
1637 | self.tree.openElements.pop() | ||
1638 | |||
        def endTagOther(self, token):
            # "Any other end tag" in body: walk the open elements from the
            # innermost outward; pop to the first matching element, but stop
            # (ignoring the token) if a special-category element is reached
            # first.
            for node in self.tree.openElements[::-1]:
                if node.name == token["name"]:
                    self.tree.generateImpliedEndTags(exclude=token["name"])
                    if self.tree.openElements[-1].name != token["name"]:
                        self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
                    while self.tree.openElements.pop() != node:
                        pass
                    break
                else:
                    if node.nameTuple in specialElements:
                        self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
                        break
1652 | |||
    class TextPhase(Phase):
        """Phase for RCDATA/RAWTEXT content (script, style, textarea, ...):
        characters are inserted verbatim and the only meaningful end tag is
        the one that closes the current element."""
        def __init__(self, parser, tree):
            Phase.__init__(self, parser, tree)
            # No start tags are legal here; anything reaching startTagOther
            # indicates a tokenizer/parser bug (see the assert below).
            self.startTagHandler = _utils.MethodDispatcher([])
            self.startTagHandler.default = self.startTagOther
            self.endTagHandler = _utils.MethodDispatcher([
                ("script", self.endTagScript)])
            self.endTagHandler.default = self.endTagOther

        def processCharacters(self, token):
            # All character data is inserted as-is.
            self.tree.insertText(token["data"])

        def processEOF(self):
            # EOF inside raw text: report, close the element, and resume the
            # phase that was active before the raw text started.
            self.parser.parseError("expected-named-closing-tag-but-got-eof",
                                   {"name": self.tree.openElements[-1].name})
            self.tree.openElements.pop()
            self.parser.phase = self.parser.originalPhase
            return True

        def startTagOther(self, token):
            assert False, "Tried to process start tag %s in RCDATA/RAWTEXT mode" % token['name']

        def endTagScript(self, token):
            node = self.tree.openElements.pop()
            assert node.name == "script"
            self.parser.phase = self.parser.originalPhase
            # The rest of this method is all stuff that only happens if
            # document.write works

        def endTagOther(self, token):
            # Any end tag closes the raw-text element and restores the
            # previous phase.
            self.tree.openElements.pop()
            self.parser.phase = self.parser.originalPhase
1685 | |||
    class InTablePhase(Phase):
        # http://www.whatwg.org/specs/web-apps/current-work/#in-table
        def __init__(self, parser, tree):
            Phase.__init__(self, parser, tree)
            self.startTagHandler = _utils.MethodDispatcher([
                ("html", self.startTagHtml),
                ("caption", self.startTagCaption),
                ("colgroup", self.startTagColgroup),
                ("col", self.startTagCol),
                (("tbody", "tfoot", "thead"), self.startTagRowGroup),
                (("td", "th", "tr"), self.startTagImplyTbody),
                ("table", self.startTagTable),
                (("style", "script"), self.startTagStyleScript),
                ("input", self.startTagInput),
                ("form", self.startTagForm)
            ])
            self.startTagHandler.default = self.startTagOther

            self.endTagHandler = _utils.MethodDispatcher([
                ("table", self.endTagTable),
                (("body", "caption", "col", "colgroup", "html", "tbody", "td",
                  "tfoot", "th", "thead", "tr"), self.endTagIgnore)
            ])
            self.endTagHandler.default = self.endTagOther

        # helper methods
        def clearStackToTableContext(self):
            # "clear the stack back to a table context"
            while self.tree.openElements[-1].name not in ("table", "html"):
                # self.parser.parseError("unexpected-implied-end-tag-in-table",
                #  {"name": self.tree.openElements[-1].name})
                self.tree.openElements.pop()
            # When the current node is <html> it's an innerHTML case

        # processing methods
        def processEOF(self):
            if self.tree.openElements[-1].name != "html":
                self.parser.parseError("eof-in-table")
            else:
                assert self.parser.innerHTML
            # Stop parsing

        def processSpaceCharacters(self, token):
            # Buffer character data through the inTableText phase so runs of
            # whitespace vs. non-whitespace can be distinguished.
            originalPhase = self.parser.phase
            self.parser.phase = self.parser.phases["inTableText"]
            self.parser.phase.originalPhase = originalPhase
            self.parser.phase.processSpaceCharacters(token)

        def processCharacters(self, token):
            # Same buffering path as processSpaceCharacters.
            originalPhase = self.parser.phase
            self.parser.phase = self.parser.phases["inTableText"]
            self.parser.phase.originalPhase = originalPhase
            self.parser.phase.processCharacters(token)

        def insertText(self, token):
            # If we get here there must be at least one non-whitespace character
            # Do the table magic!
            self.tree.insertFromTable = True
            self.parser.phases["inBody"].processCharacters(token)
            self.tree.insertFromTable = False

        def startTagCaption(self, token):
            # A caption gets a formatting-elements marker and its own phase.
            self.clearStackToTableContext()
            self.tree.activeFormattingElements.append(Marker)
            self.tree.insertElement(token)
            self.parser.phase = self.parser.phases["inCaption"]

        def startTagColgroup(self, token):
            self.clearStackToTableContext()
            self.tree.insertElement(token)
            self.parser.phase = self.parser.phases["inColumnGroup"]

        def startTagCol(self, token):
            # <col> implies <colgroup>; the token is reprocessed there.
            self.startTagColgroup(impliedTagToken("colgroup", "StartTag"))
            return token

        def startTagRowGroup(self, token):
            self.clearStackToTableContext()
            self.tree.insertElement(token)
            self.parser.phase = self.parser.phases["inTableBody"]

        def startTagImplyTbody(self, token):
            # <td>/<th>/<tr> directly inside <table> implies <tbody>.
            self.startTagRowGroup(impliedTagToken("tbody", "StartTag"))
            return token

        def startTagTable(self, token):
            # Nested <table> closes the current one; the token is then
            # reprocessed unless we are in an innerHTML fragment.
            self.parser.parseError("unexpected-start-tag-implies-end-tag",
                                   {"startName": "table", "endName": "table"})
            self.parser.phase.processEndTag(impliedTagToken("table"))
            if not self.parser.innerHTML:
                return token

        def startTagStyleScript(self, token):
            return self.parser.phases["inHead"].processStartTag(token)

        def startTagInput(self, token):
            # Only hidden inputs are tolerated directly inside a table.
            if ("type" in token["data"] and
                    token["data"]["type"].translate(asciiUpper2Lower) == "hidden"):
                self.parser.parseError("unexpected-hidden-input-in-table")
                self.tree.insertElement(token)
                # XXX associate with form
                self.tree.openElements.pop()
            else:
                self.startTagOther(token)

        def startTagForm(self, token):
            # A form inside a table is inserted and popped immediately, and
            # only if no form is already open.
            self.parser.parseError("unexpected-form-in-table")
            if self.tree.formPointer is None:
                self.tree.insertElement(token)
                self.tree.formPointer = self.tree.openElements[-1]
                self.tree.openElements.pop()

        def startTagOther(self, token):
            self.parser.parseError("unexpected-start-tag-implies-table-voodoo", {"name": token["name"]})
            # Do the table magic!
            self.tree.insertFromTable = True
            self.parser.phases["inBody"].processStartTag(token)
            self.tree.insertFromTable = False

        def endTagTable(self, token):
            if self.tree.elementInScope("table", variant="table"):
                self.tree.generateImpliedEndTags()
                if self.tree.openElements[-1].name != "table":
                    self.parser.parseError("end-tag-too-early-named",
                                           {"gotName": "table",
                                            "expectedName": self.tree.openElements[-1].name})
                while self.tree.openElements[-1].name != "table":
                    self.tree.openElements.pop()
                self.tree.openElements.pop()
                self.parser.resetInsertionMode()
            else:
                # innerHTML case
                assert self.parser.innerHTML
                self.parser.parseError()

        def endTagIgnore(self, token):
            self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

        def endTagOther(self, token):
            self.parser.parseError("unexpected-end-tag-implies-table-voodoo", {"name": token["name"]})
            # Do the table magic!
            self.tree.insertFromTable = True
            self.parser.phases["inBody"].processEndTag(token)
            self.tree.insertFromTable = False
1830 | |||
1831 | class InTableTextPhase(Phase): | ||
1832 | def __init__(self, parser, tree): | ||
1833 | Phase.__init__(self, parser, tree) | ||
1834 | self.originalPhase = None | ||
1835 | self.characterTokens = [] | ||
1836 | |||
1837 | def flushCharacters(self): | ||
1838 | data = "".join([item["data"] for item in self.characterTokens]) | ||
1839 | if any([item not in spaceCharacters for item in data]): | ||
1840 | token = {"type": tokenTypes["Characters"], "data": data} | ||
1841 | self.parser.phases["inTable"].insertText(token) | ||
1842 | elif data: | ||
1843 | self.tree.insertText(data) | ||
1844 | self.characterTokens = [] | ||
1845 | |||
1846 | def processComment(self, token): | ||
1847 | self.flushCharacters() | ||
1848 | self.parser.phase = self.originalPhase | ||
1849 | return token | ||
1850 | |||
1851 | def processEOF(self): | ||
1852 | self.flushCharacters() | ||
1853 | self.parser.phase = self.originalPhase | ||
1854 | return True | ||
1855 | |||
1856 | def processCharacters(self, token): | ||
1857 | if token["data"] == "\u0000": | ||
1858 | return | ||
1859 | self.characterTokens.append(token) | ||
1860 | |||
1861 | def processSpaceCharacters(self, token): | ||
1862 | # pretty sure we should never reach here | ||
1863 | self.characterTokens.append(token) | ||
1864 | # assert False | ||
1865 | |||
1866 | def processStartTag(self, token): | ||
1867 | self.flushCharacters() | ||
1868 | self.parser.phase = self.originalPhase | ||
1869 | return token | ||
1870 | |||
1871 | def processEndTag(self, token): | ||
1872 | self.flushCharacters() | ||
1873 | self.parser.phase = self.originalPhase | ||
1874 | return token | ||
1875 | |||
    class InCaptionPhase(Phase):
        # http://www.whatwg.org/specs/web-apps/current-work/#in-caption
        def __init__(self, parser, tree):
            Phase.__init__(self, parser, tree)

            self.startTagHandler = _utils.MethodDispatcher([
                ("html", self.startTagHtml),
                (("caption", "col", "colgroup", "tbody", "td", "tfoot", "th",
                  "thead", "tr"), self.startTagTableElement)
            ])
            self.startTagHandler.default = self.startTagOther

            self.endTagHandler = _utils.MethodDispatcher([
                ("caption", self.endTagCaption),
                ("table", self.endTagTable),
                (("body", "col", "colgroup", "html", "tbody", "td", "tfoot", "th",
                  "thead", "tr"), self.endTagIgnore)
            ])
            self.endTagHandler.default = self.endTagOther

        def ignoreEndTagCaption(self):
            # True when </caption> would be ignored (no caption in table scope).
            return not self.tree.elementInScope("caption", variant="table")

        def processEOF(self):
            self.parser.phases["inBody"].processEOF()

        def processCharacters(self, token):
            return self.parser.phases["inBody"].processCharacters(token)

        def startTagTableElement(self, token):
            # A table-structure start tag implicitly closes the caption; the
            # token is reprocessed unless the implied </caption> was ignored.
            self.parser.parseError()
            # XXX Have to duplicate logic here to find out if the tag is ignored
            ignoreEndTag = self.ignoreEndTagCaption()
            self.parser.phase.processEndTag(impliedTagToken("caption"))
            if not ignoreEndTag:
                return token

        def startTagOther(self, token):
            return self.parser.phases["inBody"].processStartTag(token)

        def endTagCaption(self, token):
            if not self.ignoreEndTagCaption():
                # AT this code is quite similar to endTagTable in "InTable"
                self.tree.generateImpliedEndTags()
                if self.tree.openElements[-1].name != "caption":
                    self.parser.parseError("expected-one-end-tag-but-got-another",
                                           {"gotName": "caption",
                                            "expectedName": self.tree.openElements[-1].name})
                while self.tree.openElements[-1].name != "caption":
                    self.tree.openElements.pop()
                self.tree.openElements.pop()
                self.tree.clearActiveFormattingElements()
                self.parser.phase = self.parser.phases["inTable"]
            else:
                # innerHTML case
                assert self.parser.innerHTML
                self.parser.parseError()

        def endTagTable(self, token):
            # </table> closes the caption first, then is reprocessed.
            self.parser.parseError()
            ignoreEndTag = self.ignoreEndTagCaption()
            self.parser.phase.processEndTag(impliedTagToken("caption"))
            if not ignoreEndTag:
                return token

        def endTagIgnore(self, token):
            self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

        def endTagOther(self, token):
            return self.parser.phases["inBody"].processEndTag(token)
1946 | |||
1947 | class InColumnGroupPhase(Phase): | ||
1948 | # http://www.whatwg.org/specs/web-apps/current-work/#in-column | ||
1949 | |||
1950 | def __init__(self, parser, tree): | ||
1951 | Phase.__init__(self, parser, tree) | ||
1952 | |||
1953 | self.startTagHandler = _utils.MethodDispatcher([ | ||
1954 | ("html", self.startTagHtml), | ||
1955 | ("col", self.startTagCol) | ||
1956 | ]) | ||
1957 | self.startTagHandler.default = self.startTagOther | ||
1958 | |||
1959 | self.endTagHandler = _utils.MethodDispatcher([ | ||
1960 | ("colgroup", self.endTagColgroup), | ||
1961 | ("col", self.endTagCol) | ||
1962 | ]) | ||
1963 | self.endTagHandler.default = self.endTagOther | ||
1964 | |||
1965 | def ignoreEndTagColgroup(self): | ||
1966 | return self.tree.openElements[-1].name == "html" | ||
1967 | |||
1968 | def processEOF(self): | ||
1969 | if self.tree.openElements[-1].name == "html": | ||
1970 | assert self.parser.innerHTML | ||
1971 | return | ||
1972 | else: | ||
1973 | ignoreEndTag = self.ignoreEndTagColgroup() | ||
1974 | self.endTagColgroup(impliedTagToken("colgroup")) | ||
1975 | if not ignoreEndTag: | ||
1976 | return True | ||
1977 | |||
1978 | def processCharacters(self, token): | ||
1979 | ignoreEndTag = self.ignoreEndTagColgroup() | ||
1980 | self.endTagColgroup(impliedTagToken("colgroup")) | ||
1981 | if not ignoreEndTag: | ||
1982 | return token | ||
1983 | |||
1984 | def startTagCol(self, token): | ||
1985 | self.tree.insertElement(token) | ||
1986 | self.tree.openElements.pop() | ||
1987 | token["selfClosingAcknowledged"] = True | ||
1988 | |||
1989 | def startTagOther(self, token): | ||
1990 | ignoreEndTag = self.ignoreEndTagColgroup() | ||
1991 | self.endTagColgroup(impliedTagToken("colgroup")) | ||
1992 | if not ignoreEndTag: | ||
1993 | return token | ||
1994 | |||
1995 | def endTagColgroup(self, token): | ||
1996 | if self.ignoreEndTagColgroup(): | ||
1997 | # innerHTML case | ||
1998 | assert self.parser.innerHTML | ||
1999 | self.parser.parseError() | ||
2000 | else: | ||
2001 | self.tree.openElements.pop() | ||
2002 | self.parser.phase = self.parser.phases["inTable"] | ||
2003 | |||
        def endTagCol(self, token):
            # <col> is void, so an explicit </col> is always an error.
            self.parser.parseError("no-end-tag", {"name": "col"})
2006 | |||
        def endTagOther(self, token):
            # Any other end tag implies </colgroup>; reprocess the token
            # unless the implied end tag had to be ignored.
            ignoreEndTag = self.ignoreEndTagColgroup()
            self.endTagColgroup(impliedTagToken("colgroup"))
            if not ignoreEndTag:
                return token
2012 | |||
    class InTableBodyPhase(Phase):
        # http://www.whatwg.org/specs/web-apps/current-work/#in-table0
        """The "in table body" insertion mode (inside tbody/thead/tfoot)."""

        def __init__(self, parser, tree):
            Phase.__init__(self, parser, tree)
            self.startTagHandler = _utils.MethodDispatcher([
                ("html", self.startTagHtml),
                ("tr", self.startTagTr),
                (("td", "th"), self.startTagTableCell),
                (("caption", "col", "colgroup", "tbody", "tfoot", "thead"),
                 self.startTagTableOther)
            ])
            self.startTagHandler.default = self.startTagOther

            self.endTagHandler = _utils.MethodDispatcher([
                (("tbody", "tfoot", "thead"), self.endTagTableRowGroup),
                ("table", self.endTagTable),
                (("body", "caption", "col", "colgroup", "html", "td", "th",
                  "tr"), self.endTagIgnore)
            ])
            self.endTagHandler.default = self.endTagOther

        # helper methods
        def clearStackToTableBodyContext(self):
            # Pop open elements until a row-group (or the root <html>, in
            # the fragment case) is the current node.
            while self.tree.openElements[-1].name not in ("tbody", "tfoot",
                                                          "thead", "html"):
                # self.parser.parseError("unexpected-implied-end-tag-in-table",
                #                        {"name": self.tree.openElements[-1].name})
                self.tree.openElements.pop()
            if self.tree.openElements[-1].name == "html":
                assert self.parser.innerHTML

        # the rest
        def processEOF(self):
            # EOF handling is identical to the "in table" mode.
            self.parser.phases["inTable"].processEOF()

        def processSpaceCharacters(self, token):
            return self.parser.phases["inTable"].processSpaceCharacters(token)

        def processCharacters(self, token):
            return self.parser.phases["inTable"].processCharacters(token)

        def startTagTr(self, token):
            # A <tr> opens a row: clear back to the row group, insert it,
            # and switch to the "in row" mode.
            self.clearStackToTableBodyContext()
            self.tree.insertElement(token)
            self.parser.phase = self.parser.phases["inRow"]

        def startTagTableCell(self, token):
            # A cell without an enclosing <tr> implies one; reprocess the
            # cell token in the newly opened row.
            self.parser.parseError("unexpected-cell-in-table-body",
                                   {"name": token["name"]})
            self.startTagTr(impliedTagToken("tr", "StartTag"))
            return token

        def startTagTableOther(self, token):
            # XXX AT Any ideas on how to share this with endTagTable?
            # Table-structure tags close the current row group first, then
            # get reprocessed; in the fragment case they are ignored.
            if (self.tree.elementInScope("tbody", variant="table") or
                self.tree.elementInScope("thead", variant="table") or
                self.tree.elementInScope("tfoot", variant="table")):
                self.clearStackToTableBodyContext()
                self.endTagTableRowGroup(
                    impliedTagToken(self.tree.openElements[-1].name))
                return token
            else:
                # innerHTML case
                assert self.parser.innerHTML
                self.parser.parseError()

        def startTagOther(self, token):
            return self.parser.phases["inTable"].processStartTag(token)

        def endTagTableRowGroup(self, token):
            # Close the named row group if it is in table scope and return
            # to the "in table" mode; otherwise it is a stray end tag.
            if self.tree.elementInScope(token["name"], variant="table"):
                self.clearStackToTableBodyContext()
                self.tree.openElements.pop()
                self.parser.phase = self.parser.phases["inTable"]
            else:
                self.parser.parseError("unexpected-end-tag-in-table-body",
                                       {"name": token["name"]})

        def endTagTable(self, token):
            # </table> first closes the open row group, then is reprocessed
            # by the "in table" mode.
            if (self.tree.elementInScope("tbody", variant="table") or
                self.tree.elementInScope("thead", variant="table") or
                self.tree.elementInScope("tfoot", variant="table")):
                self.clearStackToTableBodyContext()
                self.endTagTableRowGroup(
                    impliedTagToken(self.tree.openElements[-1].name))
                return token
            else:
                # innerHTML case
                assert self.parser.innerHTML
                self.parser.parseError()

        def endTagIgnore(self, token):
            self.parser.parseError("unexpected-end-tag-in-table-body",
                                   {"name": token["name"]})

        def endTagOther(self, token):
            return self.parser.phases["inTable"].processEndTag(token)
2110 | |||
    class InRowPhase(Phase):
        # http://www.whatwg.org/specs/web-apps/current-work/#in-row
        """The "in row" insertion mode (inside an open <tr>)."""

        def __init__(self, parser, tree):
            Phase.__init__(self, parser, tree)
            self.startTagHandler = _utils.MethodDispatcher([
                ("html", self.startTagHtml),
                (("td", "th"), self.startTagTableCell),
                (("caption", "col", "colgroup", "tbody", "tfoot", "thead",
                  "tr"), self.startTagTableOther)
            ])
            self.startTagHandler.default = self.startTagOther

            self.endTagHandler = _utils.MethodDispatcher([
                ("tr", self.endTagTr),
                ("table", self.endTagTable),
                (("tbody", "tfoot", "thead"), self.endTagTableRowGroup),
                (("body", "caption", "col", "colgroup", "html", "td", "th"),
                 self.endTagIgnore)
            ])
            self.endTagHandler.default = self.endTagOther

        # helper methods (XXX unify this with other table helper methods)
        def clearStackToTableRowContext(self):
            # Pop open elements (reporting each as an error) until a <tr>
            # or the root <html> is the current node.
            while self.tree.openElements[-1].name not in ("tr", "html"):
                self.parser.parseError("unexpected-implied-end-tag-in-table-row",
                                       {"name": self.tree.openElements[-1].name})
                self.tree.openElements.pop()

        def ignoreEndTagTr(self):
            # An implied </tr> is ignored when no <tr> is in table scope.
            return not self.tree.elementInScope("tr", variant="table")

        # the rest
        def processEOF(self):
            self.parser.phases["inTable"].processEOF()

        def processSpaceCharacters(self, token):
            return self.parser.phases["inTable"].processSpaceCharacters(token)

        def processCharacters(self, token):
            return self.parser.phases["inTable"].processCharacters(token)

        def startTagTableCell(self, token):
            # Open the cell, enter "in cell" mode and push a formatting
            # marker so the cell bounds active formatting elements.
            self.clearStackToTableRowContext()
            self.tree.insertElement(token)
            self.parser.phase = self.parser.phases["inCell"]
            self.tree.activeFormattingElements.append(Marker)

        def startTagTableOther(self, token):
            # Table-structure tags imply </tr>; reprocess the token unless
            # the implied end tag was ignored.
            ignoreEndTag = self.ignoreEndTagTr()
            self.endTagTr(impliedTagToken("tr"))
            # XXX how are we sure it's always ignored in the innerHTML case?
            if not ignoreEndTag:
                return token

        def startTagOther(self, token):
            return self.parser.phases["inTable"].processStartTag(token)

        def endTagTr(self, token):
            # Close the row and return to the "in table body" mode.
            if not self.ignoreEndTagTr():
                self.clearStackToTableRowContext()
                self.tree.openElements.pop()
                self.parser.phase = self.parser.phases["inTableBody"]
            else:
                # innerHTML case
                assert self.parser.innerHTML
                self.parser.parseError()

        def endTagTable(self, token):
            ignoreEndTag = self.ignoreEndTagTr()
            self.endTagTr(impliedTagToken("tr"))
            # Reprocess the current tag if the tr end tag was not ignored
            # XXX how are we sure it's always ignored in the innerHTML case?
            if not ignoreEndTag:
                return token

        def endTagTableRowGroup(self, token):
            # A row-group end tag closes the row first, then is reprocessed
            # by the "in table body" mode.
            if self.tree.elementInScope(token["name"], variant="table"):
                self.endTagTr(impliedTagToken("tr"))
                return token
            else:
                self.parser.parseError()

        def endTagIgnore(self, token):
            self.parser.parseError("unexpected-end-tag-in-table-row",
                                   {"name": token["name"]})

        def endTagOther(self, token):
            return self.parser.phases["inTable"].processEndTag(token)
2199 | |||
    class InCellPhase(Phase):
        # http://www.whatwg.org/specs/web-apps/current-work/#in-cell
        """The "in cell" insertion mode (inside an open <td> or <th>)."""

        def __init__(self, parser, tree):
            Phase.__init__(self, parser, tree)
            self.startTagHandler = _utils.MethodDispatcher([
                ("html", self.startTagHtml),
                (("caption", "col", "colgroup", "tbody", "td", "tfoot", "th",
                  "thead", "tr"), self.startTagTableOther)
            ])
            self.startTagHandler.default = self.startTagOther

            self.endTagHandler = _utils.MethodDispatcher([
                (("td", "th"), self.endTagTableCell),
                (("body", "caption", "col", "colgroup", "html"), self.endTagIgnore),
                (("table", "tbody", "tfoot", "thead", "tr"), self.endTagImply)
            ])
            self.endTagHandler.default = self.endTagOther

        # helper
        def closeCell(self):
            # Emit an implied end tag for whichever cell type is open.
            if self.tree.elementInScope("td", variant="table"):
                self.endTagTableCell(impliedTagToken("td"))
            elif self.tree.elementInScope("th", variant="table"):
                self.endTagTableCell(impliedTagToken("th"))

        # the rest
        def processEOF(self):
            # Cell content is body content, so EOF is handled by "in body".
            self.parser.phases["inBody"].processEOF()

        def processCharacters(self, token):
            return self.parser.phases["inBody"].processCharacters(token)

        def startTagTableOther(self, token):
            # Table-structure tags close the current cell first, then get
            # reprocessed; with no open cell we are in the fragment case.
            if (self.tree.elementInScope("td", variant="table") or
                    self.tree.elementInScope("th", variant="table")):
                self.closeCell()
                return token
            else:
                # innerHTML case
                assert self.parser.innerHTML
                self.parser.parseError()

        def startTagOther(self, token):
            return self.parser.phases["inBody"].processStartTag(token)

        def endTagTableCell(self, token):
            # Close the named cell: generate implied end tags, pop to (and
            # including) the cell, clear formatting elements to the marker,
            # and return to the "in row" mode.
            if self.tree.elementInScope(token["name"], variant="table"):
                self.tree.generateImpliedEndTags(token["name"])
                if self.tree.openElements[-1].name != token["name"]:
                    self.parser.parseError("unexpected-cell-end-tag",
                                           {"name": token["name"]})
                    while True:
                        node = self.tree.openElements.pop()
                        if node.name == token["name"]:
                            break
                else:
                    self.tree.openElements.pop()
                self.tree.clearActiveFormattingElements()
                self.parser.phase = self.parser.phases["inRow"]
            else:
                self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

        def endTagIgnore(self, token):
            self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

        def endTagImply(self, token):
            # Row/table end tags imply closing the cell, then reprocessing.
            if self.tree.elementInScope(token["name"], variant="table"):
                self.closeCell()
                return token
            else:
                # sometimes innerHTML case
                self.parser.parseError()

        def endTagOther(self, token):
            return self.parser.phases["inBody"].processEndTag(token)
2275 | |||
    class InSelectPhase(Phase):
        """The "in select" insertion mode (inside an open <select>)."""

        def __init__(self, parser, tree):
            Phase.__init__(self, parser, tree)

            self.startTagHandler = _utils.MethodDispatcher([
                ("html", self.startTagHtml),
                ("option", self.startTagOption),
                ("optgroup", self.startTagOptgroup),
                ("select", self.startTagSelect),
                (("input", "keygen", "textarea"), self.startTagInput),
                ("script", self.startTagScript)
            ])
            self.startTagHandler.default = self.startTagOther

            self.endTagHandler = _utils.MethodDispatcher([
                ("option", self.endTagOption),
                ("optgroup", self.endTagOptgroup),
                ("select", self.endTagSelect)
            ])
            self.endTagHandler.default = self.endTagOther

        # http://www.whatwg.org/specs/web-apps/current-work/#in-select
        def processEOF(self):
            if self.tree.openElements[-1].name != "html":
                self.parser.parseError("eof-in-select")
            else:
                assert self.parser.innerHTML

        def processCharacters(self, token):
            # U+0000 characters are dropped inside a select.
            if token["data"] == "\u0000":
                return
            self.tree.insertText(token["data"])

        def startTagOption(self, token):
            # We need to imply </option> if <option> is the current node.
            if self.tree.openElements[-1].name == "option":
                self.tree.openElements.pop()
            self.tree.insertElement(token)

        def startTagOptgroup(self, token):
            # <optgroup> implicitly closes an open <option> and any open
            # <optgroup> before being inserted.
            if self.tree.openElements[-1].name == "option":
                self.tree.openElements.pop()
            if self.tree.openElements[-1].name == "optgroup":
                self.tree.openElements.pop()
            self.tree.insertElement(token)

        def startTagSelect(self, token):
            # A nested <select> start tag acts like </select>.
            self.parser.parseError("unexpected-select-in-select")
            self.endTagSelect(impliedTagToken("select"))

        def startTagInput(self, token):
            # input/keygen/textarea force the select closed, then get
            # reprocessed; in the fragment case they are ignored.
            self.parser.parseError("unexpected-input-in-select")
            if self.tree.elementInScope("select", variant="select"):
                self.endTagSelect(impliedTagToken("select"))
                return token
            else:
                assert self.parser.innerHTML

        def startTagScript(self, token):
            return self.parser.phases["inHead"].processStartTag(token)

        def startTagOther(self, token):
            self.parser.parseError("unexpected-start-tag-in-select",
                                   {"name": token["name"]})

        def endTagOption(self, token):
            if self.tree.openElements[-1].name == "option":
                self.tree.openElements.pop()
            else:
                self.parser.parseError("unexpected-end-tag-in-select",
                                       {"name": "option"})

        def endTagOptgroup(self, token):
            # </optgroup> implicitly closes <option>
            if (self.tree.openElements[-1].name == "option" and
                    self.tree.openElements[-2].name == "optgroup"):
                self.tree.openElements.pop()
            # It also closes </optgroup>
            if self.tree.openElements[-1].name == "optgroup":
                self.tree.openElements.pop()
            # But nothing else
            else:
                self.parser.parseError("unexpected-end-tag-in-select",
                                       {"name": "optgroup"})

        def endTagSelect(self, token):
            # Pop up to and including the <select>, then pick the next
            # insertion mode from the remaining open elements.
            if self.tree.elementInScope("select", variant="select"):
                node = self.tree.openElements.pop()
                while node.name != "select":
                    node = self.tree.openElements.pop()
                self.parser.resetInsertionMode()
            else:
                # innerHTML case
                assert self.parser.innerHTML
                self.parser.parseError()

        def endTagOther(self, token):
            self.parser.parseError("unexpected-end-tag-in-select",
                                   {"name": token["name"]})
2375 | |||
2376 | class InSelectInTablePhase(Phase): | ||
2377 | def __init__(self, parser, tree): | ||
2378 | Phase.__init__(self, parser, tree) | ||
2379 | |||
2380 | self.startTagHandler = _utils.MethodDispatcher([ | ||
2381 | (("caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"), | ||
2382 | self.startTagTable) | ||
2383 | ]) | ||
2384 | self.startTagHandler.default = self.startTagOther | ||
2385 | |||
2386 | self.endTagHandler = _utils.MethodDispatcher([ | ||
2387 | (("caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"), | ||
2388 | self.endTagTable) | ||
2389 | ]) | ||
2390 | self.endTagHandler.default = self.endTagOther | ||
2391 | |||
2392 | def processEOF(self): | ||
2393 | self.parser.phases["inSelect"].processEOF() | ||
2394 | |||
2395 | def processCharacters(self, token): | ||
2396 | return self.parser.phases["inSelect"].processCharacters(token) | ||
2397 | |||
2398 | def startTagTable(self, token): | ||
2399 | self.parser.parseError("unexpected-table-element-start-tag-in-select-in-table", {"name": token["name"]}) | ||
2400 | self.endTagOther(impliedTagToken("select")) | ||
2401 | return token | ||
2402 | |||
2403 | def startTagOther(self, token): | ||
2404 | return self.parser.phases["inSelect"].processStartTag(token) | ||
2405 | |||
2406 | def endTagTable(self, token): | ||
2407 | self.parser.parseError("unexpected-table-element-end-tag-in-select-in-table", {"name": token["name"]}) | ||
2408 | if self.tree.elementInScope(token["name"], variant="table"): | ||
2409 | self.endTagOther(impliedTagToken("select")) | ||
2410 | return token | ||
2411 | |||
2412 | def endTagOther(self, token): | ||
2413 | return self.parser.phases["inSelect"].processEndTag(token) | ||
2414 | |||
    class InForeignContentPhase(Phase):
        """Token handling for content in a foreign (SVG/MathML) namespace."""

        # HTML elements whose start tag "breaks out" of foreign content
        # back into the HTML namespace.
        breakoutElements = frozenset(["b", "big", "blockquote", "body", "br",
                                      "center", "code", "dd", "div", "dl", "dt",
                                      "em", "embed", "h1", "h2", "h3",
                                      "h4", "h5", "h6", "head", "hr", "i", "img",
                                      "li", "listing", "menu", "meta", "nobr",
                                      "ol", "p", "pre", "ruby", "s", "small",
                                      "span", "strong", "strike", "sub", "sup",
                                      "table", "tt", "u", "ul", "var"])

        def __init__(self, parser, tree):
            Phase.__init__(self, parser, tree)

        def adjustSVGTagNames(self, token):
            # Restore the canonical mixed-case SVG tag names that the
            # tokenizer lower-cased.
            replacements = {"altglyph": "altGlyph",
                            "altglyphdef": "altGlyphDef",
                            "altglyphitem": "altGlyphItem",
                            "animatecolor": "animateColor",
                            "animatemotion": "animateMotion",
                            "animatetransform": "animateTransform",
                            "clippath": "clipPath",
                            "feblend": "feBlend",
                            "fecolormatrix": "feColorMatrix",
                            "fecomponenttransfer": "feComponentTransfer",
                            "fecomposite": "feComposite",
                            "feconvolvematrix": "feConvolveMatrix",
                            "fediffuselighting": "feDiffuseLighting",
                            "fedisplacementmap": "feDisplacementMap",
                            "fedistantlight": "feDistantLight",
                            "feflood": "feFlood",
                            "fefunca": "feFuncA",
                            "fefuncb": "feFuncB",
                            "fefuncg": "feFuncG",
                            "fefuncr": "feFuncR",
                            "fegaussianblur": "feGaussianBlur",
                            "feimage": "feImage",
                            "femerge": "feMerge",
                            "femergenode": "feMergeNode",
                            "femorphology": "feMorphology",
                            "feoffset": "feOffset",
                            "fepointlight": "fePointLight",
                            "fespecularlighting": "feSpecularLighting",
                            "fespotlight": "feSpotLight",
                            "fetile": "feTile",
                            "feturbulence": "feTurbulence",
                            "foreignobject": "foreignObject",
                            "glyphref": "glyphRef",
                            "lineargradient": "linearGradient",
                            "radialgradient": "radialGradient",
                            "textpath": "textPath"}

            if token["name"] in replacements:
                token["name"] = replacements[token["name"]]

        def processCharacters(self, token):
            # NULs become U+FFFD; any non-space character makes a later
            # <frameset> invalid.
            if token["data"] == "\u0000":
                token["data"] = "\uFFFD"
            elif (self.parser.framesetOK and
                  any(char not in spaceCharacters for char in token["data"])):
                self.parser.framesetOK = False
            Phase.processCharacters(self, token)

        def processStartTag(self, token):
            currentNode = self.tree.openElements[-1]
            if (token["name"] in self.breakoutElements or
                (token["name"] == "font" and
                 set(token["data"].keys()) & set(["color", "face", "size"]))):
                # Breakout tag: pop foreign elements until we reach HTML
                # content or an integration point, then reprocess.
                self.parser.parseError("unexpected-html-element-in-foreign-content",
                                       {"name": token["name"]})
                while (self.tree.openElements[-1].namespace !=
                       self.tree.defaultNamespace and
                       not self.parser.isHTMLIntegrationPoint(self.tree.openElements[-1]) and
                       not self.parser.isMathMLTextIntegrationPoint(self.tree.openElements[-1])):
                    self.tree.openElements.pop()
                return token

            else:
                # Adjust attributes/tag case for the current namespace and
                # insert the element there.
                if currentNode.namespace == namespaces["mathml"]:
                    self.parser.adjustMathMLAttributes(token)
                elif currentNode.namespace == namespaces["svg"]:
                    self.adjustSVGTagNames(token)
                    self.parser.adjustSVGAttributes(token)
                self.parser.adjustForeignAttributes(token)
                token["namespace"] = currentNode.namespace
                self.tree.insertElement(token)
                if token["selfClosing"]:
                    self.tree.openElements.pop()
                    token["selfClosingAcknowledged"] = True

        def processEndTag(self, token):
            # Walk the open-elements stack from the top looking for a
            # case-insensitive name match; on reaching an HTML-namespace
            # node first, hand the token to the current HTML phase instead.
            nodeIndex = len(self.tree.openElements) - 1
            node = self.tree.openElements[-1]
            if node.name.translate(asciiUpper2Lower) != token["name"]:
                self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

            while True:
                if node.name.translate(asciiUpper2Lower) == token["name"]:
                    # XXX this isn't in the spec but it seems necessary
                    if self.parser.phase == self.parser.phases["inTableText"]:
                        self.parser.phase.flushCharacters()
                        self.parser.phase = self.parser.phase.originalPhase
                    while self.tree.openElements.pop() != node:
                        assert self.tree.openElements
                    new_token = None
                    break
                nodeIndex -= 1

                node = self.tree.openElements[nodeIndex]
                if node.namespace != self.tree.defaultNamespace:
                    continue
                else:
                    new_token = self.parser.phase.processEndTag(token)
                    break
            return new_token
2529 | |||
2530 | class AfterBodyPhase(Phase): | ||
2531 | def __init__(self, parser, tree): | ||
2532 | Phase.__init__(self, parser, tree) | ||
2533 | |||
2534 | self.startTagHandler = _utils.MethodDispatcher([ | ||
2535 | ("html", self.startTagHtml) | ||
2536 | ]) | ||
2537 | self.startTagHandler.default = self.startTagOther | ||
2538 | |||
2539 | self.endTagHandler = _utils.MethodDispatcher([("html", self.endTagHtml)]) | ||
2540 | self.endTagHandler.default = self.endTagOther | ||
2541 | |||
2542 | def processEOF(self): | ||
2543 | # Stop parsing | ||
2544 | pass | ||
2545 | |||
2546 | def processComment(self, token): | ||
2547 | # This is needed because data is to be appended to the <html> element | ||
2548 | # here and not to whatever is currently open. | ||
2549 | self.tree.insertComment(token, self.tree.openElements[0]) | ||
2550 | |||
2551 | def processCharacters(self, token): | ||
2552 | self.parser.parseError("unexpected-char-after-body") | ||
2553 | self.parser.phase = self.parser.phases["inBody"] | ||
2554 | return token | ||
2555 | |||
2556 | def startTagHtml(self, token): | ||
2557 | return self.parser.phases["inBody"].processStartTag(token) | ||
2558 | |||
2559 | def startTagOther(self, token): | ||
2560 | self.parser.parseError("unexpected-start-tag-after-body", | ||
2561 | {"name": token["name"]}) | ||
2562 | self.parser.phase = self.parser.phases["inBody"] | ||
2563 | return token | ||
2564 | |||
2565 | def endTagHtml(self, name): | ||
2566 | if self.parser.innerHTML: | ||
2567 | self.parser.parseError("unexpected-end-tag-after-body-innerhtml") | ||
2568 | else: | ||
2569 | self.parser.phase = self.parser.phases["afterAfterBody"] | ||
2570 | |||
2571 | def endTagOther(self, token): | ||
2572 | self.parser.parseError("unexpected-end-tag-after-body", | ||
2573 | {"name": token["name"]}) | ||
2574 | self.parser.phase = self.parser.phases["inBody"] | ||
2575 | return token | ||
2576 | |||
    class InFramesetPhase(Phase):
        # http://www.whatwg.org/specs/web-apps/current-work/#in-frameset
        """The "in frameset" insertion mode (inside an open <frameset>)."""

        def __init__(self, parser, tree):
            Phase.__init__(self, parser, tree)

            self.startTagHandler = _utils.MethodDispatcher([
                ("html", self.startTagHtml),
                ("frameset", self.startTagFrameset),
                ("frame", self.startTagFrame),
                ("noframes", self.startTagNoframes)
            ])
            self.startTagHandler.default = self.startTagOther

            self.endTagHandler = _utils.MethodDispatcher([
                ("frameset", self.endTagFrameset)
            ])
            self.endTagHandler.default = self.endTagOther

        def processEOF(self):
            # EOF with an open frameset is an error except in the
            # fragment case, where only <html> remains open.
            if self.tree.openElements[-1].name != "html":
                self.parser.parseError("eof-in-frameset")
            else:
                assert self.parser.innerHTML

        def processCharacters(self, token):
            self.parser.parseError("unexpected-char-in-frameset")

        def startTagFrameset(self, token):
            # Nested framesets are allowed and simply stack up.
            self.tree.insertElement(token)

        def startTagFrame(self, token):
            # <frame> is void: insert and pop immediately.
            self.tree.insertElement(token)
            self.tree.openElements.pop()

        def startTagNoframes(self, token):
            return self.parser.phases["inBody"].processStartTag(token)

        def startTagOther(self, token):
            self.parser.parseError("unexpected-start-tag-in-frameset",
                                   {"name": token["name"]})

        def endTagFrameset(self, token):
            if self.tree.openElements[-1].name == "html":
                # innerHTML case
                self.parser.parseError("unexpected-frameset-in-frameset-innerhtml")
            else:
                self.tree.openElements.pop()
            if (not self.parser.innerHTML and
                    self.tree.openElements[-1].name != "frameset"):
                # If we're not in innerHTML mode and the current node is not a
                # "frameset" element (anymore) then switch.
                self.parser.phase = self.parser.phases["afterFrameset"]

        def endTagOther(self, token):
            self.parser.parseError("unexpected-end-tag-in-frameset",
                                   {"name": token["name"]})
2633 | |||
2634 | class AfterFramesetPhase(Phase): | ||
2635 | # http://www.whatwg.org/specs/web-apps/current-work/#after3 | ||
2636 | def __init__(self, parser, tree): | ||
2637 | Phase.__init__(self, parser, tree) | ||
2638 | |||
2639 | self.startTagHandler = _utils.MethodDispatcher([ | ||
2640 | ("html", self.startTagHtml), | ||
2641 | ("noframes", self.startTagNoframes) | ||
2642 | ]) | ||
2643 | self.startTagHandler.default = self.startTagOther | ||
2644 | |||
2645 | self.endTagHandler = _utils.MethodDispatcher([ | ||
2646 | ("html", self.endTagHtml) | ||
2647 | ]) | ||
2648 | self.endTagHandler.default = self.endTagOther | ||
2649 | |||
2650 | def processEOF(self): | ||
2651 | # Stop parsing | ||
2652 | pass | ||
2653 | |||
2654 | def processCharacters(self, token): | ||
2655 | self.parser.parseError("unexpected-char-after-frameset") | ||
2656 | |||
2657 | def startTagNoframes(self, token): | ||
2658 | return self.parser.phases["inHead"].processStartTag(token) | ||
2659 | |||
2660 | def startTagOther(self, token): | ||
2661 | self.parser.parseError("unexpected-start-tag-after-frameset", | ||
2662 | {"name": token["name"]}) | ||
2663 | |||
2664 | def endTagHtml(self, token): | ||
2665 | self.parser.phase = self.parser.phases["afterAfterFrameset"] | ||
2666 | |||
2667 | def endTagOther(self, token): | ||
2668 | self.parser.parseError("unexpected-end-tag-after-frameset", | ||
2669 | {"name": token["name"]}) | ||
2670 | |||
2671 | class AfterAfterBodyPhase(Phase): | ||
2672 | def __init__(self, parser, tree): | ||
2673 | Phase.__init__(self, parser, tree) | ||
2674 | |||
2675 | self.startTagHandler = _utils.MethodDispatcher([ | ||
2676 | ("html", self.startTagHtml) | ||
2677 | ]) | ||
2678 | self.startTagHandler.default = self.startTagOther | ||
2679 | |||
2680 | def processEOF(self): | ||
2681 | pass | ||
2682 | |||
2683 | def processComment(self, token): | ||
2684 | self.tree.insertComment(token, self.tree.document) | ||
2685 | |||
2686 | def processSpaceCharacters(self, token): | ||
2687 | return self.parser.phases["inBody"].processSpaceCharacters(token) | ||
2688 | |||
2689 | def processCharacters(self, token): | ||
2690 | self.parser.parseError("expected-eof-but-got-char") | ||
2691 | self.parser.phase = self.parser.phases["inBody"] | ||
2692 | return token | ||
2693 | |||
2694 | def startTagHtml(self, token): | ||
2695 | return self.parser.phases["inBody"].processStartTag(token) | ||
2696 | |||
2697 | def startTagOther(self, token): | ||
2698 | self.parser.parseError("expected-eof-but-got-start-tag", | ||
2699 | {"name": token["name"]}) | ||
2700 | self.parser.phase = self.parser.phases["inBody"] | ||
2701 | return token | ||
2702 | |||
2703 | def processEndTag(self, token): | ||
2704 | self.parser.parseError("expected-eof-but-got-end-tag", | ||
2705 | {"name": token["name"]}) | ||
2706 | self.parser.phase = self.parser.phases["inBody"] | ||
2707 | return token | ||
2708 | |||
    class AfterAfterFramesetPhase(Phase):
        """The final "after after frameset" insertion mode."""

        def __init__(self, parser, tree):
            Phase.__init__(self, parser, tree)

            self.startTagHandler = _utils.MethodDispatcher([
                ("html", self.startTagHtml),
                ("noframes", self.startTagNoFrames)
            ])
            self.startTagHandler.default = self.startTagOther

        def processEOF(self):
            # EOF is the expected outcome; nothing left to do.
            pass

        def processComment(self, token):
            # Comments at this point attach to the Document node itself.
            self.tree.insertComment(token, self.tree.document)

        def processSpaceCharacters(self, token):
            return self.parser.phases["inBody"].processSpaceCharacters(token)

        def processCharacters(self, token):
            self.parser.parseError("expected-eof-but-got-char")

        def startTagHtml(self, token):
            return self.parser.phases["inBody"].processStartTag(token)

        def startTagNoFrames(self, token):
            return self.parser.phases["inHead"].processStartTag(token)

        def startTagOther(self, token):
            self.parser.parseError("expected-eof-but-got-start-tag",
                                   {"name": token["name"]})

        def processEndTag(self, token):
            self.parser.parseError("expected-eof-but-got-end-tag",
                                   {"name": token["name"]})
    # pylint:enable=unused-argument
2745 | |||
    # Map each insertion-mode name to the Phase class implementing it;
    # the parser instantiates one of each and switches between them via
    # ``self.parser.phases[...]``.
    return {
        "initial": InitialPhase,
        "beforeHtml": BeforeHtmlPhase,
        "beforeHead": BeforeHeadPhase,
        "inHead": InHeadPhase,
        "inHeadNoscript": InHeadNoscriptPhase,
        "afterHead": AfterHeadPhase,
        "inBody": InBodyPhase,
        "text": TextPhase,
        "inTable": InTablePhase,
        "inTableText": InTableTextPhase,
        "inCaption": InCaptionPhase,
        "inColumnGroup": InColumnGroupPhase,
        "inTableBody": InTableBodyPhase,
        "inRow": InRowPhase,
        "inCell": InCellPhase,
        "inSelect": InSelectPhase,
        "inSelectInTable": InSelectInTablePhase,
        "inForeignContent": InForeignContentPhase,
        "afterBody": AfterBodyPhase,
        "inFrameset": InFramesetPhase,
        "afterFrameset": AfterFramesetPhase,
        "afterAfterBody": AfterAfterBodyPhase,
        "afterAfterFrameset": AfterAfterFramesetPhase,
    }
2772 | |||
2773 | |||
def adjust_attributes(token, replacements):
    """Rename any of *token*'s attributes that appear in *replacements*.

    The token's ``data`` mapping is rebuilt (order preserved) only when at
    least one attribute name actually needs replacing.
    """
    if viewkeys(token['data']) & viewkeys(replacements):
        token['data'] = OrderedDict(
            (replacements.get(name, name), value)
            for name, value in token['data'].items())
2779 | |||
2780 | |||
def impliedTagToken(name, type="EndTag", attributes=None,
                    selfClosing=False):
    """Build a synthetic tag token of the given *type* for tag *name*.

    Used by the tree-construction phases to behave as though a tag had
    appeared in the input stream.  *attributes* defaults to a fresh empty
    dict so callers never share mutable state.
    """
    return {"type": tokenTypes[type],
            "name": name,
            "data": {} if attributes is None else attributes,
            "selfClosing": selfClosing}
2787 | |||
2788 | |||
class ParseError(Exception):
    """Exception signalling an error in the parsed document."""
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/serializer.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/serializer.py new file mode 100644 index 0000000..641323e --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/serializer.py | |||
@@ -0,0 +1,409 @@ | |||
1 | from __future__ import absolute_import, division, unicode_literals | ||
2 | from pip._vendor.six import text_type | ||
3 | |||
4 | import re | ||
5 | |||
6 | from codecs import register_error, xmlcharrefreplace_errors | ||
7 | |||
8 | from .constants import voidElements, booleanAttributes, spaceCharacters | ||
9 | from .constants import rcdataElements, entities, xmlEntities | ||
10 | from . import treewalkers, _utils | ||
11 | from xml.sax.saxutils import escape | ||
12 | |||
# Characters whose presence in an attribute value requires quoting per the
# HTML specification.
_quoteAttributeSpecChars = "".join(spaceCharacters) + "\"'=<>`"
_quoteAttributeSpec = re.compile("[" + _quoteAttributeSpecChars + "]")
# Superset used by the default "legacy" mode: additionally quote on control
# characters, "/", "`", and the many Unicode space characters, for safety in
# older browsers.
_quoteAttributeLegacy = re.compile("[" + _quoteAttributeSpecChars +
                                   "\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n"
                                   "\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15"
                                   "\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
                                   "\x20\x2f\x60\xa0\u1680\u180e\u180f\u2000"
                                   "\u2001\u2002\u2003\u2004\u2005\u2006\u2007"
                                   "\u2008\u2009\u200a\u2028\u2029\u202f\u205f"
                                   "\u3000]")


# Map of codepoint -> preferred named entity (without the leading "&"),
# consumed by the "htmlentityreplace" codec error handler below.
_encode_entity_map = {}
_is_ucs4 = len("\U0010FFFF") == 1
for k, v in list(entities.items()):
    # skip multi-character entities
    if ((_is_ucs4 and len(v) > 1) or
            (not _is_ucs4 and len(v) > 2)):
        continue
    if v != "&":
        if len(v) == 2:
            # On narrow (UCS-2) builds an astral codepoint arrives as a
            # surrogate pair; collapse it to the real codepoint.
            v = _utils.surrogatePairToCodepoint(v)
        else:
            v = ord(v)
        if v not in _encode_entity_map or k.islower():
            # prefer &lt; over &LT; and similarly for &amp;, &gt;, etc.
            _encode_entity_map[v] = k
40 | |||
41 | |||
def htmlentityreplace_errors(exc):
    """Codec error handler replacing unencodable text with HTML entities.

    Named entities from ``_encode_entity_map`` are preferred; anything else
    becomes a hexadecimal numeric character reference. Non-encode errors are
    delegated to the stdlib ``xmlcharrefreplace`` handler.
    """
    if not isinstance(exc, (UnicodeEncodeError, UnicodeTranslateError)):
        return xmlcharrefreplace_errors(exc)

    # First pass: collect the codepoints in the failing range, merging
    # surrogate pairs (narrow builds) into single astral codepoints.
    codepoints = []
    skip_next = False
    for offset, char in enumerate(exc.object[exc.start:exc.end]):
        if skip_next:
            skip_next = False
            continue
        pos = exc.start + offset
        if _utils.isSurrogatePair(exc.object[pos:min([exc.end, pos + 2])]):
            codepoints.append(
                _utils.surrogatePairToCodepoint(exc.object[pos:pos + 2]))
            skip_next = True
        else:
            codepoints.append(ord(char))

    # Second pass: render each codepoint as an entity reference.
    pieces = []
    for cp in codepoints:
        entity = _encode_entity_map.get(cp)
        if entity:
            pieces.append("&")
            pieces.append(entity)
            if not entity.endswith(";"):
                pieces.append(";")
        else:
            pieces.append("&#x%x;" % cp)
    return ("".join(pieces), exc.end)


register_error("htmlentityreplace", htmlentityreplace_errors)
73 | |||
74 | |||
def serialize(input, tree="etree", encoding=None, **serializer_opts):
    """Serialize *input* to HTML text using the named treewalker.

    :arg input: the tree/token stream to serialize

    :arg tree: name of the treewalker implementation to use (e.g. ``"etree"``)

    :arg encoding: output byte encoding, or ``None`` for text output

    :arg serializer_opts: keyword options forwarded to
        :py:class:`html5lib.serializer.HTMLSerializer`

    :returns: the tree serialized as a string

    Example:

    >>> from html5lib.html5parser import parse
    >>> from html5lib.serializer import serialize
    >>> token_stream = parse('<html><body><p>Hi!</p></body></html>')
    >>> serialize(token_stream, omit_optional_tags=False)
    '<html><head></head><body><p>Hi!</p></body></html>'

    """
    # XXX: Should we cache this?
    walker_factory = treewalkers.getTreeWalker(tree)
    html_serializer = HTMLSerializer(**serializer_opts)
    return html_serializer.render(walker_factory(input), encoding)
102 | |||
103 | |||
class HTMLSerializer(object):
    """Serializes a token stream (from a treewalker) into HTML text.

    All options are class attributes, so defaults can be changed by
    subclassing or overridden per instance through keyword arguments to
    :meth:`__init__` (the recognised names are listed in ``options``).
    """

    # attribute quoting options
    quote_attr_values = "legacy"  # be secure by default
    quote_char = '"'
    use_best_quote_char = True

    # tag syntax options
    omit_optional_tags = True
    minimize_boolean_attributes = True
    use_trailing_solidus = False
    space_before_trailing_solidus = True

    # escaping options
    escape_lt_in_attrs = False
    escape_rcdata = False
    resolve_entities = True

    # miscellaneous options
    alphabetical_attributes = False
    inject_meta_charset = True
    strip_whitespace = False
    sanitize = False

    # Every keyword accepted by __init__; anything else raises TypeError.
    options = ("quote_attr_values", "quote_char", "use_best_quote_char",
               "omit_optional_tags", "minimize_boolean_attributes",
               "use_trailing_solidus", "space_before_trailing_solidus",
               "escape_lt_in_attrs", "escape_rcdata", "resolve_entities",
               "alphabetical_attributes", "inject_meta_charset",
               "strip_whitespace", "sanitize")

    def __init__(self, **kwargs):
        """Initialize HTMLSerializer

        :arg inject_meta_charset: Whether or not to inject the meta charset.

            Defaults to ``True``.

        :arg quote_attr_values: Whether to quote attribute values that don't
            require quoting per legacy browser behavior (``"legacy"``), when
            required by the standard (``"spec"``), or always (``"always"``).

            Defaults to ``"legacy"``.

        :arg quote_char: Use given quote character for attribute quoting.

            Defaults to ``"`` which will use double quotes unless attribute
            value contains a double quote, in which case single quotes are
            used.

        :arg escape_lt_in_attrs: Whether or not to escape ``<`` in attribute
            values.

            Defaults to ``False``.

        :arg escape_rcdata: Whether to escape characters that need to be
            escaped within normal elements within rcdata elements such as
            style.

            Defaults to ``False``.

        :arg resolve_entities: Whether to resolve named character entities that
            appear in the source tree. The XML predefined entities ``&lt;``
            ``&gt;`` ``&amp;`` ``&quot;`` ``&apos;`` are unaffected by this
            setting.

            Defaults to ``True``.

        :arg strip_whitespace: Whether to remove semantically meaningless
            whitespace. (This compresses all whitespace to a single space
            except within ``pre``.)

            Defaults to ``False``.

        :arg minimize_boolean_attributes: Shortens boolean attributes to give
            just the attribute value, for example::

              <input disabled="disabled">

            becomes::

              <input disabled>

            Defaults to ``True``.

        :arg use_trailing_solidus: Includes a close-tag slash at the end of the
            start tag of void elements (empty elements whose end tag is
            forbidden). E.g. ``<hr/>``.

            Defaults to ``False``.

        :arg space_before_trailing_solidus: Places a space immediately before
            the closing slash in a tag using a trailing solidus. E.g.
            ``<hr />``. Requires ``use_trailing_solidus=True``.

            Defaults to ``True``.

        :arg sanitize: Strip all unsafe or unknown constructs from output.
            See :py:class:`html5lib.filters.sanitizer.Filter`.

            Defaults to ``False``.

        :arg omit_optional_tags: Omit start/end tags that are optional.

            Defaults to ``True``.

        :arg alphabetical_attributes: Reorder attributes to be in alphabetical order.

            Defaults to ``False``.

        """
        unexpected_args = frozenset(kwargs) - frozenset(self.options)
        if len(unexpected_args) > 0:
            raise TypeError("__init__() got an unexpected keyword argument '%s'" % next(iter(unexpected_args)))
        # An explicitly supplied quote_char disables best-quote selection.
        if 'quote_char' in kwargs:
            self.use_best_quote_char = False
        for attr in self.options:
            setattr(self, attr, kwargs.get(attr, getattr(self, attr)))
        self.errors = []
        self.strict = False

    def encode(self, string):
        # Encode text content: unencodable characters are turned into
        # character/entity references via the "htmlentityreplace" handler.
        assert(isinstance(string, text_type))
        if self.encoding:
            return string.encode(self.encoding, "htmlentityreplace")
        else:
            return string

    def encodeStrict(self, string):
        # Encode markup (tag names, punctuation): an unencodable character
        # here is a hard error rather than something to escape.
        assert(isinstance(string, text_type))
        if self.encoding:
            return string.encode(self.encoding, "strict")
        else:
            return string

    def serialize(self, treewalker, encoding=None):
        """Generator yielding the serialized output chunk by chunk.

        :arg treewalker: the token stream to serialize

        :arg encoding: output encoding, or None to yield text chunks

        """
        # pylint:disable=too-many-nested-blocks
        self.encoding = encoding
        in_cdata = False
        self.errors = []

        if encoding and self.inject_meta_charset:
            from .filters.inject_meta_charset import Filter
            treewalker = Filter(treewalker, encoding)
        # Alphabetical attributes is here under the assumption that none of
        # the later filters add or change order of attributes; it needs to be
        # before the sanitizer so escaped elements come out correctly
        if self.alphabetical_attributes:
            from .filters.alphabeticalattributes import Filter
            treewalker = Filter(treewalker)
        # WhitespaceFilter should be used before OptionalTagFilter
        # for maximum efficiency of this latter filter
        if self.strip_whitespace:
            from .filters.whitespace import Filter
            treewalker = Filter(treewalker)
        if self.sanitize:
            from .filters.sanitizer import Filter
            treewalker = Filter(treewalker)
        if self.omit_optional_tags:
            from .filters.optionaltags import Filter
            treewalker = Filter(treewalker)

        for token in treewalker:
            type = token["type"]
            if type == "Doctype":
                doctype = "<!DOCTYPE %s" % token["name"]

                if token["publicId"]:
                    doctype += ' PUBLIC "%s"' % token["publicId"]
                elif token["systemId"]:
                    doctype += " SYSTEM"
                if token["systemId"]:
                    # Pick whichever quote character does not occur in the
                    # system identifier; error if both do.
                    if token["systemId"].find('"') >= 0:
                        if token["systemId"].find("'") >= 0:
                            self.serializeError("System identifer contains both single and double quote characters")
                        quote_char = "'"
                    else:
                        quote_char = '"'
                    doctype += " %s%s%s" % (quote_char, token["systemId"], quote_char)

                doctype += ">"
                yield self.encodeStrict(doctype)

            elif type in ("Characters", "SpaceCharacters"):
                if type == "SpaceCharacters" or in_cdata:
                    # Raw text (rcdata) content is emitted unescaped.
                    if in_cdata and token["data"].find("</") >= 0:
                        self.serializeError("Unexpected </ in CDATA")
                    yield self.encode(token["data"])
                else:
                    yield self.encode(escape(token["data"]))

            elif type in ("StartTag", "EmptyTag"):
                name = token["name"]
                yield self.encodeStrict("<%s" % name)
                if name in rcdataElements and not self.escape_rcdata:
                    in_cdata = True
                elif in_cdata:
                    self.serializeError("Unexpected child element of a CDATA element")
                for (_, attr_name), attr_value in token["data"].items():
                    # TODO: Add namespace support here
                    k = attr_name
                    v = attr_value
                    yield self.encodeStrict(' ')

                    yield self.encodeStrict(k)
                    if not self.minimize_boolean_attributes or \
                        (k not in booleanAttributes.get(name, tuple()) and
                         k not in booleanAttributes.get("", tuple())):
                        yield self.encodeStrict("=")
                        if self.quote_attr_values == "always" or len(v) == 0:
                            quote_attr = True
                        elif self.quote_attr_values == "spec":
                            quote_attr = _quoteAttributeSpec.search(v) is not None
                        elif self.quote_attr_values == "legacy":
                            quote_attr = _quoteAttributeLegacy.search(v) is not None
                        else:
                            raise ValueError("quote_attr_values must be one of: "
                                             "'always', 'spec', or 'legacy'")
                        # Escape the value: "&" always, "<" only on request.
                        # (These literals were mangled by HTML-unescaping in
                        # the original dump; restored here.)
                        v = v.replace("&", "&amp;")
                        if self.escape_lt_in_attrs:
                            v = v.replace("<", "&lt;")
                        if quote_attr:
                            quote_char = self.quote_char
                            if self.use_best_quote_char:
                                if "'" in v and '"' not in v:
                                    quote_char = '"'
                                elif '"' in v and "'" not in v:
                                    quote_char = "'"
                            if quote_char == "'":
                                v = v.replace("'", "&#39;")
                            else:
                                v = v.replace('"', "&quot;")
                            yield self.encodeStrict(quote_char)
                            yield self.encode(v)
                            yield self.encodeStrict(quote_char)
                        else:
                            yield self.encode(v)
                if name in voidElements and self.use_trailing_solidus:
                    if self.space_before_trailing_solidus:
                        yield self.encodeStrict(" /")
                    else:
                        yield self.encodeStrict("/")
                yield self.encode(">")

            elif type == "EndTag":
                name = token["name"]
                if name in rcdataElements:
                    in_cdata = False
                elif in_cdata:
                    self.serializeError("Unexpected child element of a CDATA element")
                yield self.encodeStrict("</%s>" % name)

            elif type == "Comment":
                data = token["data"]
                if data.find("--") >= 0:
                    self.serializeError("Comment contains --")
                yield self.encodeStrict("<!--%s-->" % token["data"])

            elif type == "Entity":
                name = token["name"]
                key = name + ";"
                if key not in entities:
                    self.serializeError("Entity %s not recognized" % name)
                if self.resolve_entities and key not in xmlEntities:
                    data = entities[key]
                else:
                    data = "&%s;" % name
                yield self.encodeStrict(data)

            else:
                self.serializeError(token["data"])

    def render(self, treewalker, encoding=None):
        """Serializes the stream from the treewalker into a string

        :arg treewalker: the treewalker to serialize

        :arg encoding: the string encoding to use

        :returns: the serialized tree

        Example:

        >>> from html5lib import parse, getTreeWalker
        >>> from html5lib.serializer import HTMLSerializer
        >>> token_stream = parse('<html><body>Hi!</body></html>')
        >>> walker = getTreeWalker('etree')
        >>> serializer = HTMLSerializer(omit_optional_tags=False)
        >>> serializer.render(walker(token_stream))
        '<html><head></head><body>Hi!</body></html>'

        """
        if encoding:
            return b"".join(list(self.serialize(treewalker, encoding)))
        else:
            return "".join(list(self.serialize(treewalker)))

    def serializeError(self, data="XXX ERROR MESSAGE NEEDED"):
        # XXX The idea is to make data mandatory.
        # Record the error; in strict mode abort serialization immediately.
        self.errors.append(data)
        if self.strict:
            raise SerializeError
405 | |||
406 | |||
class SerializeError(Exception):
    """Raised (in strict mode) when the serializer encounters an error."""
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treeadapters/__init__.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treeadapters/__init__.py new file mode 100644 index 0000000..8767fb0 --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treeadapters/__init__.py | |||
@@ -0,0 +1,30 @@ | |||
1 | """Tree adapters let you convert from one tree structure to another | ||
2 | |||
3 | Example: | ||
4 | |||
5 | .. code-block:: python | ||
6 | |||
7 | from pip._vendor import html5lib | ||
8 | from pip._vendor.html5lib.treeadapters import genshi | ||
9 | |||
10 | doc = '<html><body>Hi!</body></html>' | ||
11 | treebuilder = html5lib.getTreeBuilder('etree') | ||
12 | parser = html5lib.HTMLParser(tree=treebuilder) | ||
13 | tree = parser.parse(doc) | ||
14 | TreeWalker = html5lib.getTreeWalker('etree') | ||
15 | |||
16 | genshi_tree = genshi.to_genshi(TreeWalker(tree)) | ||
17 | |||
18 | """ | ||
19 | from __future__ import absolute_import, division, unicode_literals | ||
20 | |||
21 | from . import sax | ||
22 | |||
23 | __all__ = ["sax"] | ||
24 | |||
# genshi is an optional third-party dependency: expose the genshi adapter
# only when the package is importable.
try:
    from . import genshi  # noqa
except ImportError:
    pass
else:
    __all__.append("genshi")
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treeadapters/genshi.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treeadapters/genshi.py new file mode 100644 index 0000000..73c70c6 --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treeadapters/genshi.py | |||
@@ -0,0 +1,54 @@ | |||
1 | from __future__ import absolute_import, division, unicode_literals | ||
2 | |||
3 | from genshi.core import QName, Attrs | ||
4 | from genshi.core import START, END, TEXT, COMMENT, DOCTYPE | ||
5 | |||
6 | |||
def to_genshi(walker):
    """Convert a tree to a genshi tree

    :arg walker: the treewalker to use to walk the tree to convert it

    :returns: generator of genshi nodes

    """
    # Buffer of adjacent character tokens; flushed as one TEXT event as soon
    # as a non-character token arrives (or at end of stream).
    text = []
    for token in walker:
        type = token["type"]
        if type in ("Characters", "SpaceCharacters"):
            text.append(token["data"])
        elif text:
            # A non-character token ends the current text run: emit it first.
            yield TEXT, "".join(text), (None, -1, -1)
            text = []

        if type in ("StartTag", "EmptyTag"):
            if token["namespace"]:
                name = "{%s}%s" % (token["namespace"], token["name"])
            else:
                name = token["name"]
            # Attribute keys are (namespace, name) pairs; only qualify the
            # namespaced ones.
            attrs = Attrs([(QName("{%s}%s" % attr if attr[0] is not None else attr[1]), value)
                           for attr, value in token["data"].items()])
            yield (START, (QName(name), attrs), (None, -1, -1))
            if type == "EmptyTag":
                # An empty tag is START immediately followed by END: fall
                # through to the EndTag branch below (deliberately "if", not
                # "elif").
                type = "EndTag"

        if type == "EndTag":
            if token["namespace"]:
                name = "{%s}%s" % (token["namespace"], token["name"])
            else:
                name = token["name"]

            yield END, QName(name), (None, -1, -1)

        elif type == "Comment":
            yield COMMENT, token["data"], (None, -1, -1)

        elif type == "Doctype":
            yield DOCTYPE, (token["name"], token["publicId"],
                            token["systemId"]), (None, -1, -1)

        else:
            pass  # FIXME: What to do?

    # Flush any trailing text at end of stream.
    if text:
        yield TEXT, "".join(text), (None, -1, -1)
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treeadapters/sax.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treeadapters/sax.py new file mode 100644 index 0000000..1f06d13 --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treeadapters/sax.py | |||
@@ -0,0 +1,50 @@ | |||
1 | from __future__ import absolute_import, division, unicode_literals | ||
2 | |||
3 | from xml.sax.xmlreader import AttributesNSImpl | ||
4 | |||
5 | from ..constants import adjustForeignAttributes, unadjustForeignAttributes | ||
6 | |||
# Map of namespace prefix -> namespace URI for the foreign (non-HTML)
# attribute namespaces, derived from the parser's adjustment table.
prefix_mapping = {}
for prefix, localName, namespace in adjustForeignAttributes.values():
    if prefix is not None:
        prefix_mapping[prefix] = namespace
11 | |||
12 | |||
def to_sax(walker, handler):
    """Call SAX-like content handler based on treewalker walker

    :arg walker: the treewalker to use to walk the tree to convert it

    :arg handler: SAX handler to use

    """
    handler.startDocument()
    for pfx, ns in prefix_mapping.items():
        handler.startPrefixMapping(pfx, ns)

    for token in walker:
        token_type = token["type"]
        if token_type == "Doctype":
            # SAX ContentHandler has no doctype event; skip it.
            continue
        if token_type in ("StartTag", "EmptyTag"):
            qualified = (token["namespace"], token["name"])
            attrs = AttributesNSImpl(token["data"],
                                     unadjustForeignAttributes)
            handler.startElementNS(qualified, token["name"], attrs)
            if token_type == "EmptyTag":
                # An empty tag is start immediately followed by end.
                handler.endElementNS(qualified, token["name"])
        elif token_type == "EndTag":
            handler.endElementNS((token["namespace"], token["name"]),
                                 token["name"])
        elif token_type in ("Characters", "SpaceCharacters"):
            handler.characters(token["data"])
        elif token_type == "Comment":
            pass
        else:
            assert False, "Unknown token type"

    for pfx, ns in prefix_mapping.items():
        handler.endPrefixMapping(pfx)
    handler.endDocument()
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treebuilders/__init__.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treebuilders/__init__.py new file mode 100644 index 0000000..2ce5c87 --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treebuilders/__init__.py | |||
@@ -0,0 +1,88 @@ | |||
1 | """A collection of modules for building different kinds of trees from HTML | ||
2 | documents. | ||
3 | |||
4 | To create a treebuilder for a new type of tree, you need to do | ||
5 | implement several things: | ||
6 | |||
7 | 1. A set of classes for various types of elements: Document, Doctype, Comment, | ||
8 | Element. These must implement the interface of ``treebuilders.base.Node`` | ||
9 | (although comment nodes have a different signature for their constructor, | ||
10 | see ``treebuilders.etree.Comment``) Textual content may also be implemented | ||
11 | as another node type, or not, as your tree implementation requires. | ||
12 | |||
13 | 2. A treebuilder object (called ``TreeBuilder`` by convention) that inherits | ||
14 | from ``treebuilders.base.TreeBuilder``. This has 4 required attributes: | ||
15 | |||
16 | * ``documentClass`` - the class to use for the bottommost node of a document | ||
17 | * ``elementClass`` - the class to use for HTML Elements | ||
18 | * ``commentClass`` - the class to use for comments | ||
19 | * ``doctypeClass`` - the class to use for doctypes | ||
20 | |||
21 | It also has one required method: | ||
22 | |||
23 | * ``getDocument`` - Returns the root node of the complete document tree | ||
24 | |||
25 | 3. If you wish to run the unit tests, you must also create a ``testSerializer`` | ||
26 | method on your treebuilder which accepts a node and returns a string | ||
27 | containing Node and its children serialized according to the format used in | ||
28 | the unittests | ||
29 | |||
30 | """ | ||
31 | |||
32 | from __future__ import absolute_import, division, unicode_literals | ||
33 | |||
34 | from .._utils import default_etree | ||
35 | |||
36 | treeBuilderCache = {} | ||
37 | |||
38 | |||
def getTreeBuilder(treeType, implementation=None, **kwargs):
    """Get a TreeBuilder class for various types of trees with built-in support

    :arg treeType: the name of the tree type required (case-insensitive).
        Supported values are:

        * "dom" - A generic builder for DOM implementations, defaulting to a
          xml.dom.minidom based implementation.
        * "etree" - A generic builder for tree implementations exposing an
          ElementTree-like interface, defaulting to xml.etree.cElementTree if
          available and xml.etree.ElementTree if not.
        * "lxml" - A etree-based builder for lxml.etree, handling limitations
          of lxml's implementation.

    :arg implementation: (Currently applies to the "etree" and "dom" tree
        types). A module implementing the tree type e.g. xml.etree.ElementTree
        or xml.etree.cElementTree.

    :arg kwargs: Any additional options to pass to the TreeBuilder when
        creating it.

    Example:

    >>> from html5lib.treebuilders import getTreeBuilder
    >>> builder = getTreeBuilder('etree')

    """
    kind = treeType.lower()
    if kind not in treeBuilderCache:
        if kind == "dom":
            from . import dom
            # Come up with a sane default (pref. from the stdlib)
            if implementation is None:
                from xml.dom import minidom
                implementation = minidom
            # NEVER cache here, caching is done in the dom submodule
            return dom.getDomModule(implementation, **kwargs).TreeBuilder
        if kind == "lxml":
            from . import etree_lxml
            treeBuilderCache[kind] = etree_lxml.TreeBuilder
        elif kind == "etree":
            from . import etree
            if implementation is None:
                implementation = default_etree
            # NEVER cache here, caching is done in the etree submodule
            return etree.getETreeModule(implementation, **kwargs).TreeBuilder
        else:
            raise ValueError("""Unrecognised treebuilder "%s" """ % kind)
    return treeBuilderCache.get(kind)
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treebuilders/base.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treebuilders/base.py new file mode 100644 index 0000000..ed32fcb --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treebuilders/base.py | |||
@@ -0,0 +1,417 @@ | |||
1 | from __future__ import absolute_import, division, unicode_literals | ||
2 | from pip._vendor.six import text_type | ||
3 | |||
4 | from ..constants import scopingElements, tableInsertModeElements, namespaces | ||
5 | |||
# The scope markers are inserted when entering object elements,
# marquees, table cells, and table captions, and are used to prevent formatting
# from "leaking" into tables, object elements, and marquees.
Marker = None

# Maps an elementInScope() ``variant`` name to a pair of
# (frozenset of (namespace, name) tuples, invert flag).
# With invert False the scope search fails when a listed element is
# reached on the stack of open elements; with invert True ("select")
# it fails when a *non*-listed element is reached instead.
listElementsMap = {
    None: (frozenset(scopingElements), False),
    "button": (frozenset(scopingElements | set([(namespaces["html"], "button")])), False),
    "list": (frozenset(scopingElements | set([(namespaces["html"], "ol"),
                                              (namespaces["html"], "ul")])), False),
    "table": (frozenset([(namespaces["html"], "html"),
                         (namespaces["html"], "table")]), False),
    "select": (frozenset([(namespaces["html"], "optgroup"),
                          (namespaces["html"], "option")]), True)
}
21 | |||
22 | |||
class Node(object):
    """Abstract base class for an item in the tree.

    Concrete treebuilders subclass this and supply the insertion,
    removal and cloning primitives, which all raise NotImplementedError
    here.
    """

    def __init__(self, name):
        """Create a Node.

        :arg name: the tag name associated with the node

        """
        # Tag name of this node
        self.name = name
        # Parent node; None for the document node
        self.parent = None
        # Value of the node (applies to text nodes and comments)
        self.value = None
        # Mapping of attribute name -> attribute value
        self.attributes = {}
        # Child nodes; must contain all elements, other node types optional
        self.childNodes = []
        # Miscellaneous flags that can be set on the node
        self._flags = []

    def __str__(self):
        rendered = " ".join('%s="%s"' % item
                            for item in self.attributes.items())
        if not rendered:
            return "<%s>" % (self.name,)
        return "<%s %s>" % (self.name, rendered)

    def __repr__(self):
        return "<%s>" % (self.name,)

    def appendChild(self, node):
        """Append node to this node's children.

        :arg node: the node to insert

        """
        raise NotImplementedError

    def insertText(self, data, insertBefore=None):
        """Insert text data into this node, either before the child
        insertBefore or at the end of the node's existing text.

        :arg data: the text to insert

        :arg insertBefore: the child node to place the text before, or
            None to append the text at the end

        """
        raise NotImplementedError

    def insertBefore(self, node, refNode):
        """Insert node as a child of this node, immediately before the
        existing child refNode.  Raises ValueError if refNode is not a
        child of the current node.

        :arg node: the node to insert

        :arg refNode: the child node to insert before

        """
        raise NotImplementedError

    def removeChild(self, node):
        """Remove node from this node's children.

        :arg node: the child node to remove

        """
        raise NotImplementedError

    def reparentChildren(self, newParent):
        """Move all of this node's children onto newParent.

        Needed so that trees that don't store text as nodes still move
        the text in the correct way.

        :arg newParent: the node that receives all this node's children

        """
        # XXX - should this method be made more general?
        children, self.childNodes = self.childNodes, []
        for child in children:
            newParent.appendChild(child)

    def cloneNode(self):
        """Return a shallow copy of this node: same name and attributes,
        but no parent or child nodes."""
        raise NotImplementedError

    def hasContent(self):
        """Return true if the node has children or text, false otherwise."""
        raise NotImplementedError
120 | |||
121 | |||
class ActiveFormattingElements(list):
    """List of active formatting elements with a duplicate limit.

    Appending a non-marker node first removes the oldest of any three
    entries after the last marker that compare equal to the new node
    (same nameTuple and same attributes), so at most three equal
    entries ever accumulate between markers.
    """

    def append(self, node):
        if node != Marker:
            seen = 0
            # Walk back towards the last marker counting equal entries.
            for candidate in reversed(self):
                if candidate == Marker:
                    break
                if self.nodesEqual(candidate, node):
                    seen += 1
                    if seen == 3:
                        # Drop the earliest of the three equal entries.
                        self.remove(candidate)
                        break
        list.append(self, node)

    def nodesEqual(self, node1, node2):
        """Return True when both nodes have equal nameTuple and equal
        attributes."""
        return (node1.nameTuple == node2.nameTuple and
                node1.attributes == node2.attributes)
144 | |||
145 | |||
class TreeBuilder(object):
    """Base treebuilder implementation

    * documentClass - the class to use for the bottommost node of a document
    * elementClass - the class to use for HTML Elements
    * commentClass - the class to use for comments
    * doctypeClass - the class to use for doctypes

    """
    # pylint:disable=not-callable

    # Document class
    documentClass = None

    # The class to use for creating a node
    elementClass = None

    # The class to use for creating comments
    commentClass = None

    # The class to use for creating doctypes
    doctypeClass = None

    # Fragment class
    fragmentClass = None

    def __init__(self, namespaceHTMLElements):
        """Create a TreeBuilder

        :arg namespaceHTMLElements: whether or not to namespace HTML elements

        """
        if namespaceHTMLElements:
            self.defaultNamespace = "http://www.w3.org/1999/xhtml"
        else:
            self.defaultNamespace = None
        self.reset()

    def reset(self):
        """Reset all parsing state so the builder can be reused."""
        self.openElements = []
        self.activeFormattingElements = ActiveFormattingElements()

        # XXX - rename these to headElement, formElement
        self.headPointer = None
        self.formPointer = None

        # Property assignment: also selects which insertElement
        # implementation is active (see _setInsertFromTable).
        self.insertFromTable = False

        self.document = self.documentClass()

    def elementInScope(self, target, variant=None):
        """Report whether target is "in scope" on the open-elements stack.

        :arg target: either a node (matched exactly) or a tag-name string
            (matched against each node's nameTuple in the html namespace)

        :arg variant: key into listElementsMap choosing the scope
            definition (None, "button", "list", "table" or "select")

        """

        # If we pass a node in we match that. if we pass a string
        # match any node with that name
        exactNode = hasattr(target, "nameTuple")
        if not exactNode:
            if isinstance(target, text_type):
                target = (namespaces["html"], target)
            assert isinstance(target, tuple)

        listElements, invert = listElementsMap[variant]

        for node in reversed(self.openElements):
            if exactNode and node == target:
                return True
            elif not exactNode and node.nameTuple == target:
                return True
            elif (invert ^ (node.nameTuple in listElements)):
                return False

        assert False  # We should never reach this point

    def reconstructActiveFormattingElements(self):
        """Reopen formatting elements that are in the active formatting
        list but no longer on the stack of open elements, reinserting
        clones of them into the tree."""
        # Within this algorithm the order of steps described in the
        # specification is not quite the same as the order of steps in the
        # code. It should still do the same though.

        # Step 1: stop the algorithm when there's nothing to do.
        if not self.activeFormattingElements:
            return

        # Step 2 and step 3: we start with the last element. So i is -1.
        i = len(self.activeFormattingElements) - 1
        entry = self.activeFormattingElements[i]
        if entry == Marker or entry in self.openElements:
            return

        # Step 6
        while entry != Marker and entry not in self.openElements:
            if i == 0:
                # This will be reset to 0 below
                i = -1
                break
            i -= 1
            # Step 5: let entry be one earlier in the list.
            entry = self.activeFormattingElements[i]

        while True:
            # Step 7
            i += 1

            # Step 8
            entry = self.activeFormattingElements[i]
            clone = entry.cloneNode()  # Mainly to get a new copy of the attributes

            # Step 9
            element = self.insertElement({"type": "StartTag",
                                          "name": clone.name,
                                          "namespace": clone.namespace,
                                          "data": clone.attributes})

            # Step 10
            self.activeFormattingElements[i] = element

            # Step 11
            if element == self.activeFormattingElements[-1]:
                break

    def clearActiveFormattingElements(self):
        # Pop entries up to and including the most recent Marker.
        entry = self.activeFormattingElements.pop()
        while self.activeFormattingElements and entry != Marker:
            entry = self.activeFormattingElements.pop()

    def elementInActiveFormattingElements(self, name):
        """Check if an element exists between the end of the active
        formatting elements and the last marker. If it does, return it, else
        return false"""

        for item in self.activeFormattingElements[::-1]:
            # Check for Marker first because if it's a Marker it doesn't have a
            # name attribute.
            if item == Marker:
                break
            elif item.name == name:
                return item
        return False

    def insertRoot(self, token):
        """Create the root element from token and attach it to the
        document and the stack of open elements."""
        element = self.createElement(token)
        self.openElements.append(element)
        self.document.appendChild(element)

    def insertDoctype(self, token):
        """Append a doctype node built from token to the document."""
        name = token["name"]
        publicId = token["publicId"]
        systemId = token["systemId"]

        doctype = self.doctypeClass(name, publicId, systemId)
        self.document.appendChild(doctype)

    def insertComment(self, token, parent=None):
        """Append a comment node to parent (defaults to the current open
        element)."""
        if parent is None:
            parent = self.openElements[-1]
        parent.appendChild(self.commentClass(token["data"]))

    def createElement(self, token):
        """Create an element but don't insert it anywhere"""
        name = token["name"]
        # Tokens without an explicit namespace fall back to the default
        namespace = token.get("namespace", self.defaultNamespace)
        element = self.elementClass(name, namespace)
        element.attributes = token["data"]
        return element

    def _getInsertFromTable(self):
        return self._insertFromTable

    def _setInsertFromTable(self, value):
        """Switch the function used to insert an element from the
        normal one to the misnested table one and back again"""
        self._insertFromTable = value
        if value:
            self.insertElement = self.insertElementTable
        else:
            self.insertElement = self.insertElementNormal

    insertFromTable = property(_getInsertFromTable, _setInsertFromTable)

    def insertElementNormal(self, token):
        """Create an element from token and append it to the current open
        element; push it onto the stack of open elements."""
        name = token["name"]
        assert isinstance(name, text_type), "Element %s not unicode" % name
        namespace = token.get("namespace", self.defaultNamespace)
        element = self.elementClass(name, namespace)
        element.attributes = token["data"]
        self.openElements[-1].appendChild(element)
        self.openElements.append(element)
        return element

    def insertElementTable(self, token):
        """Create an element and insert it into the tree"""
        element = self.createElement(token)
        if self.openElements[-1].name not in tableInsertModeElements:
            # NOTE(review): the element created above is discarded on this
            # path; insertElementNormal builds its own from the token.
            return self.insertElementNormal(token)
        else:
            # We should be in the InTable mode. This means we want to do
            # special magic element rearranging
            parent, insertBefore = self.getTableMisnestedNodePosition()
            if insertBefore is None:
                parent.appendChild(element)
            else:
                parent.insertBefore(element, insertBefore)
            self.openElements.append(element)
        return element

    def insertText(self, data, parent=None):
        """Insert text data."""
        if parent is None:
            parent = self.openElements[-1]

        if (not self.insertFromTable or (self.insertFromTable and
                                         self.openElements[-1].name
                                         not in tableInsertModeElements)):
            parent.insertText(data)
        else:
            # We should be in the InTable mode. This means we want to do
            # special magic element rearranging
            parent, insertBefore = self.getTableMisnestedNodePosition()
            parent.insertText(data, insertBefore)

    def getTableMisnestedNodePosition(self):
        """Get the foster parent element, and sibling to insert before
        (or None) when inserting a misnested table node"""
        # The foster parent element is the one which comes before the most
        # recently opened table element
        # XXX - this is really inelegant
        lastTable = None
        fosterParent = None
        insertBefore = None
        for elm in self.openElements[::-1]:
            if elm.name == "table":
                lastTable = elm
                break
        if lastTable:
            # XXX - we should really check that this parent is actually a
            # node here
            if lastTable.parent:
                fosterParent = lastTable.parent
                insertBefore = lastTable
            else:
                # Table has no parent: foster-parent into the element just
                # below it on the stack of open elements.
                fosterParent = self.openElements[
                    self.openElements.index(lastTable) - 1]
        else:
            # No table open: insert into the root of the stack.
            fosterParent = self.openElements[0]
        return fosterParent, insertBefore

    def generateImpliedEndTags(self, exclude=None):
        """Pop elements whose end tags may be implied, recursing until the
        current node is no longer one of them.

        :arg exclude: a tag name that must not be popped

        """
        name = self.openElements[-1].name
        # XXX td, th and tr are not actually needed
        if (name in frozenset(("dd", "dt", "li", "option", "optgroup", "p", "rp", "rt")) and
                name != exclude):
            self.openElements.pop()
            # XXX This is not entirely what the specification says. We should
            # investigate it more closely.
            self.generateImpliedEndTags(exclude)

    def getDocument(self):
        """Return the final tree"""
        return self.document

    def getFragment(self):
        """Return the final fragment"""
        # assert self.innerHTML
        fragment = self.fragmentClass()
        self.openElements[0].reparentChildren(fragment)
        return fragment

    def testSerializer(self, node):
        """Serialize the subtree of node in the format required by unit tests

        :arg node: the node from which to start serializing

        """
        raise NotImplementedError
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treebuilders/dom.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treebuilders/dom.py new file mode 100644 index 0000000..8117b2d --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treebuilders/dom.py | |||
@@ -0,0 +1,236 @@ | |||
1 | from __future__ import absolute_import, division, unicode_literals | ||
2 | |||
3 | |||
4 | from collections import MutableMapping | ||
5 | from xml.dom import minidom, Node | ||
6 | import weakref | ||
7 | |||
8 | from . import base | ||
9 | from .. import constants | ||
10 | from ..constants import namespaces | ||
11 | from .._utils import moduleFactoryFactory | ||
12 | |||
13 | |||
def getDomBuilder(DomImplementation):
    """Build the treebuilder classes for a given xml.dom implementation
    and return them as a dict via ``return locals()`` (consumed by
    moduleFactoryFactory below).

    :arg DomImplementation: a DOM module, e.g. xml.dom.minidom

    """
    Dom = DomImplementation

    class AttrList(MutableMapping):
        """Mutable-mapping view over a DOM element's attributes."""
        def __init__(self, element):
            self.element = element

        def __iter__(self):
            return iter(self.element.attributes.keys())

        def __setitem__(self, name, value):
            # Namespaced (tuple) attribute names are not supported here
            if isinstance(name, tuple):
                raise NotImplementedError
            else:
                attr = self.element.ownerDocument.createAttribute(name)
                attr.value = value
                self.element.attributes[name] = attr

        def __len__(self):
            return len(self.element.attributes)

        def items(self):
            return list(self.element.attributes.items())

        def values(self):
            return list(self.element.attributes.values())

        def __getitem__(self, name):
            if isinstance(name, tuple):
                raise NotImplementedError
            else:
                return self.element.attributes[name].value

        def __delitem__(self, name):
            if isinstance(name, tuple):
                raise NotImplementedError
            else:
                del self.element.attributes[name]

    class NodeBuilder(base.Node):
        """base.Node adapter wrapping a concrete DOM node."""
        def __init__(self, element):
            base.Node.__init__(self, element.nodeName)
            self.element = element

        # namespaceURI of the wrapped node; the and/or chain maps a
        # missing or falsy ("") namespaceURI to None
        namespace = property(lambda self: hasattr(self.element, "namespaceURI") and
                             self.element.namespaceURI or None)

        def appendChild(self, node):
            node.parent = self
            self.element.appendChild(node.element)

        def insertText(self, data, insertBefore=None):
            text = self.element.ownerDocument.createTextNode(data)
            if insertBefore:
                self.element.insertBefore(text, insertBefore.element)
            else:
                self.element.appendChild(text)

        def insertBefore(self, node, refNode):
            self.element.insertBefore(node.element, refNode.element)
            node.parent = self

        def removeChild(self, node):
            # Only detach when node is actually our child
            if node.element.parentNode == self.element:
                self.element.removeChild(node.element)
            node.parent = None

        def reparentChildren(self, newParent):
            # Move the underlying DOM children directly; the wrapper-level
            # childNodes list is simply cleared.
            while self.element.hasChildNodes():
                child = self.element.firstChild
                self.element.removeChild(child)
                newParent.element.appendChild(child)
            self.childNodes = []

        def getAttributes(self):
            return AttrList(self.element)

        def setAttributes(self, attributes):
            if attributes:
                for name, value in list(attributes.items()):
                    if isinstance(name, tuple):
                        # (prefix, localName, namespace) tuple -> namespaced
                        # attribute
                        if name[0] is not None:
                            qualifiedName = (name[0] + ":" + name[1])
                        else:
                            qualifiedName = name[1]
                        self.element.setAttributeNS(name[2], qualifiedName,
                                                    value)
                    else:
                        self.element.setAttribute(
                            name, value)
        attributes = property(getAttributes, setAttributes)

        def cloneNode(self):
            # Shallow DOM clone (no children)
            return NodeBuilder(self.element.cloneNode(False))

        def hasContent(self):
            return self.element.hasChildNodes()

        def getNameTuple(self):
            if self.namespace is None:
                return namespaces["html"], self.name
            else:
                return self.namespace, self.name

        nameTuple = property(getNameTuple)

    class TreeBuilder(base.TreeBuilder):  # pylint:disable=unused-variable
        def documentClass(self):
            # Called as self.documentClass() by base.TreeBuilder.reset():
            # creates the DOM document and returns a weak proxy of the
            # builder itself to stand in for the document node.
            self.dom = Dom.getDOMImplementation().createDocument(None, None, None)
            return weakref.proxy(self)

        def insertDoctype(self, token):
            name = token["name"]
            publicId = token["publicId"]
            systemId = token["systemId"]

            domimpl = Dom.getDOMImplementation()
            doctype = domimpl.createDocumentType(name, publicId, systemId)
            self.document.appendChild(NodeBuilder(doctype))
            if Dom == minidom:
                doctype.ownerDocument = self.dom

        def elementClass(self, name, namespace=None):
            if namespace is None and self.defaultNamespace is None:
                node = self.dom.createElement(name)
            else:
                node = self.dom.createElementNS(namespace, name)

            return NodeBuilder(node)

        def commentClass(self, data):
            return NodeBuilder(self.dom.createComment(data))

        def fragmentClass(self):
            return NodeBuilder(self.dom.createDocumentFragment())

        def appendChild(self, node):
            self.dom.appendChild(node.element)

        def testSerializer(self, element):
            return testSerializer(element)

        def getDocument(self):
            return self.dom

        def getFragment(self):
            return base.TreeBuilder.getFragment(self).element

        def insertText(self, data, parent=None):
            data = data  # NOTE(review): no-op assignment kept from upstream
            if parent != self:
                base.TreeBuilder.insertText(self, data, parent)
            else:
                # HACK: allow text nodes as children of the document node
                if hasattr(self.dom, '_child_node_types'):
                    # pylint:disable=protected-access
                    if Node.TEXT_NODE not in self.dom._child_node_types:
                        self.dom._child_node_types = list(self.dom._child_node_types)
                        self.dom._child_node_types.append(Node.TEXT_NODE)
                self.dom.appendChild(self.dom.createTextNode(data))

        implementation = DomImplementation
        name = None

    def testSerializer(element):
        """Serialize element's subtree in the format used by the html5lib
        unit tests."""
        element.normalize()
        rv = []

        def serializeElement(element, indent=0):
            if element.nodeType == Node.DOCUMENT_TYPE_NODE:
                if element.name:
                    if element.publicId or element.systemId:
                        publicId = element.publicId or ""
                        systemId = element.systemId or ""
                        rv.append("""|%s<!DOCTYPE %s "%s" "%s">""" %
                                  (' ' * indent, element.name, publicId, systemId))
                    else:
                        rv.append("|%s<!DOCTYPE %s>" % (' ' * indent, element.name))
                else:
                    rv.append("|%s<!DOCTYPE >" % (' ' * indent,))
            elif element.nodeType == Node.DOCUMENT_NODE:
                rv.append("#document")
            elif element.nodeType == Node.DOCUMENT_FRAGMENT_NODE:
                rv.append("#document-fragment")
            elif element.nodeType == Node.COMMENT_NODE:
                rv.append("|%s<!-- %s -->" % (' ' * indent, element.nodeValue))
            elif element.nodeType == Node.TEXT_NODE:
                rv.append("|%s\"%s\"" % (' ' * indent, element.nodeValue))
            else:
                if (hasattr(element, "namespaceURI") and
                        element.namespaceURI is not None):
                    name = "%s %s" % (constants.prefixes[element.namespaceURI],
                                      element.nodeName)
                else:
                    name = element.nodeName
                rv.append("|%s<%s>" % (' ' * indent, name))
                if element.hasAttributes():
                    attributes = []
                    for i in range(len(element.attributes)):
                        attr = element.attributes.item(i)
                        name = attr.nodeName
                        value = attr.value
                        ns = attr.namespaceURI
                        if ns:
                            name = "%s %s" % (constants.prefixes[ns], attr.localName)
                        else:
                            name = attr.nodeName
                        attributes.append((name, value))

                    for name, value in sorted(attributes):
                        rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value))
            indent += 2
            for child in element.childNodes:
                serializeElement(child, indent)
        serializeElement(element, 0)

        return "\n".join(rv)

    return locals()
234 | |||
# The actual means to get a module!
# NOTE(review): moduleFactoryFactory (from .._utils) wraps getDomBuilder so
# callers obtain a module-like object per DOM implementation — presumably
# cached per implementation; verify against _utils.moduleFactoryFactory.
getDomModule = moduleFactoryFactory(getDomBuilder)
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treebuilders/etree.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treebuilders/etree.py new file mode 100644 index 0000000..9a4aa95 --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treebuilders/etree.py | |||
@@ -0,0 +1,340 @@ | |||
1 | from __future__ import absolute_import, division, unicode_literals | ||
2 | # pylint:disable=protected-access | ||
3 | |||
4 | from pip._vendor.six import text_type | ||
5 | |||
6 | import re | ||
7 | |||
8 | from . import base | ||
9 | from .. import _ihatexml | ||
10 | from .. import constants | ||
11 | from ..constants import namespaces | ||
12 | from .._utils import moduleFactoryFactory | ||
13 | |||
# Splits an ElementTree tag of the form "{namespace}localname" into its
# namespace and local-name groups.
tag_regexp = re.compile("{([^}]*)}(.*)")
15 | |||
16 | |||
17 | def getETreeBuilder(ElementTreeImplementation, fullTree=False): | ||
18 | ElementTree = ElementTreeImplementation | ||
19 | ElementTreeCommentType = ElementTree.Comment("asd").tag | ||
20 | |||
21 | class Element(base.Node): | ||
22 | def __init__(self, name, namespace=None): | ||
23 | self._name = name | ||
24 | self._namespace = namespace | ||
25 | self._element = ElementTree.Element(self._getETreeTag(name, | ||
26 | namespace)) | ||
27 | if namespace is None: | ||
28 | self.nameTuple = namespaces["html"], self._name | ||
29 | else: | ||
30 | self.nameTuple = self._namespace, self._name | ||
31 | self.parent = None | ||
32 | self._childNodes = [] | ||
33 | self._flags = [] | ||
34 | |||
35 | def _getETreeTag(self, name, namespace): | ||
36 | if namespace is None: | ||
37 | etree_tag = name | ||
38 | else: | ||
39 | etree_tag = "{%s}%s" % (namespace, name) | ||
40 | return etree_tag | ||
41 | |||
42 | def _setName(self, name): | ||
43 | self._name = name | ||
44 | self._element.tag = self._getETreeTag(self._name, self._namespace) | ||
45 | |||
46 | def _getName(self): | ||
47 | return self._name | ||
48 | |||
49 | name = property(_getName, _setName) | ||
50 | |||
51 | def _setNamespace(self, namespace): | ||
52 | self._namespace = namespace | ||
53 | self._element.tag = self._getETreeTag(self._name, self._namespace) | ||
54 | |||
55 | def _getNamespace(self): | ||
56 | return self._namespace | ||
57 | |||
58 | namespace = property(_getNamespace, _setNamespace) | ||
59 | |||
60 | def _getAttributes(self): | ||
61 | return self._element.attrib | ||
62 | |||
63 | def _setAttributes(self, attributes): | ||
64 | # Delete existing attributes first | ||
65 | # XXX - there may be a better way to do this... | ||
66 | for key in list(self._element.attrib.keys()): | ||
67 | del self._element.attrib[key] | ||
68 | for key, value in attributes.items(): | ||
69 | if isinstance(key, tuple): | ||
70 | name = "{%s}%s" % (key[2], key[1]) | ||
71 | else: | ||
72 | name = key | ||
73 | self._element.set(name, value) | ||
74 | |||
75 | attributes = property(_getAttributes, _setAttributes) | ||
76 | |||
77 | def _getChildNodes(self): | ||
78 | return self._childNodes | ||
79 | |||
80 | def _setChildNodes(self, value): | ||
81 | del self._element[:] | ||
82 | self._childNodes = [] | ||
83 | for element in value: | ||
84 | self.insertChild(element) | ||
85 | |||
86 | childNodes = property(_getChildNodes, _setChildNodes) | ||
87 | |||
88 | def hasContent(self): | ||
89 | """Return true if the node has children or text""" | ||
90 | return bool(self._element.text or len(self._element)) | ||
91 | |||
92 | def appendChild(self, node): | ||
93 | self._childNodes.append(node) | ||
94 | self._element.append(node._element) | ||
95 | node.parent = self | ||
96 | |||
97 | def insertBefore(self, node, refNode): | ||
98 | index = list(self._element).index(refNode._element) | ||
99 | self._element.insert(index, node._element) | ||
100 | node.parent = self | ||
101 | |||
102 | def removeChild(self, node): | ||
103 | self._childNodes.remove(node) | ||
104 | self._element.remove(node._element) | ||
105 | node.parent = None | ||
106 | |||
107 | def insertText(self, data, insertBefore=None): | ||
108 | if not(len(self._element)): | ||
109 | if not self._element.text: | ||
110 | self._element.text = "" | ||
111 | self._element.text += data | ||
112 | elif insertBefore is None: | ||
113 | # Insert the text as the tail of the last child element | ||
114 | if not self._element[-1].tail: | ||
115 | self._element[-1].tail = "" | ||
116 | self._element[-1].tail += data | ||
117 | else: | ||
118 | # Insert the text before the specified node | ||
119 | children = list(self._element) | ||
120 | index = children.index(insertBefore._element) | ||
121 | if index > 0: | ||
122 | if not self._element[index - 1].tail: | ||
123 | self._element[index - 1].tail = "" | ||
124 | self._element[index - 1].tail += data | ||
125 | else: | ||
126 | if not self._element.text: | ||
127 | self._element.text = "" | ||
128 | self._element.text += data | ||
129 | |||
130 | def cloneNode(self): | ||
131 | element = type(self)(self.name, self.namespace) | ||
132 | for name, value in self.attributes.items(): | ||
133 | element.attributes[name] = value | ||
134 | return element | ||
135 | |||
136 | def reparentChildren(self, newParent): | ||
137 | if newParent.childNodes: | ||
138 | newParent.childNodes[-1]._element.tail += self._element.text | ||
139 | else: | ||
140 | if not newParent._element.text: | ||
141 | newParent._element.text = "" | ||
142 | if self._element.text is not None: | ||
143 | newParent._element.text += self._element.text | ||
144 | self._element.text = "" | ||
145 | base.Node.reparentChildren(self, newParent) | ||
146 | |||
147 | class Comment(Element): | ||
148 | def __init__(self, data): | ||
149 | # Use the superclass constructor to set all properties on the | ||
150 | # wrapper element | ||
151 | self._element = ElementTree.Comment(data) | ||
152 | self.parent = None | ||
153 | self._childNodes = [] | ||
154 | self._flags = [] | ||
155 | |||
156 | def _getData(self): | ||
157 | return self._element.text | ||
158 | |||
159 | def _setData(self, value): | ||
160 | self._element.text = value | ||
161 | |||
162 | data = property(_getData, _setData) | ||
163 | |||
164 | class DocumentType(Element): | ||
165 | def __init__(self, name, publicId, systemId): | ||
166 | Element.__init__(self, "<!DOCTYPE>") | ||
167 | self._element.text = name | ||
168 | self.publicId = publicId | ||
169 | self.systemId = systemId | ||
170 | |||
171 | def _getPublicId(self): | ||
172 | return self._element.get("publicId", "") | ||
173 | |||
174 | def _setPublicId(self, value): | ||
175 | if value is not None: | ||
176 | self._element.set("publicId", value) | ||
177 | |||
178 | publicId = property(_getPublicId, _setPublicId) | ||
179 | |||
180 | def _getSystemId(self): | ||
181 | return self._element.get("systemId", "") | ||
182 | |||
183 | def _setSystemId(self, value): | ||
184 | if value is not None: | ||
185 | self._element.set("systemId", value) | ||
186 | |||
187 | systemId = property(_getSystemId, _setSystemId) | ||
188 | |||
189 | class Document(Element): | ||
190 | def __init__(self): | ||
191 | Element.__init__(self, "DOCUMENT_ROOT") | ||
192 | |||
    class DocumentFragment(Element):
        """Fragment root, represented by a synthetic DOCUMENT_FRAGMENT element."""

        def __init__(self):
            Element.__init__(self, "DOCUMENT_FRAGMENT")
196 | |||
    def testSerializer(element):
        """Serialize *element* in the indented, "|"-prefixed text format
        used by the html5lib test suite (one line per node)."""
        rv = []

        def serializeElement(element, indent=0):
            if not(hasattr(element, "tag")):
                # An ElementTree rather than an Element; start from its root
                element = element.getroot()
            if element.tag == "<!DOCTYPE>":
                if element.get("publicId") or element.get("systemId"):
                    publicId = element.get("publicId") or ""
                    systemId = element.get("systemId") or ""
                    rv.append("""<!DOCTYPE %s "%s" "%s">""" %
                              (element.text, publicId, systemId))
                else:
                    rv.append("<!DOCTYPE %s>" % (element.text,))
            elif element.tag == "DOCUMENT_ROOT":
                rv.append("#document")
                if element.text is not None:
                    rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text))
                if element.tail is not None:
                    raise TypeError("Document node cannot have tail")
                if hasattr(element, "attrib") and len(element.attrib):
                    raise TypeError("Document node cannot have attributes")
            elif element.tag == ElementTreeCommentType:
                rv.append("|%s<!-- %s -->" % (' ' * indent, element.text))
            else:
                assert isinstance(element.tag, text_type), \
                    "Expected unicode, got %s, %s" % (type(element.tag), element.tag)
                nsmatch = tag_regexp.match(element.tag)

                if nsmatch is None:
                    name = element.tag
                else:
                    # Namespaced tag "{ns}name" is shown as "prefix name"
                    ns, name = nsmatch.groups()
                    prefix = constants.prefixes[ns]
                    name = "%s %s" % (prefix, name)
                rv.append("|%s<%s>" % (' ' * indent, name))

                if hasattr(element, "attrib"):
                    attributes = []
                    for name, value in element.attrib.items():
                        nsmatch = tag_regexp.match(name)
                        if nsmatch is not None:
                            ns, name = nsmatch.groups()
                            prefix = constants.prefixes[ns]
                            attr_string = "%s %s" % (prefix, name)
                        else:
                            attr_string = name
                        attributes.append((attr_string, value))

                    # Sorted so the output is deterministic
                    for name, value in sorted(attributes):
                        rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value))
                if element.text:
                    rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text))
            # Children are indented one level deeper; the tail belongs to
            # the parent's level, hence indent - 2 below
            indent += 2
            for child in element:
                serializeElement(child, indent)
            if element.tail:
                rv.append("|%s\"%s\"" % (' ' * (indent - 2), element.tail))
        serializeElement(element, 0)

        return "\n".join(rv)
258 | |||
259 | def tostring(element): # pylint:disable=unused-variable | ||
260 | """Serialize an element and its child nodes to a string""" | ||
261 | rv = [] | ||
262 | filter = _ihatexml.InfosetFilter() | ||
263 | |||
264 | def serializeElement(element): | ||
265 | if isinstance(element, ElementTree.ElementTree): | ||
266 | element = element.getroot() | ||
267 | |||
268 | if element.tag == "<!DOCTYPE>": | ||
269 | if element.get("publicId") or element.get("systemId"): | ||
270 | publicId = element.get("publicId") or "" | ||
271 | systemId = element.get("systemId") or "" | ||
272 | rv.append("""<!DOCTYPE %s PUBLIC "%s" "%s">""" % | ||
273 | (element.text, publicId, systemId)) | ||
274 | else: | ||
275 | rv.append("<!DOCTYPE %s>" % (element.text,)) | ||
276 | elif element.tag == "DOCUMENT_ROOT": | ||
277 | if element.text is not None: | ||
278 | rv.append(element.text) | ||
279 | if element.tail is not None: | ||
280 | raise TypeError("Document node cannot have tail") | ||
281 | if hasattr(element, "attrib") and len(element.attrib): | ||
282 | raise TypeError("Document node cannot have attributes") | ||
283 | |||
284 | for child in element: | ||
285 | serializeElement(child) | ||
286 | |||
287 | elif element.tag == ElementTreeCommentType: | ||
288 | rv.append("<!--%s-->" % (element.text,)) | ||
289 | else: | ||
290 | # This is assumed to be an ordinary element | ||
291 | if not element.attrib: | ||
292 | rv.append("<%s>" % (filter.fromXmlName(element.tag),)) | ||
293 | else: | ||
294 | attr = " ".join(["%s=\"%s\"" % ( | ||
295 | filter.fromXmlName(name), value) | ||
296 | for name, value in element.attrib.items()]) | ||
297 | rv.append("<%s %s>" % (element.tag, attr)) | ||
298 | if element.text: | ||
299 | rv.append(element.text) | ||
300 | |||
301 | for child in element: | ||
302 | serializeElement(child) | ||
303 | |||
304 | rv.append("</%s>" % (element.tag,)) | ||
305 | |||
306 | if element.tail: | ||
307 | rv.append(element.tail) | ||
308 | |||
309 | serializeElement(element) | ||
310 | |||
311 | return "".join(rv) | ||
312 | |||
313 | class TreeBuilder(base.TreeBuilder): # pylint:disable=unused-variable | ||
314 | documentClass = Document | ||
315 | doctypeClass = DocumentType | ||
316 | elementClass = Element | ||
317 | commentClass = Comment | ||
318 | fragmentClass = DocumentFragment | ||
319 | implementation = ElementTreeImplementation | ||
320 | |||
321 | def testSerializer(self, element): | ||
322 | return testSerializer(element) | ||
323 | |||
324 | def getDocument(self): | ||
325 | if fullTree: | ||
326 | return self.document._element | ||
327 | else: | ||
328 | if self.defaultNamespace is not None: | ||
329 | return self.document._element.find( | ||
330 | "{%s}html" % self.defaultNamespace) | ||
331 | else: | ||
332 | return self.document._element.find("html") | ||
333 | |||
334 | def getFragment(self): | ||
335 | return base.TreeBuilder.getFragment(self)._element | ||
336 | |||
337 | return locals() | ||
338 | |||
339 | |||
# Factory producing a builder module per ElementTree implementation
# (presumably memoized by moduleFactoryFactory -- see _utils)
getETreeModule = moduleFactoryFactory(getETreeBuilder)
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treebuilders/etree_lxml.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treebuilders/etree_lxml.py new file mode 100644 index 0000000..66a9ba3 --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treebuilders/etree_lxml.py | |||
@@ -0,0 +1,366 @@ | |||
1 | """Module for supporting the lxml.etree library. The idea here is to use as much | ||
2 | of the native library as possible, without using fragile hacks like custom element | ||
3 | names that break between releases. The downside of this is that we cannot represent | ||
4 | all possible trees; specifically the following are known to cause problems: | ||
5 | |||
6 | Text or comments as siblings of the root element | ||
7 | Docypes with no name | ||
8 | |||
9 | When any of these things occur, we emit a DataLossWarning | ||
10 | """ | ||
11 | |||
12 | from __future__ import absolute_import, division, unicode_literals | ||
13 | # pylint:disable=protected-access | ||
14 | |||
15 | import warnings | ||
16 | import re | ||
17 | import sys | ||
18 | |||
19 | from . import base | ||
20 | from ..constants import DataLossWarning | ||
21 | from .. import constants | ||
22 | from . import etree as etree_builders | ||
23 | from .. import _ihatexml | ||
24 | |||
25 | import lxml.etree as etree | ||
26 | |||
27 | |||
# lxml always keeps the whole document tree
fullTree = True
# Splits an ElementTree-style "{namespace}tag" name into its two parts
tag_regexp = re.compile("{([^}]*)}(.*)")

# Sentinel tag object used to recognize comment nodes
comment_type = etree.Comment("asd").tag
32 | |||
33 | |||
class DocumentType(object):
    """Plain record holding a doctype's name, public id and system id."""

    def __init__(self, name, publicId, systemId):
        self.name, self.publicId, self.systemId = name, publicId, systemId
39 | |||
40 | |||
class Document(object):
    """Document wrapper around an lxml ElementTree.

    lxml has no mutable document node, so top-level children are tracked
    in a plain list and, when appended, are inserted as siblings after
    the root element.
    """

    def __init__(self):
        self._elementTree = None
        self._childNodes = []

    def appendChild(self, element):
        # lxml cannot append to the document itself; add after the root
        self._elementTree.getroot().addnext(element._element)

    def _getChildNodes(self):
        return self._childNodes

    childNodes = property(_getChildNodes)
53 | |||
54 | |||
def testSerializer(element):
    """Serialize *element* in the indented, "|"-prefixed text format used
    by the html5lib test suite (one line per node)."""
    rv = []
    infosetFilter = _ihatexml.InfosetFilter(preventDoubleDashComments=True)

    def serializeElement(element, indent=0):
        if not hasattr(element, "tag"):
            if hasattr(element, "getroot"):
                # Full tree case
                rv.append("#document")
                if element.docinfo.internalDTD:
                    if not (element.docinfo.public_id or
                            element.docinfo.system_url):
                        dtd_str = "<!DOCTYPE %s>" % element.docinfo.root_name
                    else:
                        dtd_str = """<!DOCTYPE %s "%s" "%s">""" % (
                            element.docinfo.root_name,
                            element.docinfo.public_id,
                            element.docinfo.system_url)
                    rv.append("|%s%s" % (' ' * (indent + 2), dtd_str))
                # Comments may precede the root element in lxml, so rewind
                # to the first top-level sibling before walking forward
                next_element = element.getroot()
                while next_element.getprevious() is not None:
                    next_element = next_element.getprevious()
                while next_element is not None:
                    serializeElement(next_element, indent + 2)
                    next_element = next_element.getnext()
            elif isinstance(element, str) or isinstance(element, bytes):
                # Text in a fragment
                assert isinstance(element, str) or sys.version_info[0] == 2
                rv.append("|%s\"%s\"" % (' ' * indent, element))
            else:
                # Fragment case
                rv.append("#document-fragment")
                for next_element in element:
                    serializeElement(next_element, indent + 2)
        elif element.tag == comment_type:
            rv.append("|%s<!-- %s -->" % (' ' * indent, element.text))
            if hasattr(element, "tail") and element.tail:
                rv.append("|%s\"%s\"" % (' ' * indent, element.tail))
        else:
            assert isinstance(element, etree._Element)
            # Coerced names are mapped back through the InfosetFilter
            nsmatch = etree_builders.tag_regexp.match(element.tag)
            if nsmatch is not None:
                ns = nsmatch.group(1)
                tag = nsmatch.group(2)
                prefix = constants.prefixes[ns]
                rv.append("|%s<%s %s>" % (' ' * indent, prefix,
                                          infosetFilter.fromXmlName(tag)))
            else:
                rv.append("|%s<%s>" % (' ' * indent,
                                       infosetFilter.fromXmlName(element.tag)))

            if hasattr(element, "attrib"):
                attributes = []
                for name, value in element.attrib.items():
                    nsmatch = tag_regexp.match(name)
                    if nsmatch is not None:
                        ns, name = nsmatch.groups()
                        name = infosetFilter.fromXmlName(name)
                        prefix = constants.prefixes[ns]
                        attr_string = "%s %s" % (prefix, name)
                    else:
                        attr_string = infosetFilter.fromXmlName(name)
                    attributes.append((attr_string, value))

                # Sorted so the output is deterministic
                for name, value in sorted(attributes):
                    rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value))

            if element.text:
                rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text))
            # Children go one level deeper; the tail stays at this level
            indent += 2
            for child in element:
                serializeElement(child, indent)
            if hasattr(element, "tail") and element.tail:
                rv.append("|%s\"%s\"" % (' ' * (indent - 2), element.tail))
    serializeElement(element, 0)

    return "\n".join(rv)
132 | |||
133 | |||
def tostring(element):
    """Serialize an element and its child nodes to a string"""
    parts = []

    def walk(node):
        if not hasattr(node, "tag"):
            # An ElementTree: emit the doctype (if any), then its root
            if node.docinfo.internalDTD:
                parts.append(node.docinfo.doctype or
                             "<!DOCTYPE %s>" % node.docinfo.root_name)
            walk(node.getroot())

        elif node.tag == comment_type:
            parts.append("<!--%s-->" % (node.text,))

        else:
            # Ordinary element: start tag, text, children, end tag
            if node.attrib:
                attrs = " ".join("%s=\"%s\"" % (name, value)
                                 for name, value in node.attrib.items())
                parts.append("<%s %s>" % (node.tag, attrs))
            else:
                parts.append("<%s>" % (node.tag,))
            if node.text:
                parts.append(node.text)

            for child in node:
                walk(child)

            parts.append("</%s>" % (node.tag,))

        if hasattr(node, "tail") and node.tail:
            parts.append(node.tail)

    walk(element)

    return "".join(parts)
173 | |||
174 | |||
class TreeBuilder(base.TreeBuilder):
    """Tree builder that produces an lxml.etree tree.

    lxml enforces XML well-formedness, so element/attribute names and
    comment text are coerced through an InfosetFilter, and the doctype
    has to be materialised by parsing a small bootstrap document string
    (see insertRoot) because lxml offers no way to attach a doctype
    after the tree exists.
    """
    documentClass = Document
    doctypeClass = DocumentType
    elementClass = None  # assigned per-instance in __init__
    commentClass = None  # assigned per-instance in __init__
    fragmentClass = Document
    implementation = etree

    def __init__(self, namespaceHTMLElements, fullTree=False):
        builder = etree_builders.getETreeModule(etree, fullTree=fullTree)
        infosetFilter = self.infosetFilter = _ihatexml.InfosetFilter(preventDoubleDashComments=True)
        self.namespaceHTMLElements = namespaceHTMLElements

        class Attributes(dict):
            # dict subclass that mirrors every write into the wrapped lxml
            # element's attrib, coercing attribute names to be XML-legal
            def __init__(self, element, value=None):
                if value is None:
                    value = {}
                self._element = element
                dict.__init__(self, value)  # pylint:disable=non-parent-init-called
                for key, value in self.items():
                    # Tuple keys are (prefix, localName, namespace)
                    if isinstance(key, tuple):
                        name = "{%s}%s" % (key[2], infosetFilter.coerceAttribute(key[1]))
                    else:
                        name = infosetFilter.coerceAttribute(key)
                    self._element._element.attrib[name] = value

            def __setitem__(self, key, value):
                dict.__setitem__(self, key, value)
                if isinstance(key, tuple):
                    name = "{%s}%s" % (key[2], infosetFilter.coerceAttribute(key[1]))
                else:
                    name = infosetFilter.coerceAttribute(key)
                self._element._element.attrib[name] = value

        class Element(builder.Element):
            # Element that coerces names/text through the InfosetFilter
            def __init__(self, name, namespace):
                name = infosetFilter.coerceElement(name)
                builder.Element.__init__(self, name, namespace=namespace)
                self._attributes = Attributes(self)

            def _setName(self, name):
                self._name = infosetFilter.coerceElement(name)
                self._element.tag = self._getETreeTag(
                    self._name, self._namespace)

            def _getName(self):
                # Reverse the coercion when reading the name back
                return infosetFilter.fromXmlName(self._name)

            name = property(_getName, _setName)

            def _getAttributes(self):
                return self._attributes

            def _setAttributes(self, attributes):
                self._attributes = Attributes(self, attributes)

            attributes = property(_getAttributes, _setAttributes)

            def insertText(self, data, insertBefore=None):
                data = infosetFilter.coerceCharacters(data)
                builder.Element.insertText(self, data, insertBefore)

            def appendChild(self, child):
                builder.Element.appendChild(self, child)

        class Comment(builder.Comment):
            # Comment that coerces its data (e.g. "--" sequences)
            def __init__(self, data):
                data = infosetFilter.coerceComment(data)
                builder.Comment.__init__(self, data)

            def _setData(self, data):
                data = infosetFilter.coerceComment(data)
                self._element.text = data

            def _getData(self):
                return self._element.text

            data = property(_getData, _setData)

        self.elementClass = Element
        self.commentClass = Comment
        # self.fragmentClass = builder.DocumentFragment
        base.TreeBuilder.__init__(self, namespaceHTMLElements)

    def reset(self):
        base.TreeBuilder.reset(self)
        # Comments seen before the root element are buffered until
        # insertRoot has built the tree (lxml needs a root to attach to)
        self.insertComment = self.insertCommentInitial
        self.initial_comments = []
        self.doctype = None

    def testSerializer(self, element):
        return testSerializer(element)

    def getDocument(self):
        if fullTree:
            return self.document._elementTree
        else:
            return self.document._elementTree.getroot()

    def getFragment(self):
        # Return a list of: leading text, child elements, trailing text
        fragment = []
        element = self.openElements[0]._element
        if element.text:
            fragment.append(element.text)
        fragment.extend(list(element))
        if element.tail:
            fragment.append(element.tail)
        return fragment

    def insertDoctype(self, token):
        name = token["name"]
        publicId = token["publicId"]
        systemId = token["systemId"]

        if not name:
            warnings.warn("lxml cannot represent empty doctype", DataLossWarning)
            self.doctype = None
        else:
            coercedName = self.infosetFilter.coerceElement(name)
            if coercedName != name:
                warnings.warn("lxml cannot represent non-xml doctype", DataLossWarning)

            # Stored until insertRoot serializes it into the bootstrap doc
            doctype = self.doctypeClass(coercedName, publicId, systemId)
            self.doctype = doctype

    def insertCommentInitial(self, data, parent=None):
        assert parent is None or parent is self.document
        assert self.document._elementTree is None
        self.initial_comments.append(data)

    def insertCommentMain(self, data, parent=None):
        if (parent == self.document and
                self.document._elementTree.getroot()[-1].tag == comment_type):
            warnings.warn("lxml cannot represent adjacent comments beyond the root elements", DataLossWarning)
        super(TreeBuilder, self).insertComment(data, parent)

    def insertRoot(self, token):
        # Because of the way libxml2 works, it doesn't seem to be possible to
        # alter information like the doctype after the tree has been parsed.
        # Therefore we need to use the built-in parser to create our initial
        # tree, after which we can add elements like normal
        docStr = ""
        if self.doctype:
            assert self.doctype.name
            docStr += "<!DOCTYPE %s" % self.doctype.name
            if (self.doctype.publicId is not None or
                    self.doctype.systemId is not None):
                docStr += (' PUBLIC "%s" ' %
                           (self.infosetFilter.coercePubid(self.doctype.publicId or "")))
                if self.doctype.systemId:
                    sysid = self.doctype.systemId
                    if sysid.find("'") >= 0 and sysid.find('"') >= 0:
                        warnings.warn("DOCTYPE system cannot contain single and double quotes", DataLossWarning)
                        sysid = sysid.replace("'", 'U00027')
                    # Quote with whichever quote character is not in sysid
                    if sysid.find("'") >= 0:
                        docStr += '"%s"' % sysid
                    else:
                        docStr += "'%s'" % sysid
                else:
                    docStr += "''"
            docStr += ">"
            if self.doctype.name != token["name"]:
                warnings.warn("lxml cannot represent doctype with a different name to the root element", DataLossWarning)
        # Placeholder root; renamed to the real root tag below
        docStr += "<THIS_SHOULD_NEVER_APPEAR_PUBLICLY/>"
        root = etree.fromstring(docStr)

        # Append the initial comments:
        for comment_token in self.initial_comments:
            comment = self.commentClass(comment_token["data"])
            root.addprevious(comment._element)

        # Create the root document and add the ElementTree to it
        self.document = self.documentClass()
        self.document._elementTree = root.getroottree()

        # Give the root element the right name
        name = token["name"]
        namespace = token.get("namespace", self.defaultNamespace)
        if namespace is None:
            etree_tag = name
        else:
            etree_tag = "{%s}%s" % (namespace, name)
        root.tag = etree_tag

        # Add the root element to the internal child/open data structures
        root_element = self.elementClass(name, namespace)
        root_element._element = root
        self.document._childNodes.append(root_element)
        self.openElements.append(root_element)

        # Reset to the default insert comment function
        self.insertComment = self.insertCommentMain
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/__init__.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/__init__.py new file mode 100644 index 0000000..31a173d --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/__init__.py | |||
@@ -0,0 +1,154 @@ | |||
1 | """A collection of modules for iterating through different kinds of | ||
2 | tree, generating tokens identical to those produced by the tokenizer | ||
3 | module. | ||
4 | |||
5 | To create a tree walker for a new type of tree, you need to do | ||
6 | implement a tree walker object (called TreeWalker by convention) that | ||
7 | implements a 'serialize' method taking a tree as sole argument and | ||
8 | returning an iterator generating tokens. | ||
9 | """ | ||
10 | |||
11 | from __future__ import absolute_import, division, unicode_literals | ||
12 | |||
13 | from .. import constants | ||
14 | from .._utils import default_etree | ||
15 | |||
16 | __all__ = ["getTreeWalker", "pprint"] | ||
17 | |||
# Cache of TreeWalker classes, keyed by lowercased tree type name
treeWalkerCache = {}
19 | |||
20 | |||
def getTreeWalker(treeType, implementation=None, **kwargs):
    """Get a TreeWalker class for various types of tree with built-in support

    :arg str treeType: the name of the tree type required (case-insensitive).
        Supported values are:

        * "dom": The xml.dom.minidom DOM implementation
        * "etree": A generic walker for tree implementations exposing an
          elementtree-like interface (known to work with ElementTree,
          cElementTree and lxml.etree).
        * "lxml": Optimized walker for lxml.etree
        * "genshi": a Genshi stream

    :arg implementation: A module implementing the tree type e.g.
        xml.etree.ElementTree or cElementTree (Currently applies to the "etree"
        tree type only).

    :arg kwargs: keyword arguments passed to the etree walker--for other
        walkers, this has no effect

    :returns: a TreeWalker class

    """
    treeType = treeType.lower()

    if treeType == "etree":
        from . import etree
        if implementation is None:
            implementation = default_etree
        # XXX: NEVER cache here, caching is done in the etree submodule
        return etree.getETreeModule(implementation, **kwargs).TreeWalker

    if treeType not in treeWalkerCache:
        if treeType == "dom":
            from . import dom
            treeWalkerCache[treeType] = dom.TreeWalker
        elif treeType == "genshi":
            from . import genshi
            treeWalkerCache[treeType] = genshi.TreeWalker
        elif treeType == "lxml":
            from . import etree_lxml
            treeWalkerCache[treeType] = etree_lxml.TreeWalker
    # Unknown types fall through to None
    return treeWalkerCache.get(treeType)
63 | |||
64 | |||
def concatenateCharacterTokens(tokens):
    """Yield *tokens* with runs of character tokens merged.

    Adjacent "Characters"/"SpaceCharacters" tokens are buffered and
    emitted as a single "Characters" token before the next non-character
    token (or at the end of the stream).
    """
    buffered = []
    for token in tokens:
        if token["type"] in ("Characters", "SpaceCharacters"):
            buffered.append(token["data"])
            continue
        if buffered:
            yield {"type": "Characters", "data": "".join(buffered)}
            buffered = []
        yield token
    if buffered:
        yield {"type": "Characters", "data": "".join(buffered)}
78 | |||
79 | |||
def pprint(walker):
    """Pretty printer for tree walkers

    Takes a TreeWalker instance and pretty prints the output of walking the tree.

    :arg walker: a TreeWalker instance

    :returns: the pretty-printed tree as a single string

    """
    output = []
    indent = 0
    for token in concatenateCharacterTokens(walker):
        # NOTE: `type` intentionally shadows the builtin within this loop
        type = token["type"]
        if type in ("StartTag", "EmptyTag"):
            # tag name (non-HTML namespaces are shown as a prefix)
            if token["namespace"] and token["namespace"] != constants.namespaces["html"]:
                if token["namespace"] in constants.prefixes:
                    ns = constants.prefixes[token["namespace"]]
                else:
                    ns = token["namespace"]
                name = "%s %s" % (ns, token["name"])
            else:
                name = token["name"]
            output.append("%s<%s>" % (" " * indent, name))
            indent += 2
            # attributes (sorted for consistent ordering)
            attrs = token["data"]
            for (namespace, localname), value in sorted(attrs.items()):
                if namespace:
                    if namespace in constants.prefixes:
                        ns = constants.prefixes[namespace]
                    else:
                        ns = namespace
                    name = "%s %s" % (ns, localname)
                else:
                    name = localname
                output.append("%s%s=\"%s\"" % (" " * indent, name, value))
            # self-closing
            if type == "EmptyTag":
                indent -= 2

        elif type == "EndTag":
            indent -= 2

        elif type == "Comment":
            output.append("%s<!-- %s -->" % (" " * indent, token["data"]))

        elif type == "Doctype":
            if token["name"]:
                if token["publicId"]:
                    output.append("""%s<!DOCTYPE %s "%s" "%s">""" %
                                  (" " * indent,
                                   token["name"],
                                   token["publicId"],
                                   token["systemId"] if token["systemId"] else ""))
                elif token["systemId"]:
                    output.append("""%s<!DOCTYPE %s "" "%s">""" %
                                  (" " * indent,
                                   token["name"],
                                   token["systemId"]))
                else:
                    output.append("%s<!DOCTYPE %s>" % (" " * indent,
                                                       token["name"]))
            else:
                output.append("%s<!DOCTYPE >" % (" " * indent,))

        elif type == "Characters":
            output.append("%s\"%s\"" % (" " * indent, token["data"]))

        elif type == "SpaceCharacters":
            assert False, "concatenateCharacterTokens should have got rid of all Space tokens"

        else:
            raise ValueError("Unknown token type, %s" % type)

    return "\n".join(output)
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/base.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/base.py new file mode 100644 index 0000000..f82984b --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/base.py | |||
@@ -0,0 +1,252 @@ | |||
1 | from __future__ import absolute_import, division, unicode_literals | ||
2 | |||
3 | from xml.dom import Node | ||
4 | from ..constants import namespaces, voidElements, spaceCharacters | ||
5 | |||
6 | __all__ = ["DOCUMENT", "DOCTYPE", "TEXT", "ELEMENT", "COMMENT", "ENTITY", "UNKNOWN", | ||
7 | "TreeWalker", "NonRecursiveTreeWalker"] | ||
8 | |||
# Node-type constants reuse the DOM nodeType codes so subclasses can
# return them from getNodeDetails()
DOCUMENT = Node.DOCUMENT_NODE
DOCTYPE = Node.DOCUMENT_TYPE_NODE
TEXT = Node.TEXT_NODE
ELEMENT = Node.ELEMENT_NODE
COMMENT = Node.COMMENT_NODE
ENTITY = Node.ENTITY_NODE
UNKNOWN = "<#UNKNOWN#>"

# Collapse the imported collection of space characters into one string so
# it can be passed to str.lstrip/rstrip (see TreeWalker.text)
spaceCharacters = "".join(spaceCharacters)
18 | |||
19 | |||
class TreeWalker(object):
    """Walks a tree yielding tokens

    Tokens are dicts that all have a ``type`` field specifying the type of the
    token.

    """
    def __init__(self, tree):
        """Creates a TreeWalker

        :arg tree: the tree to walk

        """
        self.tree = tree

    def __iter__(self):
        raise NotImplementedError

    def error(self, msg):
        """Generates an error token with the given message

        :arg msg: the error message

        :returns: SerializeError token

        """
        return {"type": "SerializeError", "data": msg}

    def emptyTag(self, namespace, name, attrs, hasChildren=False):
        """Generates an EmptyTag token

        :arg namespace: the namespace of the token--can be ``None``

        :arg name: the name of the element

        :arg attrs: the attributes of the element as a dict

        :arg hasChildren: whether or not to yield a SerializationError because
            this tag shouldn't have children

        :returns: EmptyTag token

        """
        yield {"type": "EmptyTag", "name": name,
               "namespace": namespace,
               "data": attrs}
        if hasChildren:
            # A void element with children is malformed; flag it but
            # still emit the tag itself first
            yield self.error("Void element has children")

    def startTag(self, namespace, name, attrs):
        """Generates a StartTag token

        :arg namespace: the namespace of the token--can be ``None``

        :arg name: the name of the element

        :arg attrs: the attributes of the element as a dict

        :returns: StartTag token

        """
        return {"type": "StartTag",
                "name": name,
                "namespace": namespace,
                "data": attrs}

    def endTag(self, namespace, name):
        """Generates an EndTag token

        :arg namespace: the namespace of the token--can be ``None``

        :arg name: the name of the element

        :returns: EndTag token

        """
        return {"type": "EndTag",
                "name": name,
                "namespace": namespace}

    def text(self, data):
        """Generates SpaceCharacters and Characters tokens

        Depending on what's in the data, this generates one or more
        ``SpaceCharacters`` and ``Characters`` tokens.

        For example:

            >>> from html5lib.treewalkers.base import TreeWalker
            >>> # Give it an empty tree just so it instantiates
            >>> walker = TreeWalker([])
            >>> list(walker.text(''))
            []
            >>> list(walker.text(' '))
            [{'type': 'SpaceCharacters', 'data': ' '}]
            >>> list(walker.text(' abc '))  # doctest: +NORMALIZE_WHITESPACE
            [{'type': 'SpaceCharacters', 'data': ' '},
             {'type': 'Characters', 'data': 'abc'},
             {'type': 'SpaceCharacters', 'data': ' '}]

        :arg data: the text data

        :returns: one or more ``SpaceCharacters`` and ``Characters`` tokens

        """
        # Split data into leading space, middle text and trailing space
        middle = data.lstrip(spaceCharacters)
        left = data[:len(data) - len(middle)]
        if left:
            yield {"type": "SpaceCharacters", "data": left}
        data = middle
        middle = data.rstrip(spaceCharacters)
        right = data[len(middle):]
        if middle:
            yield {"type": "Characters", "data": middle}
        if right:
            yield {"type": "SpaceCharacters", "data": right}

    def comment(self, data):
        """Generates a Comment token

        :arg data: the comment

        :returns: Comment token

        """
        return {"type": "Comment", "data": data}

    def doctype(self, name, publicId=None, systemId=None):
        """Generates a Doctype token

        :arg name: the doctype name

        :arg publicId: the doctype public identifier, if any

        :arg systemId: the doctype system identifier, if any

        :returns: the Doctype token

        """
        return {"type": "Doctype",
                "name": name,
                "publicId": publicId,
                "systemId": systemId}

    def entity(self, name):
        """Generates an Entity token

        :arg name: the entity name

        :returns: an Entity token

        """
        return {"type": "Entity", "name": name}

    def unknown(self, nodeType):
        """Handles unknown node types"""
        return self.error("Unknown node type: " + nodeType)
178 | |||
179 | |||
class NonRecursiveTreeWalker(TreeWalker):
    """TreeWalker that walks the tree iteratively.

    Subclasses supply the four navigation primitives below; ``__iter__``
    then performs a depth-first traversal without recursion.
    """

    def getNodeDetails(self, node):
        # Returns a tuple whose first item is one of the node-type
        # constants (DOCTYPE, TEXT, ELEMENT, COMMENT, ENTITY, DOCUMENT,
        # UNKNOWN) followed by type-specific details
        raise NotImplementedError

    def getFirstChild(self, node):
        raise NotImplementedError

    def getNextSibling(self, node):
        raise NotImplementedError

    def getParentNode(self, node):
        raise NotImplementedError

    def __iter__(self):
        currentNode = self.tree
        while currentNode is not None:
            details = self.getNodeDetails(currentNode)
            type, details = details[0], details[1:]
            hasChildren = False

            if type == DOCTYPE:
                yield self.doctype(*details)

            elif type == TEXT:
                for token in self.text(*details):
                    yield token

            elif type == ELEMENT:
                namespace, name, attributes, hasChildren = details
                # HTML void elements become EmptyTag tokens and their
                # (erroneous) children are not descended into
                if (not namespace or namespace == namespaces["html"]) and name in voidElements:
                    for token in self.emptyTag(namespace, name, attributes,
                                               hasChildren):
                        yield token
                    hasChildren = False
                else:
                    yield self.startTag(namespace, name, attributes)

            elif type == COMMENT:
                yield self.comment(details[0])

            elif type == ENTITY:
                yield self.entity(details[0])

            elif type == DOCUMENT:
                hasChildren = True

            else:
                yield self.unknown(details[0])

            if hasChildren:
                firstChild = self.getFirstChild(currentNode)
            else:
                firstChild = None

            if firstChild is not None:
                # Descend
                currentNode = firstChild
            else:
                # No children: emit EndTags while climbing back up until a
                # next sibling (or the walk's root) is reached
                while currentNode is not None:
                    details = self.getNodeDetails(currentNode)
                    type, details = details[0], details[1:]
                    if type == ELEMENT:
                        namespace, name, attributes, hasChildren = details
                        # Void HTML elements were already closed as EmptyTag
                        if (namespace and namespace != namespaces["html"]) or name not in voidElements:
                            yield self.endTag(namespace, name)
                    if self.tree is currentNode:
                        currentNode = None
                        break
                    nextSibling = self.getNextSibling(currentNode)
                    if nextSibling is not None:
                        currentNode = nextSibling
                        break
                    else:
                        currentNode = self.getParentNode(currentNode)
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/dom.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/dom.py new file mode 100644 index 0000000..b3e2753 --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/dom.py | |||
@@ -0,0 +1,43 @@ | |||
1 | from __future__ import absolute_import, division, unicode_literals | ||
2 | |||
3 | from xml.dom import Node | ||
4 | |||
5 | from . import base | ||
6 | |||
7 | |||
class TreeWalker(base.NonRecursiveTreeWalker):
    """Non-recursive tree walker over ``xml.dom`` (minidom-style) trees."""

    def getNodeDetails(self, node):
        """Map a DOM node to the html5lib token-description tuple for its type."""
        node_type = node.nodeType

        if node_type == Node.DOCUMENT_TYPE_NODE:
            return base.DOCTYPE, node.name, node.publicId, node.systemId

        if node_type in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
            return base.TEXT, node.nodeValue

        if node_type == Node.ELEMENT_NODE:
            # Convert DOM attributes to html5lib's {(namespace, localname): value}
            # mapping; non-namespaced attributes get a None namespace.
            attrs = {}
            for attr_name in list(node.attributes.keys()):
                attr_node = node.getAttributeNode(attr_name)
                if attr_node.namespaceURI:
                    key = (attr_node.namespaceURI, attr_node.localName)
                else:
                    key = (None, attr_node.name)
                attrs[key] = attr_node.value
            return (base.ELEMENT, node.namespaceURI, node.nodeName,
                    attrs, node.hasChildNodes())

        if node_type == Node.COMMENT_NODE:
            return base.COMMENT, node.nodeValue

        if node_type in (Node.DOCUMENT_NODE, Node.DOCUMENT_FRAGMENT_NODE):
            return (base.DOCUMENT,)

        return base.UNKNOWN, node_type

    def getFirstChild(self, node):
        """Return the node's first child, or None."""
        return node.firstChild

    def getNextSibling(self, node):
        """Return the node's next sibling, or None."""
        return node.nextSibling

    def getParentNode(self, node):
        """Return the node's parent, or None at the document root."""
        return node.parentNode
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/etree.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/etree.py new file mode 100644 index 0000000..1a35add --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/etree.py | |||
@@ -0,0 +1,130 @@ | |||
1 | from __future__ import absolute_import, division, unicode_literals | ||
2 | |||
3 | from collections import OrderedDict | ||
4 | import re | ||
5 | |||
6 | from pip._vendor.six import string_types | ||
7 | |||
8 | from . import base | ||
9 | from .._utils import moduleFactoryFactory | ||
10 | |||
11 | tag_regexp = re.compile("{([^}]*)}(.*)") | ||
12 | |||
13 | |||
def getETreeBuilder(ElementTreeImplementation):
    """Build the tree-walker namespace for one ElementTree implementation.

    Returns ``locals()`` so :func:`moduleFactoryFactory` (see
    ``getETreeModule`` below) can expose the nested ``TreeWalker`` as a
    synthetic per-implementation module.
    """
    ElementTree = ElementTreeImplementation
    # Comment elements carry a non-string sentinel tag (the Comment factory
    # itself); capture it once from a throwaway comment for identity checks.
    ElementTreeCommentType = ElementTree.Comment("asd").tag

    class TreeWalker(base.NonRecursiveTreeWalker):  # pylint:disable=unused-variable
        """Given the particular ElementTree representation, this implementation,
        to avoid using recursion, returns "nodes" as tuples with the following
        content:

        1. The current element

        2. The index of the element relative to its parent

        3. A stack of ancestor elements

        4. A flag "text", "tail" or None to indicate if the current node is a
        text node; either the text or tail of the current element (1)
        """
        def getNodeDetails(self, node):
            # Unwrap tuple nodes first: text/tail tuples are emitted directly
            # as TEXT tokens, otherwise continue with the bare element.
            if isinstance(node, tuple):  # It might be the root Element
                elt, _, _, flag = node
                if flag in ("text", "tail"):
                    return base.TEXT, getattr(elt, flag)
                else:
                    node = elt

            if not(hasattr(node, "tag")):
                # An ElementTree object rather than an Element: use its root.
                node = node.getroot()

            if node.tag in ("DOCUMENT_ROOT", "DOCUMENT_FRAGMENT"):
                return (base.DOCUMENT,)

            elif node.tag == "<!DOCTYPE>":
                return (base.DOCTYPE, node.text,
                        node.get("publicId"), node.get("systemId"))

            elif node.tag == ElementTreeCommentType:
                return base.COMMENT, node.text

            else:
                assert isinstance(node.tag, string_types), type(node.tag)
                # This is assumed to be an ordinary element
                match = tag_regexp.match(node.tag)
                if match:
                    namespace, tag = match.groups()
                else:
                    namespace = None
                    tag = node.tag
                # Split Clark-notation "{ns}name" attribute keys into
                # (namespace, localname) pairs expected by html5lib.
                attrs = OrderedDict()
                for name, value in list(node.attrib.items()):
                    match = tag_regexp.match(name)
                    if match:
                        attrs[(match.group(1), match.group(2))] = value
                    else:
                        attrs[(None, name)] = value
                return (base.ELEMENT, namespace, tag,
                        attrs, len(node) or node.text)

        def getFirstChild(self, node):
            if isinstance(node, tuple):
                element, key, parents, flag = node
            else:
                # First call on a bare (root) element: start a fresh stack.
                element, key, parents, flag = node, None, [], None

            if flag in ("text", "tail"):
                # Text nodes never have children.
                return None
            else:
                if element.text:
                    # Leading text precedes any child element.
                    return element, key, parents, "text"
                elif len(element):
                    parents.append(element)
                    return element[0], 0, parents, None
                else:
                    return None

        def getNextSibling(self, node):
            if isinstance(node, tuple):
                element, key, parents, flag = node
            else:
                return None

            if flag == "text":
                # After the leading text comes the first child element, if any.
                if len(element):
                    parents.append(element)
                    return element[0], 0, parents, None
                else:
                    return None
            else:
                # After an element comes its tail text, then the next sibling
                # element within the parent on top of the stack.
                if element.tail and flag != "tail":
                    return element, key, parents, "tail"
                elif key < len(parents[-1]) - 1:
                    return parents[-1][key + 1], key + 1, parents, None
                else:
                    return None

        def getParentNode(self, node):
            if isinstance(node, tuple):
                element, key, parents, flag = node
            else:
                return None

            if flag == "text":
                if not parents:
                    # Leading text of the root: the root element is the parent.
                    return element
                else:
                    return element, key, parents, None
            else:
                parent = parents.pop()
                if not parents:
                    # Back at the root: return the bare element.
                    return parent
                else:
                    assert list(parents[-1]).count(parent) == 1
                    return parent, list(parents[-1]).index(parent), parents, None

    return locals()

getETreeModule = moduleFactoryFactory(getETreeBuilder)
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/etree_lxml.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/etree_lxml.py new file mode 100644 index 0000000..f6f395a --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/etree_lxml.py | |||
@@ -0,0 +1,213 @@ | |||
1 | from __future__ import absolute_import, division, unicode_literals | ||
2 | from pip._vendor.six import text_type | ||
3 | |||
4 | from lxml import etree | ||
5 | from ..treebuilders.etree import tag_regexp | ||
6 | |||
7 | from . import base | ||
8 | |||
9 | from .. import _ihatexml | ||
10 | |||
11 | |||
def ensure_str(s):
    """Return *s* as text, strictly ASCII-decoding bytes; None passes through."""
    if s is None or isinstance(s, text_type):
        return s
    return s.decode("ascii", "strict")
19 | |||
20 | |||
class Root(object):
    """Wrapper presenting an lxml document as a single walkable root node."""

    def __init__(self, et):
        # The underlying lxml ElementTree (or bare element).
        self.elementtree = et
        self.children = []

        try:
            # lxml exposes the DOCTYPE via docinfo; synthesise a Doctype child
            # when an internal DTD subset is present. AttributeError means
            # *et* has no docinfo (e.g. a bare element).
            if et.docinfo.internalDTD:
                self.children.append(Doctype(self,
                                             ensure_str(et.docinfo.root_name),
                                             ensure_str(et.docinfo.public_id),
                                             ensure_str(et.docinfo.system_url)))
        except AttributeError:
            pass

        try:
            node = et.getroot()
        except AttributeError:
            # *et* is already an element, not a tree.
            node = et

        # Collect the document element together with any top-level siblings
        # (comments / processing instructions before or after it), in order.
        while node.getprevious() is not None:
            node = node.getprevious()
        while node is not None:
            self.children.append(node)
            node = node.getnext()

        self.text = None
        self.tail = None

    def __getitem__(self, key):
        return self.children[key]

    def getnext(self):
        # The root itself has no siblings.
        return None

    def __len__(self):
        return 1
57 | |||
58 | |||
class Doctype(object):
    """Lightweight doctype node synthesised from lxml's docinfo."""

    def __init__(self, root_node, name, public_id, system_id):
        self.root_node = root_node
        self.name = name
        self.public_id = public_id
        self.system_id = system_id
        # Doctype nodes carry no character data of their own.
        self.text = None
        self.tail = None

    def getnext(self):
        # A Doctype is only ever created as children[0] of its Root, so its
        # successor is the node stored right after it.
        return self.root_node.children[1]
71 | |||
72 | |||
class FragmentRoot(Root):
    """Root node wrapping the top-level children of a document fragment."""

    def __init__(self, children):
        # Deliberately does not call Root.__init__: there is no docinfo and
        # no single document element for a fragment.
        self.children = [FragmentWrapper(self, c) for c in children]
        self.text = None
        self.tail = None

    def getnext(self):
        return None
80 | |||
81 | |||
class FragmentWrapper(object):
    """Proxy around one fragment child (an element or a bare string)."""

    def __init__(self, fragment_root, obj):
        self.root_node = fragment_root
        self.obj = obj
        # Normalise text/tail to text strings up front; bare strings wrapped
        # here have neither attribute.
        self.text = ensure_str(obj.text) if hasattr(obj, 'text') else None
        self.tail = ensure_str(obj.tail) if hasattr(obj, 'tail') else None

    def __getattr__(self, name):
        # Everything not defined on the wrapper is delegated to the wrapped
        # object (tag, attrib, getnext on elements, ...).
        return getattr(self.obj, name)

    def getnext(self):
        # Siblings are looked up positionally in the fragment root's list.
        siblings = self.root_node.children
        idx = siblings.index(self)
        if idx < len(siblings) - 1:
            return siblings[idx + 1]
        return None

    def __getitem__(self, key):
        return self.obj[key]

    def __bool__(self):
        return bool(self.obj)

    def getparent(self):
        # Fragment children are top-level by definition.
        return None

    def __str__(self):
        return str(self.obj)

    def __unicode__(self):
        return str(self.obj)

    def __len__(self):
        return len(self.obj)
123 | |||
124 | |||
class TreeWalker(base.NonRecursiveTreeWalker):
    """Non-recursive tree walker over lxml trees.

    lxml stores character data on the adjacent element (``.text`` /
    ``.tail``) rather than as nodes, so text "nodes" are represented here
    as ``(element, "text" | "tail")`` tuples.
    """

    def __init__(self, tree):
        # pylint:disable=redefined-variable-type
        if isinstance(tree, list):
            # A fragment: remember its top-level children so getParentNode
            # can stop at the fragment boundary.
            self.fragmentChildren = set(tree)
            tree = FragmentRoot(tree)
        else:
            self.fragmentChildren = set()
            tree = Root(tree)
        base.NonRecursiveTreeWalker.__init__(self, tree)
        # Maps lxml-coerced XML names back to their original HTML forms.
        self.filter = _ihatexml.InfosetFilter()

    def getNodeDetails(self, node):
        if isinstance(node, tuple):  # Text node
            node, key = node
            assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key
            return base.TEXT, ensure_str(getattr(node, key))

        elif isinstance(node, Root):
            return (base.DOCUMENT,)

        elif isinstance(node, Doctype):
            return base.DOCTYPE, node.name, node.public_id, node.system_id

        elif isinstance(node, FragmentWrapper) and not hasattr(node, "tag"):
            # A bare string wrapped as a fragment child.
            return base.TEXT, ensure_str(node.obj)

        elif node.tag == etree.Comment:
            return base.COMMENT, ensure_str(node.text)

        elif node.tag == etree.Entity:
            return base.ENTITY, ensure_str(node.text)[1:-1]  # strip &;

        else:
            # This is assumed to be an ordinary element
            match = tag_regexp.match(ensure_str(node.tag))
            if match:
                namespace, tag = match.groups()
            else:
                namespace = None
                tag = ensure_str(node.tag)
            # Split Clark-notation "{ns}name" attribute keys into
            # (namespace, localname) pairs expected by html5lib.
            attrs = {}
            for name, value in list(node.attrib.items()):
                name = ensure_str(name)
                value = ensure_str(value)
                match = tag_regexp.match(name)
                if match:
                    attrs[(match.group(1), match.group(2))] = value
                else:
                    attrs[(None, name)] = value
            return (base.ELEMENT, namespace, self.filter.fromXmlName(tag),
                    attrs, len(node) > 0 or node.text)

    def getFirstChild(self, node):
        assert not isinstance(node, tuple), "Text nodes have no children"

        assert len(node) or node.text, "Node has no children"
        if node.text:
            # Leading text precedes the first child element.
            return (node, "text")
        else:
            return node[0]

    def getNextSibling(self, node):
        if isinstance(node, tuple):  # Text node
            node, key = node
            assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key
            if key == "text":
                # XXX: we cannot use a "bool(node) and node[0] or None" construct here
                # because node[0] might evaluate to False if it has no child element
                if len(node):
                    return node[0]
                else:
                    return None
            else:  # tail
                return node.getnext()

        # After an element comes its tail text (if any), then the next sibling.
        return (node, "tail") if node.tail else node.getnext()

    def getParentNode(self, node):
        if isinstance(node, tuple):  # Text node
            node, key = node
            assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key
            if key == "text":
                # Leading text's parent is the element that holds it.
                return node
            # else: fallback to "normal" processing
        elif node in self.fragmentChildren:
            # Top-level fragment children stop the walk upward.
            return None

        return node.getparent()
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/genshi.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/genshi.py new file mode 100644 index 0000000..42cd559 --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/treewalkers/genshi.py | |||
@@ -0,0 +1,69 @@ | |||
1 | from __future__ import absolute_import, division, unicode_literals | ||
2 | |||
3 | from genshi.core import QName | ||
4 | from genshi.core import START, END, XML_NAMESPACE, DOCTYPE, TEXT | ||
5 | from genshi.core import START_NS, END_NS, START_CDATA, END_CDATA, PI, COMMENT | ||
6 | |||
7 | from . import base | ||
8 | |||
9 | from ..constants import voidElements, namespaces | ||
10 | |||
11 | |||
class TreeWalker(base.TreeWalker):
    """Tree walker over a genshi markup event stream."""

    def __iter__(self):
        # Buffer the events so we can pass in the following one
        previous = None
        for event in self.tree:
            if previous is not None:
                for token in self.tokens(previous, event):
                    yield token
            previous = event

        # Don't forget the final event!
        if previous is not None:
            for token in self.tokens(previous, None):
                yield token

    def tokens(self, event, next):
        """Translate one genshi *event* into html5lib tokens.

        *next* is the event that follows (or None at the end); it is only
        used to decide whether a void element is flagged as having children.
        """
        kind, data, _ = event
        if kind == START:
            tag, attribs = data
            name = tag.localname
            namespace = tag.namespace
            # Convert genshi attributes to html5lib's
            # {(namespace, localname): value} mapping.
            converted_attribs = {}
            for k, v in attribs:
                if isinstance(k, QName):
                    converted_attribs[(k.namespace, k.localname)] = v
                else:
                    converted_attribs[(None, k)] = v

            if namespace == namespaces["html"] and name in voidElements:
                # Void HTML elements become emptyTag tokens; mark them as
                # having children when the next event is not this tag's END.
                for token in self.emptyTag(namespace, name, converted_attribs,
                                           not next or next[0] != END or
                                           next[1] != tag):
                    yield token
            else:
                yield self.startTag(namespace, name, converted_attribs)

        elif kind == END:
            name = data.localname
            namespace = data.namespace
            # Void HTML elements already produced emptyTag; skip their END.
            if namespace != namespaces["html"] or name not in voidElements:
                yield self.endTag(namespace, name)

        elif kind == COMMENT:
            yield self.comment(data)

        elif kind == TEXT:
            for token in self.text(data):
                yield token

        elif kind == DOCTYPE:
            yield self.doctype(*data)

        elif kind in (XML_NAMESPACE, DOCTYPE, START_NS, END_NS,
                      START_CDATA, END_CDATA, PI):
            # Structural / namespace events carry no HTML content.
            # NOTE(review): DOCTYPE here is unreachable (handled above), and
            # XML_NAMESPACE appears to be a namespace constant rather than an
            # event kind — kept as-is to match upstream html5lib.
            pass

        else:
            yield self.unknown(kind)