diff options
Diffstat (limited to 'venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/optionaltags.py')
-rw-r--r-- | venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/optionaltags.py | 207 |
1 files changed, 207 insertions, 0 deletions
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/optionaltags.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/optionaltags.py new file mode 100644 index 0000000..c8d5e54 --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/optionaltags.py | |||
@@ -0,0 +1,207 @@ | |||
1 | from __future__ import absolute_import, division, unicode_literals | ||
2 | |||
3 | from . import base | ||
4 | |||
5 | |||
class Filter(base.Filter):
    """Removes optional tags from the token stream.

    Iterating the filter yields the tokens of ``self.source`` minus the
    start and end tags that ``is_optional_start`` / ``is_optional_end``
    report as omissible. Tokens are dicts; this class reads their
    ``"type"``, ``"name"`` and (for start tags) ``"data"`` keys.
    """

    def slider(self):
        """Yield ``(previous, current, next)`` triples over ``self.source``.

        ``previous`` is None for the first token and ``next`` is None for
        the last one. Nothing is yielded when the source is empty.
        """
        previous1 = previous2 = None
        for token in self.source:
            if previous1 is not None:
                yield previous2, previous1, token
            previous2 = previous1
            previous1 = token
        if previous1 is not None:
            yield previous2, previous1, None

    def __iter__(self):
        """Yield every source token except omissible start and end tags."""
        for previous, token, following in self.slider():
            token_type = token["type"]
            if token_type == "StartTag":
                # A start tag with a truthy "data" value is always kept;
                # otherwise it is kept only when it is not optional here.
                if (token["data"] or
                        not self.is_optional_start(token["name"], previous,
                                                   following)):
                    yield token
            elif token_type == "EndTag":
                if not self.is_optional_end(token["name"], following):
                    yield token
            else:
                yield token

    def is_optional_start(self, tagname, previous, next):
        """Return True if the start tag ``tagname`` may be omitted.

        :arg tagname: name of the element whose start tag is considered
        :arg previous: the token preceding the start tag in the source,
            or None when there is none
        :arg next: the token following the start tag, or None at the end
            of the stream

        :returns: True when the start tag can be dropped, False otherwise
        """
        next_type = next["type"] if next else None
        # Must be an equality test: ``tagname in 'html'`` is a substring
        # check and would also match names such as 'h', 'm' or 'l'.
        if tagname == 'html':
            # An html element's start tag may be omitted if the first thing
            # inside the html element is not a space character or a comment.
            return next_type not in ("Comment", "SpaceCharacters")
        elif tagname == 'head':
            # A head element's start tag may be omitted if the first thing
            # inside the head element is an element.
            # XXX: we also omit the start tag if the head element is empty
            if next_type in ("StartTag", "EmptyTag"):
                return True
            elif next_type == "EndTag":
                return next["name"] == "head"
            return False
        elif tagname == 'body':
            # A body element's start tag may be omitted if the first thing
            # inside the body element is not a space character or a comment,
            # except if the first thing inside the body element is a script
            # or style element and the node immediately preceding the body
            # element is a head element whose end tag has been omitted.
            if next_type in ("Comment", "SpaceCharacters"):
                return False
            elif next_type == "StartTag":
                # XXX: we do not look at the preceding event, so we never omit
                # the body element's start tag if it's followed by a script or
                # a style element.
                return next["name"] not in ('script', 'style')
            else:
                return True
        elif tagname == 'colgroup':
            # A colgroup element's start tag may be omitted if the first thing
            # inside the colgroup element is a col element, and if the element
            # is not immediately preceded by another colgroup element whose
            # end tag has been omitted.
            if next_type in ("StartTag", "EmptyTag"):
                # XXX: we do not look at the preceding event, so instead we
                # never omit the colgroup element's end tag when it is
                # immediately followed by another colgroup element.
                # See is_optional_end.
                return next["name"] == "col"
            else:
                return False
        elif tagname == 'tbody':
            # A tbody element's start tag may be omitted if the first thing
            # inside the tbody element is a tr element, and if the element is
            # not immediately preceded by a tbody, thead, or tfoot element
            # whose end tag has been omitted.
            if next_type == "StartTag":
                # is_optional_end may have dropped the end tag of a
                # directly preceding tbody/thead/tfoot, so this start tag
                # has to stay whenever such an end tag precedes it.
                if (previous and previous['type'] == 'EndTag' and
                        previous['name'] in ('tbody', 'thead', 'tfoot')):
                    return False
                return next["name"] == 'tr'
            else:
                return False
        return False

    def is_optional_end(self, tagname, next):
        """Return True if the end tag ``tagname`` may be omitted.

        :arg tagname: name of the element whose end tag is considered
        :arg next: the token following the end tag, or None at the end of
            the stream

        :returns: True when the end tag can be dropped, False otherwise
        """
        next_type = next["type"] if next else None
        # "No more content in the parent element": the next token is an
        # end tag, or the stream is exhausted.
        at_parent_end = next_type == "EndTag" or next_type is None
        if tagname in ('html', 'head', 'body'):
            # An html element's end tag may be omitted if the html element
            # is not immediately followed by a space character or a comment.
            # The same test is applied to head and body.
            return next_type not in ("Comment", "SpaceCharacters")
        elif tagname in ('li', 'optgroup', 'tr'):
            # A li element's end tag may be omitted if the li element is
            # immediately followed by another li element or if there is
            # no more content in the parent element.
            # An optgroup element's end tag may be omitted if the optgroup
            # element is immediately followed by another optgroup element,
            # or if there is no more content in the parent element.
            # A tr element's end tag may be omitted if the tr element is
            # immediately followed by another tr element, or if there is
            # no more content in the parent element.
            if next_type == "StartTag":
                return next["name"] == tagname
            else:
                return at_parent_end
        elif tagname in ('dt', 'dd'):
            # A dt element's end tag may be omitted if the dt element is
            # immediately followed by another dt element or a dd element.
            # A dd element's end tag may be omitted if the dd element is
            # immediately followed by another dd element or a dt element,
            # or if there is no more content in the parent element.
            if next_type == "StartTag":
                return next["name"] in ('dt', 'dd')
            elif tagname == 'dd':
                return at_parent_end
            else:
                return False
        elif tagname == 'p':
            # A p element's end tag may be omitted if the p element is
            # immediately followed by an address, article, aside,
            # blockquote, datagrid, dialog, dir, div, dl, fieldset,
            # footer, form, h1, h2, h3, h4, h5, h6, header, hr, menu,
            # nav, ol, p, pre, section, table, or ul, element, or if
            # there is no more content in the parent element.
            if next_type in ("StartTag", "EmptyTag"):
                return next["name"] in ('address', 'article', 'aside',
                                        'blockquote', 'datagrid', 'dialog',
                                        'dir', 'div', 'dl', 'fieldset',
                                        'footer', 'form', 'h1', 'h2', 'h3',
                                        'h4', 'h5', 'h6', 'header', 'hr',
                                        'menu', 'nav', 'ol', 'p', 'pre',
                                        'section', 'table', 'ul')
            else:
                return at_parent_end
        elif tagname == 'option':
            # An option element's end tag may be omitted if the option
            # element is immediately followed by another option element,
            # or if it is immediately followed by an optgroup element,
            # or if there is no more content in the parent element.
            if next_type == "StartTag":
                return next["name"] in ('option', 'optgroup')
            else:
                return at_parent_end
        elif tagname in ('rt', 'rp'):
            # An rt element's end tag may be omitted if the rt element is
            # immediately followed by an rt or rp element, or if there is
            # no more content in the parent element.
            # An rp element's end tag may be omitted if the rp element is
            # immediately followed by an rt or rp element, or if there is
            # no more content in the parent element.
            if next_type == "StartTag":
                return next["name"] in ('rt', 'rp')
            else:
                return at_parent_end
        elif tagname == 'colgroup':
            # A colgroup element's end tag may be omitted if the colgroup
            # element is not immediately followed by a space character or
            # a comment.
            if next_type in ("Comment", "SpaceCharacters"):
                return False
            elif next_type == "StartTag":
                # XXX: we also look for an immediately following colgroup
                # element. See is_optional_start.
                return next["name"] != 'colgroup'
            else:
                return True
        elif tagname in ('thead', 'tbody'):
            # A thead element's end tag may be omitted if the thead element
            # is immediately followed by a tbody or tfoot element.
            # A tbody element's end tag may be omitted if the tbody element
            # is immediately followed by a tbody or tfoot element, or if
            # there is no more content in the parent element.
            # When a tbody follows, the end tag is omitted here and
            # is_optional_start keeps that tbody's start tag instead.
            if next_type == "StartTag":
                return next["name"] in ('tbody', 'tfoot')
            elif tagname == 'tbody':
                return at_parent_end
            else:
                return False
        elif tagname == 'tfoot':
            # A tfoot element's end tag may be omitted if the tfoot element
            # is immediately followed by a tbody element, or if there is no
            # more content in the parent element.
            # When a tbody follows, the end tag is omitted here and
            # is_optional_start keeps that tbody's start tag instead.
            if next_type == "StartTag":
                return next["name"] == 'tbody'
            else:
                return at_parent_end
        elif tagname in ('td', 'th'):
            # A td element's end tag may be omitted if the td element is
            # immediately followed by a td or th element, or if there is
            # no more content in the parent element.
            # A th element's end tag may be omitted if the th element is
            # immediately followed by a td or th element, or if there is
            # no more content in the parent element.
            if next_type == "StartTag":
                return next["name"] in ('td', 'th')
            else:
                return at_parent_end
        return False