summary | refs | log | tree | commit | diff
path: root/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters
diff options
context:
space:
mode:
Diffstat (limited to 'venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters')
-rw-r--r--venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/__init__.py0
-rw-r--r--venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/alphabeticalattributes.py29
-rw-r--r--venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/base.py12
-rw-r--r--venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/inject_meta_charset.py73
-rw-r--r--venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/lint.py93
-rw-r--r--venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/optionaltags.py207
-rw-r--r--venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/sanitizer.py896
-rw-r--r--venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/whitespace.py38
8 files changed, 1348 insertions, 0 deletions
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/__init__.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/__init__.py
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/alphabeticalattributes.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/alphabeticalattributes.py
new file mode 100644
index 0000000..d9e234a
--- /dev/null
+++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/alphabeticalattributes.py
@@ -0,0 +1,29 @@
1from __future__ import absolute_import, division, unicode_literals
2
3from . import base
4
5from collections import OrderedDict
6
7
8def _attr_key(attr):
9 """Return an appropriate key for an attribute for sorting
10
11 Attributes have a namespace that can be either ``None`` or a string. We
12 can't compare the two because they're different types, so we convert
13 ``None`` to an empty string first.
14
15 """
16 return (attr[0][0] or ''), attr[0][1]
17
18
class Filter(base.Filter):
    """Alphabetizes attributes for elements"""

    def __iter__(self):
        # Only tokens that carry attributes are touched; every token, changed
        # or not, is passed on in its original position.
        tags_with_attributes = ("StartTag", "EmptyTag")
        for token in base.Filter.__iter__(self):
            if token["type"] in tags_with_attributes:
                ordered_items = sorted(token["data"].items(), key=_attr_key)
                token["data"] = OrderedDict(ordered_items)
            yield token
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/base.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/base.py
new file mode 100644
index 0000000..f5aa523
--- /dev/null
+++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/base.py
@@ -0,0 +1,12 @@
1from __future__ import absolute_import, division, unicode_literals
2
3
class Filter(object):
    """Base class for token stream filters

    Wraps ``source``; iterating the filter iterates ``source`` unchanged.
    Attributes that are not found on the filter itself are looked up on
    ``source``.

    """
    def __init__(self, source):
        """Creates a Filter

        :arg source: the object to wrap and iterate over

        """
        self.source = source

    def __iter__(self):
        return iter(self.source)

    def __getattr__(self, name):
        # __getattr__ is only called once normal lookup has failed. If
        # ``source`` itself is the missing attribute (instance created
        # without __init__, e.g. while being copied or unpickled), reading
        # ``self.source`` below would re-enter this method forever, so fail
        # with the AttributeError callers of getattr()/hasattr() expect.
        if name == "source":
            raise AttributeError(name)
        return getattr(self.source, name)
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/inject_meta_charset.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/inject_meta_charset.py
new file mode 100644
index 0000000..2f8ec4f
--- /dev/null
+++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/inject_meta_charset.py
@@ -0,0 +1,73 @@
1from __future__ import absolute_import, division, unicode_literals
2
3from . import base
4
5
class Filter(base.Filter):
    """Injects ``<meta charset=ENCODING>`` tag into head of document"""

    def __init__(self, source, encoding):
        """Creates a Filter

        :arg source: the source token stream

        :arg encoding: the encoding to set

        """
        base.Filter.__init__(self, source)
        self.encoding = encoding

    def __iter__(self):
        # With no encoding there is nothing to inject, so the meta tag is
        # treated as already handled from the start.
        meta_done = self.encoding is None
        phase = "pre_head"
        # Tokens seen since the <head> start tag; held back until </head> so
        # a meta tag can still be placed right after the start tag.
        buffered = []

        for token in base.Filter.__iter__(self):
            kind = token["type"]

            if kind == "StartTag":
                if token["name"].lower() == "head":
                    phase = "in_head"

            elif kind == "EmptyTag":
                tag = token["name"].lower()
                if tag == "meta":
                    # replace charset with actual encoding
                    attrs = token["data"]
                    charset_rewritten = False
                    is_content_type = False
                    for (namespace, name), value in attrs.items():
                        if namespace is not None:
                            continue
                        if name.lower() == 'charset':
                            attrs[(namespace, name)] = self.encoding
                            charset_rewritten = True
                            break
                        if name == 'http-equiv' and value.lower() == 'content-type':
                            is_content_type = True

                    if charset_rewritten:
                        meta_done = True
                    elif is_content_type and (None, "content") in attrs:
                        # No charset attribute: rewrite the http-equiv form.
                        attrs[(None, "content")] = 'text/html; charset=%s' % self.encoding
                        meta_done = True

                elif tag == "head" and not meta_done:
                    # insert meta into empty head
                    yield {"type": "StartTag", "name": "head",
                           "data": token["data"]}
                    yield {"type": "EmptyTag", "name": "meta",
                           "data": {(None, "charset"): self.encoding}}
                    yield {"type": "EndTag", "name": "head"}
                    meta_done = True
                    # The empty <head/> token itself has been replaced.
                    continue

            elif kind == "EndTag":
                if token["name"].lower() == "head" and buffered:
                    # insert meta into head (if necessary) and flush the
                    # buffered tokens; the first one is the head start tag
                    yield buffered[0]
                    if not meta_done:
                        yield {"type": "EmptyTag", "name": "meta",
                               "data": {(None, "charset"): self.encoding}}
                    for held in buffered[1:]:
                        yield held
                    buffered = []
                    meta_done = True
                    phase = "post_head"

            if phase == "in_head":
                buffered.append(token)
            else:
                yield token
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/lint.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/lint.py
new file mode 100644
index 0000000..b5bbd97
--- /dev/null
+++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/lint.py
@@ -0,0 +1,93 @@
1from __future__ import absolute_import, division, unicode_literals
2
3from pip._vendor.six import text_type
4
5from . import base
6from ..constants import namespaces, voidElements
7
8from ..constants import spaceCharacters
9spaceCharacters = "".join(spaceCharacters)
10
11
class Filter(base.Filter):
    """Lints the token stream for errors

    If it finds any errors, it'll raise an ``AssertionError``.

    """
    def __init__(self, source, require_matching_tags=True):
        """Creates a Filter

        :arg source: the source token stream

        :arg require_matching_tags: whether or not to require matching tags

        """
        super(Filter, self).__init__(source)
        self.require_matching_tags = require_matching_tags

    def __iter__(self):
        # Stack of (namespace, name) pairs for start tags not yet closed;
        # only maintained when require_matching_tags is set.
        open_elements = []
        for token in base.Filter.__iter__(self):
            type = token["type"]
            if type in ("StartTag", "EmptyTag"):
                namespace = token["namespace"]
                name = token["name"]
                assert namespace is None or isinstance(namespace, text_type)
                assert namespace != ""
                assert isinstance(name, text_type)
                assert name != ""
                assert isinstance(token["data"], dict)
                # HTML void elements must be EmptyTag tokens; everything else
                # must be a StartTag token.
                if (not namespace or namespace == namespaces["html"]) and name in voidElements:
                    assert type == "EmptyTag"
                else:
                    assert type == "StartTag"
                if type == "StartTag" and self.require_matching_tags:
                    open_elements.append((namespace, name))
                for (namespace, name), value in token["data"].items():
                    assert namespace is None or isinstance(namespace, text_type)
                    assert namespace != ""
                    assert isinstance(name, text_type)
                    assert name != ""
                    assert isinstance(value, text_type)

            elif type == "EndTag":
                namespace = token["namespace"]
                name = token["name"]
                assert namespace is None or isinstance(namespace, text_type)
                assert namespace != ""
                assert isinstance(name, text_type)
                assert name != ""
                if (not namespace or namespace == namespaces["html"]) and name in voidElements:
                    assert False, "Void element reported as EndTag token: %(tag)s" % {"tag": name}
                elif self.require_matching_tags:
                    start = open_elements.pop()
                    assert start == (namespace, name)

            elif type == "Comment":
                data = token["data"]
                assert isinstance(data, text_type)

            elif type in ("Characters", "SpaceCharacters"):
                data = token["data"]
                assert isinstance(data, text_type)
                assert data != ""
                if type == "SpaceCharacters":
                    assert data.strip(spaceCharacters) == ""

            elif type == "Doctype":
                name = token["name"]
                public_id = token["publicId"]
                system_id = token["systemId"]
                assert name is None or isinstance(name, text_type)
                # Check each identifier's own value; testing ``name`` here
                # would leave publicId and systemId effectively unchecked.
                assert public_id is None or isinstance(public_id, text_type)
                assert system_id is None or isinstance(system_id, text_type)

            elif type == "Entity":
                assert isinstance(token["name"], text_type)

            elif type == "SerializerError":
                assert isinstance(token["data"], text_type)

            else:
                assert False, "Unknown token type: %(type)s" % {"type": type}

            yield token
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/optionaltags.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/optionaltags.py
new file mode 100644
index 0000000..c8d5e54
--- /dev/null
+++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/optionaltags.py
@@ -0,0 +1,207 @@
1from __future__ import absolute_import, division, unicode_literals
2
3from . import base
4
5
class Filter(base.Filter):
    """Removes optional tags from the token stream"""

    def slider(self):
        """Yield ``(previous, current, next)`` triples over the source tokens

        ``previous`` is ``None`` for the first token and ``next`` is ``None``
        for the last one. Nothing is yielded for an empty source.

        """
        previous1 = previous2 = None
        for token in self.source:
            if previous1 is not None:
                yield previous2, previous1, token
            previous2 = previous1
            previous1 = token
        if previous1 is not None:
            yield previous2, previous1, None

    def __iter__(self):
        for previous, token, next in self.slider():
            type = token["type"]
            if type == "StartTag":
                # A start tag that carries attributes is always kept.
                if (token["data"] or
                        not self.is_optional_start(token["name"], previous, next)):
                    yield token
            elif type == "EndTag":
                if not self.is_optional_end(token["name"], next):
                    yield token
            else:
                yield token

    def is_optional_start(self, tagname, previous, next):
        """Return whether the start tag ``tagname`` may be omitted

        :arg tagname: name of the start tag being considered

        :arg previous: the token before the start tag, or ``None``

        :arg next: the token after the start tag, or ``None``

        """
        type = next["type"] if next else None
        # Equality, not ``in``: a substring test against 'html' would also
        # match unrelated names such as 'h', 'm' or the empty string.
        if tagname == 'html':
            # An html element's start tag may be omitted if the first thing
            # inside the html element is not a space character or a comment.
            return type not in ("Comment", "SpaceCharacters")
        elif tagname == 'head':
            # A head element's start tag may be omitted if the first thing
            # inside the head element is an element.
            # XXX: we also omit the start tag if the head element is empty
            if type in ("StartTag", "EmptyTag"):
                return True
            elif type == "EndTag":
                return next["name"] == "head"
        elif tagname == 'body':
            # A body element's start tag may be omitted if the first thing
            # inside the body element is not a space character or a comment,
            # except if the first thing inside the body element is a script
            # or style element and the node immediately preceding the body
            # element is a head element whose end tag has been omitted.
            if type in ("Comment", "SpaceCharacters"):
                return False
            elif type == "StartTag":
                # XXX: we do not look at the preceding event, so we never omit
                # the body element's start tag if it's followed by a script or
                # a style element.
                return next["name"] not in ('script', 'style')
            else:
                return True
        elif tagname == 'colgroup':
            # A colgroup element's start tag may be omitted if the first thing
            # inside the colgroup element is a col element, and if the element
            # is not immediately preceded by another colgroup element whose
            # end tag has been omitted.
            if type in ("StartTag", "EmptyTag"):
                # XXX: we do not look at the preceding event, so instead we never
                # omit the colgroup element's end tag when it is immediately
                # followed by another colgroup element. See is_optional_end.
                return next["name"] == "col"
            else:
                return False
        elif tagname == 'tbody':
            # A tbody element's start tag may be omitted if the first thing
            # inside the tbody element is a tr element, and if the element is
            # not immediately preceded by a tbody, thead, or tfoot element
            # whose end tag has been omitted.
            if type == "StartTag":
                # The end tag of a preceding tbody, thead or tfoot may have
                # been omitted (see is_optional_end), so keep this start tag
                # whenever one of those end tags comes right before it.
                if previous and previous['type'] == 'EndTag' and \
                        previous['name'] in ('tbody', 'thead', 'tfoot'):
                    return False
                return next["name"] == 'tr'
            else:
                return False
        return False

    def is_optional_end(self, tagname, next):
        """Return whether the end tag ``tagname`` may be omitted

        :arg tagname: name of the end tag being considered

        :arg next: the token after the end tag, or ``None``

        """
        type = next["type"] if next else None
        if tagname in ('html', 'head', 'body'):
            # An html, head or body element's end tag may be omitted if the
            # element is not immediately followed by a space character or a
            # comment.
            return type not in ("Comment", "SpaceCharacters")
        elif tagname in ('li', 'optgroup', 'tr'):
            # A li element's end tag may be omitted if the li element is
            # immediately followed by another li element or if there is
            # no more content in the parent element.
            # An optgroup element's end tag may be omitted if the optgroup
            # element is immediately followed by another optgroup element,
            # or if there is no more content in the parent element.
            # A tr element's end tag may be omitted if the tr element is
            # immediately followed by another tr element, or if there is
            # no more content in the parent element.
            if type == "StartTag":
                return next["name"] == tagname
            else:
                return type == "EndTag" or type is None
        elif tagname in ('dt', 'dd'):
            # A dt element's end tag may be omitted if the dt element is
            # immediately followed by another dt element or a dd element.
            # A dd element's end tag may be omitted if the dd element is
            # immediately followed by another dd element or a dt element,
            # or if there is no more content in the parent element.
            if type == "StartTag":
                return next["name"] in ('dt', 'dd')
            elif tagname == 'dd':
                return type == "EndTag" or type is None
            else:
                return False
        elif tagname == 'p':
            # A p element's end tag may be omitted if the p element is
            # immediately followed by an address, article, aside,
            # blockquote, datagrid, dialog, dir, div, dl, fieldset,
            # footer, form, h1, h2, h3, h4, h5, h6, header, hr, menu,
            # nav, ol, p, pre, section, table, or ul, element, or if
            # there is no more content in the parent element.
            if type in ("StartTag", "EmptyTag"):
                return next["name"] in ('address', 'article', 'aside',
                                        'blockquote', 'datagrid', 'dialog',
                                        'dir', 'div', 'dl', 'fieldset', 'footer',
                                        'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
                                        'header', 'hr', 'menu', 'nav', 'ol',
                                        'p', 'pre', 'section', 'table', 'ul')
            else:
                return type == "EndTag" or type is None
        elif tagname == 'option':
            # An option element's end tag may be omitted if the option
            # element is immediately followed by another option element,
            # or if it is immediately followed by an <code>optgroup</code>
            # element, or if there is no more content in the parent
            # element.
            if type == "StartTag":
                return next["name"] in ('option', 'optgroup')
            else:
                return type == "EndTag" or type is None
        elif tagname in ('rt', 'rp'):
            # An rt element's end tag may be omitted if the rt element is
            # immediately followed by an rt or rp element, or if there is
            # no more content in the parent element.
            # An rp element's end tag may be omitted if the rp element is
            # immediately followed by an rt or rp element, or if there is
            # no more content in the parent element.
            if type == "StartTag":
                return next["name"] in ('rt', 'rp')
            else:
                return type == "EndTag" or type is None
        elif tagname == 'colgroup':
            # A colgroup element's end tag may be omitted if the colgroup
            # element is not immediately followed by a space character or
            # a comment.
            if type in ("Comment", "SpaceCharacters"):
                return False
            elif type == "StartTag":
                # XXX: we also look for an immediately following colgroup
                # element. See is_optional_start.
                return next["name"] != 'colgroup'
            else:
                return True
        elif tagname in ('thead', 'tbody'):
            # A thead element's end tag may be omitted if the thead element
            # is immediately followed by a tbody or tfoot element.
            # A tbody element's end tag may be omitted if the tbody element
            # is immediately followed by a tbody or tfoot element, or if
            # there is no more content in the parent element.
            # When the following element is a tbody, is_optional_start keeps
            # that tbody's start tag because it sees this end tag before it.
            if type == "StartTag":
                return next["name"] in ['tbody', 'tfoot']
            elif tagname == 'tbody':
                return type == "EndTag" or type is None
            else:
                return False
        elif tagname == 'tfoot':
            # A tfoot element's end tag may be omitted if the tfoot element
            # is immediately followed by a tbody element, or if there is no
            # more content in the parent element.
            # When the following element is a tbody, is_optional_start keeps
            # that tbody's start tag because it sees this end tag before it.
            if type == "StartTag":
                return next["name"] == 'tbody'
            else:
                return type == "EndTag" or type is None
        elif tagname in ('td', 'th'):
            # A td element's end tag may be omitted if the td element is
            # immediately followed by a td or th element, or if there is
            # no more content in the parent element.
            # A th element's end tag may be omitted if the th element is
            # immediately followed by a td or th element, or if there is
            # no more content in the parent element.
            if type == "StartTag":
                return next["name"] in ('td', 'th')
            else:
                return type == "EndTag" or type is None
        return False
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/sanitizer.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/sanitizer.py
new file mode 100644
index 0000000..c3199a5
--- /dev/null
+++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/sanitizer.py
@@ -0,0 +1,896 @@
1from __future__ import absolute_import, division, unicode_literals
2
3import re
4from xml.sax.saxutils import escape, unescape
5
6from pip._vendor.six.moves import urllib_parse as urlparse
7
8from . import base
9from ..constants import namespaces, prefixes
10
11__all__ = ["Filter"]
12
13
# (namespace URI, local name) pairs, listed per namespace prefix as keyed in
# ``namespaces``.
allowed_elements = frozenset(
    (namespaces[prefix], local_name)
    for prefix, local_names in (
        ('html', (
            'a', 'abbr', 'acronym', 'address', 'area', 'article', 'aside',
            'audio', 'b', 'big', 'blockquote', 'br', 'button', 'canvas',
            'caption', 'center', 'cite', 'code', 'col', 'colgroup',
            'command', 'datagrid', 'datalist', 'dd', 'del', 'details',
            'dfn', 'dialog', 'dir', 'div', 'dl', 'dt', 'em', 'event-source',
            'fieldset', 'figcaption', 'figure', 'footer', 'font', 'form',
            'header', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'img',
            'input', 'ins', 'keygen', 'kbd', 'label', 'legend', 'li', 'm',
            'map', 'menu', 'meter', 'multicol', 'nav', 'nextid', 'ol',
            'output', 'optgroup', 'option', 'p', 'pre', 'progress', 'q',
            's', 'samp', 'section', 'select', 'small', 'sound', 'source',
            'spacer', 'span', 'strike', 'strong', 'sub', 'sup', 'table',
            'tbody', 'td', 'textarea', 'time', 'tfoot', 'th', 'thead', 'tr',
            'tt', 'u', 'ul', 'var', 'video',
        )),
        ('mathml', (
            'maction', 'math', 'merror', 'mfrac', 'mi', 'mmultiscripts',
            'mn', 'mo', 'mover', 'mpadded', 'mphantom', 'mprescripts',
            'mroot', 'mrow', 'mspace', 'msqrt', 'mstyle', 'msub', 'msubsup',
            'msup', 'mtable', 'mtd', 'mtext', 'mtr', 'munder', 'munderover',
            'none',
        )),
        ('svg', (
            'a', 'animate', 'animateColor', 'animateMotion',
            'animateTransform', 'clipPath', 'circle', 'defs', 'desc',
            'ellipse', 'font-face', 'font-face-name', 'font-face-src', 'g',
            'glyph', 'hkern', 'linearGradient', 'line', 'marker', 'metadata',
            'missing-glyph', 'mpath', 'path', 'polygon', 'polyline',
            'radialGradient', 'rect', 'set', 'stop', 'svg', 'switch', 'text',
            'title', 'tspan', 'use',
        )),
    )
    for local_name in local_names
)
177
# (namespace URI or None, local name) pairs. Each group below is a prefix
# (``None`` for attributes without a namespace, otherwise a key of
# ``namespaces``) followed by its local names. Names shared between the
# HTML, MathML and SVG groups are repeated deliberately; the set absorbs them.
allowed_attributes = frozenset(
    (None if prefix is None else namespaces[prefix], local_name)
    for prefix, local_names in (
        # HTML attributes
        (None, (
            'abbr', 'accept', 'accept-charset', 'accesskey', 'action',
            'align', 'alt', 'autocomplete', 'autofocus', 'axis',
            'background', 'balance', 'bgcolor', 'bgproperties', 'border',
            'bordercolor', 'bordercolordark', 'bordercolorlight',
            'bottompadding', 'cellpadding', 'cellspacing', 'ch', 'challenge',
            'char', 'charoff', 'choff', 'charset', 'checked', 'cite',
            'class', 'clear', 'color', 'cols', 'colspan', 'compact',
            'contenteditable', 'controls', 'coords', 'data', 'datafld',
            'datapagesize', 'datasrc', 'datetime', 'default', 'delay', 'dir',
            'disabled', 'draggable', 'dynsrc', 'enctype', 'end', 'face',
            'for', 'form', 'frame', 'galleryimg', 'gutter', 'headers',
            'height', 'hidefocus', 'hidden', 'high', 'href', 'hreflang',
            'hspace', 'icon', 'id', 'inputmode', 'ismap', 'keytype', 'label',
            'leftspacing', 'lang', 'list', 'longdesc', 'loop', 'loopcount',
            'loopend', 'loopstart', 'low', 'lowsrc', 'max', 'maxlength',
            'media', 'method', 'min', 'multiple', 'name', 'nohref',
            'noshade', 'nowrap', 'open', 'optimum', 'pattern', 'ping',
            'point-size', 'poster', 'pqg', 'preload', 'prompt', 'radiogroup',
            'readonly', 'rel', 'repeat-max', 'repeat-min', 'replace',
            'required', 'rev', 'rightspacing', 'rows', 'rowspan', 'rules',
            'scope', 'selected', 'shape', 'size', 'span', 'src', 'start',
            'step', 'style', 'summary', 'suppress', 'tabindex', 'target',
            'template', 'title', 'toppadding', 'type', 'unselectable',
            'usemap', 'urn', 'valign', 'value', 'variable', 'volume',
            'vspace', 'vrml', 'width', 'wrap',
        )),
        ('xml', ('lang',)),
        # MathML attributes
        (None, (
            'actiontype', 'align', 'columnalign', 'columnlines',
            'columnspacing', 'columnspan', 'depth', 'display',
            'displaystyle', 'equalcolumns', 'equalrows', 'fence',
            'fontstyle', 'fontweight', 'frame', 'height', 'linethickness',
            'lspace', 'mathbackground', 'mathcolor', 'mathvariant',
            'maxsize', 'minsize', 'other', 'rowalign', 'rowlines',
            'rowspacing', 'rowspan', 'rspace', 'scriptlevel', 'selection',
            'separator', 'stretchy', 'width',
        )),
        ('xlink', ('href', 'show', 'type')),
        # SVG attributes
        (None, (
            'accent-height', 'accumulate', 'additive', 'alphabetic',
            'arabic-form', 'ascent', 'attributeName', 'attributeType',
            'baseProfile', 'bbox', 'begin', 'by', 'calcMode', 'cap-height',
            'class', 'clip-path', 'color', 'color-rendering', 'content',
            'cx', 'cy', 'd', 'dx', 'dy', 'descent', 'display', 'dur', 'end',
            'fill', 'fill-opacity', 'fill-rule', 'font-family', 'font-size',
            'font-stretch', 'font-style', 'font-variant', 'font-weight',
            'from', 'fx', 'fy', 'g1', 'g2', 'glyph-name', 'gradientUnits',
            'hanging', 'height', 'horiz-adv-x', 'horiz-origin-x', 'id',
            'ideographic', 'k', 'keyPoints', 'keySplines', 'keyTimes',
            'lang', 'marker-end', 'marker-mid', 'marker-start',
            'markerHeight', 'markerUnits', 'markerWidth', 'mathematical',
            'max', 'min', 'name', 'offset', 'opacity', 'orient', 'origin',
            'overline-position', 'overline-thickness', 'panose-1', 'path',
            'pathLength', 'points', 'preserveAspectRatio', 'r', 'refX',
            'refY', 'repeatCount', 'repeatDur', 'requiredExtensions',
            'requiredFeatures', 'restart', 'rotate', 'rx', 'ry', 'slope',
            'stemh', 'stemv', 'stop-color', 'stop-opacity',
            'strikethrough-position', 'strikethrough-thickness', 'stroke',
            'stroke-dasharray', 'stroke-dashoffset', 'stroke-linecap',
            'stroke-linejoin', 'stroke-miterlimit', 'stroke-opacity',
            'stroke-width', 'systemLanguage', 'target', 'text-anchor', 'to',
            'transform', 'type', 'u1', 'u2', 'underline-position',
            'underline-thickness', 'unicode', 'unicode-range',
            'units-per-em', 'values', 'version', 'viewBox', 'visibility',
            'width', 'widths', 'x', 'x-height', 'x1', 'x2',
            'y', 'y1', 'y2', 'zoomAndPan',
        )),
        ('xlink', (
            'actuate', 'arcrole', 'href', 'role', 'show', 'title', 'type',
        )),
        ('xml', ('base', 'lang', 'space')),
    )
    for local_name in local_names
)
506
# (namespace, name) pairs of attributes whose values the sanitizer parses as
# URIs; a value whose scheme is not an allowed protocol causes the attribute
# to be dropped.
attr_val_is_uri = (
    # Attributes in no namespace.
    frozenset((None, attr_name) for attr_name in (
        'href', 'src', 'cite', 'action', 'longdesc', 'poster',
        'background', 'datasrc', 'dynsrc', 'lowsrc', 'ping',
    )) |
    # Namespaced attributes.
    frozenset([
        (namespaces['xlink'], 'href'),
        (namespaces['xml'], 'base'),
    ])
)
522
# Un-namespaced SVG attributes whose values may contain ``url(...)``
# references; the sanitizer blanks out any such reference that does not
# start with ``#``.
svg_attr_val_allows_ref = frozenset((None, attr_name) for attr_name in (
    'clip-path', 'color-profile', 'cursor', 'fill', 'filter',
    'marker', 'marker-start', 'marker-mid', 'marker-end',
    'mask', 'stroke',
))
536
# Un-namespaced (None, name) pairs naming the SVG elements that are only
# supposed to carry local (``#fragment``) ``xlink:href`` values.
# NOTE(review): 'textpath' is all-lowercase here while the other camel-cased
# names (e.g. 'altGlyph') keep their capitals -- confirm the spelling is
# the one the token stream actually carries.
svg_allow_local_href = frozenset((None, element_name) for element_name in (
    'altGlyph', 'animate', 'animateColor', 'animateMotion',
    'animateTransform', 'cursor', 'feImage', 'filter',
    'linearGradient', 'pattern', 'radialGradient', 'textpath',
    'tref', 'set', 'use',
))
554
# CSS property names that are kept as-is when an inline ``style`` attribute
# is sanitized (compared against the lower-cased property name).
allowed_css_properties = frozenset([
    'azimuth', 'background-color',
    'border-bottom-color', 'border-collapse', 'border-color',
    'border-left-color', 'border-right-color', 'border-top-color',
    'clear', 'color', 'cursor', 'direction', 'display', 'elevation',
    'float',
    'font', 'font-family', 'font-size', 'font-style', 'font-variant',
    'font-weight',
    'height', 'letter-spacing', 'line-height', 'overflow',
    'pause', 'pause-after', 'pause-before',
    'pitch', 'pitch-range', 'richness',
    'speak', 'speak-header', 'speak-numeral', 'speak-punctuation',
    'speech-rate', 'stress',
    'text-align', 'text-decoration', 'text-indent',
    'unicode-bidi', 'vertical-align', 'voice-family', 'volume',
    'white-space', 'width',
])
603
# Bare keywords accepted as values of the background/border/margin/padding
# property families when an inline ``style`` attribute is sanitized.
allowed_css_keywords = frozenset([
    'auto', 'aqua', 'black', 'block', 'blue', 'bold', 'both', 'bottom',
    'brown', 'center', 'collapse', 'dashed', 'dotted', 'fuchsia', 'gray',
    'green', '!important', 'italic', 'left', 'lime', 'maroon', 'medium',
    'none', 'navy', 'normal', 'nowrap', 'olive', 'pointer', 'purple',
    'red', 'right', 'solid', 'silver', 'teal', 'top', 'transparent',
    'underline', 'white', 'yellow',
])
645
# SVG presentation properties that are kept when an inline ``style``
# attribute is sanitized.
allowed_svg_properties = frozenset([
    'fill', 'fill-opacity', 'fill-rule',
    'stroke', 'stroke-width', 'stroke-linecap', 'stroke-linejoin',
    'stroke-opacity',
])
656
# URI schemes that may appear in attributes holding URI values; an attribute
# whose value uses any other scheme is dropped by the sanitizer.
allowed_protocols = frozenset([
    'ed2k', 'ftp', 'http', 'https', 'irc', 'mailto', 'news', 'gopher',
    'nntp', 'telnet', 'webcal', 'xmpp', 'callto', 'feed', 'urn', 'aim',
    'rsync', 'tag', 'ssh', 'sftp', 'rtsp', 'afs', 'data',
])
682
# Media types accepted for ``data:`` URIs; a ``data:`` URI declaring any
# other content type causes its attribute to be dropped.
allowed_content_types = frozenset([
    'image/png', 'image/jpeg', 'image/gif', 'image/webp', 'image/bmp',
    'text/plain',
])
691
692
# Pattern applied to the path component of a ``data:`` URI.  It captures the
# declared media type in the named group ``content_type``, then accepts an
# optional ``;charset=...`` and/or ``;base64`` parameter (in either order)
# followed by a comma and the payload.  Compiled with re.VERBOSE, so the
# whitespace and ``#`` comments inside the literal are not part of the match.
data_content_type = re.compile(r'''
 ^
 # Match a content type <application>/<type>
 (?P<content_type>[-a-zA-Z0-9.]+/[-a-zA-Z0-9.]+)
 # Match any character set and encoding
 (?:(?:;charset=(?:[-a-zA-Z0-9]+)(?:;(?:base64))?)
 |(?:;(?:base64))?(?:;charset=(?:[-a-zA-Z0-9]+))?)
 # Assume the rest is data
 ,.*
 $
 ''',
                               re.VERBOSE)
705
706
class Filter(base.Filter):
    """Sanitizes token stream of XHTML+MathML+SVG and of inline style attributes"""
    def __init__(self,
                 source,
                 allowed_elements=allowed_elements,
                 allowed_attributes=allowed_attributes,
                 allowed_css_properties=allowed_css_properties,
                 allowed_css_keywords=allowed_css_keywords,
                 allowed_svg_properties=allowed_svg_properties,
                 allowed_protocols=allowed_protocols,
                 allowed_content_types=allowed_content_types,
                 attr_val_is_uri=attr_val_is_uri,
                 svg_attr_val_allows_ref=svg_attr_val_allows_ref,
                 svg_allow_local_href=svg_allow_local_href):
        """Creates a Filter

        :arg source: the token source, passed unchanged to ``base.Filter``

        :arg allowed_elements: set of elements to allow--everything else will
            be escaped

        :arg allowed_attributes: set of attributes to allow in
            elements--everything else will be stripped

        :arg allowed_css_properties: set of CSS properties to allow--everything
            else will be stripped

        :arg allowed_css_keywords: set of CSS keywords to allow--everything
            else will be stripped

        :arg allowed_svg_properties: set of SVG properties to allow--everything
            else will be removed

        :arg allowed_protocols: set of allowed protocols for URIs

        :arg allowed_content_types: set of allowed content types for ``data`` URIs.

        :arg attr_val_is_uri: set of attributes that have URI values--values
            that have a scheme not listed in ``allowed_protocols`` are removed

        :arg svg_attr_val_allows_ref: set of SVG attributes that can have
            references

        :arg svg_allow_local_href: set of SVG elements that may only have
            local hrefs--an ``xlink:href`` on one of these elements that does
            not start with ``#`` is removed

        """
        super(Filter, self).__init__(source)
        self.allowed_elements = allowed_elements
        self.allowed_attributes = allowed_attributes
        self.allowed_css_properties = allowed_css_properties
        self.allowed_css_keywords = allowed_css_keywords
        self.allowed_svg_properties = allowed_svg_properties
        self.allowed_protocols = allowed_protocols
        self.allowed_content_types = allowed_content_types
        self.attr_val_is_uri = attr_val_is_uri
        self.svg_attr_val_allows_ref = svg_attr_val_allows_ref
        self.svg_allow_local_href = svg_allow_local_href

    def __iter__(self):
        """Yield the sanitized form of every source token, skipping dropped ones."""
        for token in base.Filter.__iter__(self):
            token = self.sanitize_token(token)
            if token:
                yield token

    def sanitize_token(self, token):
        """Sanitize a single token.

        Start, end and empty tags whose element is not in
        ``allowed_elements`` are escaped (turned into character data); for
        allowed elements every attribute not in ``allowed_attributes`` is
        stripped. Style attributes are parsed, and only the restricted set
        given by ``allowed_css_properties`` and ``allowed_css_keywords`` is
        let through. Attributes in ``attr_val_is_uri`` are scanned, and only
        URI schemes listed in ``allowed_protocols`` are allowed.

        Examples::

            sanitize_html('<script> do_nasty_stuff() </script>')
             => &lt;script> do_nasty_stuff() &lt;/script>
            sanitize_html('<a href="javascript: sucker();">Click here for $100</a>')
             => <a>Click here for $100</a>

        :arg token: the token dict to sanitize (may be modified in place)

        :returns: the sanitized token, or ``None`` for comments, which are
            dropped

        """
        # accommodate filters which use token_type differently
        token_type = token["type"]
        if token_type in ("StartTag", "EndTag", "EmptyTag"):
            name = token["name"]
            namespace = token["namespace"]
            # An element without a namespace is looked up as an HTML element.
            if ((namespace, name) in self.allowed_elements or
                (namespace is None and
                 (namespaces["html"], name) in self.allowed_elements)):
                return self.allowed_token(token)
            return self.disallowed_token(token)
        if token_type == "Comment":
            # Comments never make it through the sanitizer.
            return None
        return token

    def allowed_token(self, token):
        """Strip unsafe attributes from a token whose element is allowed.

        :arg token: a tag token; its ``data`` mapping (if present) is keyed
            by ``(namespace, name)`` and is modified in place

        :returns: the same token

        """
        if "data" in token:
            attrs = token["data"]
            attr_names = set(attrs.keys())

            # Remove forbidden attributes
            for to_remove in (attr_names - self.allowed_attributes):
                del attrs[to_remove]
                attr_names.remove(to_remove)

            # Remove attributes with disallowed URL values.  The decision is
            # made once per attribute so it is deleted at most once: the
            # previous cascade of independent ``del`` statements raised
            # KeyError when a ``data:`` URI failed both the protocol check
            # and the content-type check.
            for attr in (attr_names & self.attr_val_is_uri):
                assert attr in attrs
                if not self._uri_value_allowed(attrs[attr]):
                    del attrs[attr]

            # Blank out url(...) references that do not start with '#'.
            for attr in self.svg_attr_val_allows_ref:
                if attr in attrs:
                    attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)',
                                         ' ',
                                         unescape(attrs[attr]))

            # Elements restricted to local hrefs lose any xlink:href that
            # does not start with '#'.
            xlink_href = (namespaces['xlink'], 'href')
            if (xlink_href in attrs and
                    self._requires_local_href(token) and
                    re.search(r'^\s*[^#\s].*', attrs[xlink_href])):
                del attrs[xlink_href]

            if (None, 'style') in attrs:
                attrs[(None, 'style')] = self.sanitize_css(attrs[(None, 'style')])
            token["data"] = attrs
        return token

    def _uri_value_allowed(self, value):
        """Return True when the URI attribute value *value* may be kept."""
        # I don't have a clue where this regexp comes from or why it matches those
        # characters, nor why we call unescape. I just know it's always been here.
        # Should you be worried by this comment in a sanitizer? Yes. On the other hand, all
        # this will do is remove *more* than it otherwise would.
        val_unescaped = re.sub("[`\x00-\x20\x7f-\xa0\\s]+", '',
                               unescape(value)).lower()
        # remove replacement characters from unescaped characters
        val_unescaped = val_unescaped.replace("\ufffd", "")
        try:
            uri = urlparse.urlparse(val_unescaped)
        except ValueError:
            # A value that cannot even be parsed is never kept.
            return False
        if not uri.scheme:
            # Scheme-less (relative) values are left alone.
            return True
        if uri.scheme not in self.allowed_protocols:
            return False
        if uri.scheme == 'data':
            # data: URIs must additionally declare an allowed content type.
            m = data_content_type.match(uri.path)
            if not m or m.group('content_type') not in self.allowed_content_types:
                return False
        return True

    def _requires_local_href(self, token):
        """Return True when the token's element is listed in ``svg_allow_local_href``.

        The default set holds ``(None, name)`` tuples, so comparing the bare
        element name against it (as was done before) could never match.  The
        bare name is still accepted, together with the ``(None, name)`` and
        ``(namespace, name)`` forms, so custom sets keep working.
        """
        name = token["name"]
        candidates = (name, (None, name), (token.get("namespace"), name))
        return any(candidate in self.svg_allow_local_href
                   for candidate in candidates)

    def disallowed_token(self, token):
        """Turn a tag token for a disallowed element into character data.

        The tag is re-serialized as text (with its attributes, whose values
        are passed through ``escape``), stored in ``token["data"]``, and the
        token becomes a ``Characters`` token without a ``name`` key.

        :arg token: a ``StartTag``, ``EndTag`` or ``EmptyTag`` token;
            modified in place

        :returns: the same token

        """
        token_type = token["type"]
        if token_type == "EndTag":
            token["data"] = "</%s>" % token["name"]
        elif token["data"]:
            assert token_type in ("StartTag", "EmptyTag")
            attrs = []
            for (ns, name), v in token["data"].items():
                # Namespaced attributes are written with their prefix.
                attrs.append(' %s="%s"' % (name if ns is None else "%s:%s" % (prefixes[ns], name), escape(v)))
            token["data"] = "<%s%s>" % (token["name"], ''.join(attrs))
        else:
            token["data"] = "<%s>" % token["name"]
        if token.get("selfClosing"):
            # Replace the trailing ">" with "/>".
            token["data"] = token["data"][:-1] + "/>"

        token["type"] = "Characters"

        del token["name"]
        return token

    def sanitize_css(self, style):
        """Return the inline CSS *style* reduced to its allowed declarations.

        ``url(...)`` values are removed first.  The whole string must then
        pass two pattern checks (the "gauntlet"), otherwise ``''`` is
        returned.  Of the remaining declarations, those for properties in
        ``allowed_css_properties`` or ``allowed_svg_properties`` are kept, as
        are background/border/margin/padding declarations whose every
        whitespace-separated value is an allowed keyword or looks like a
        colour or length.

        :arg style: the raw value of a ``style`` attribute

        :returns: the kept declarations, each as ``"prop: value;"``, joined
            by single spaces

        """
        # disallow urls
        style = re.compile(r'url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ', style)

        # gauntlet
        if not re.match(r"""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style):
            return ''
        if not re.match(r"^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style):
            return ''

        clean = []
        for prop, value in re.findall(r"([-\w]+)\s*:\s*([^:;]*)", style):
            if not value:
                continue
            if prop.lower() in self.allowed_css_properties:
                clean.append(prop + ': ' + value + ';')
            elif prop.split('-')[0].lower() in ['background', 'border', 'margin',
                                                'padding']:
                # Shorthand families: keep the declaration only if every
                # value passes (the for/else runs when no value broke out).
                for keyword in value.split():
                    if keyword not in self.allowed_css_keywords and \
                            not re.match(r"^(#[0-9a-fA-F]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword):  # noqa
                        break
                else:
                    clean.append(prop + ': ' + value + ';')
            elif prop.lower() in self.allowed_svg_properties:
                clean.append(prop + ': ' + value + ';')

        return ' '.join(clean)
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/whitespace.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/whitespace.py
new file mode 100644
index 0000000..24bb0de
--- /dev/null
+++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/whitespace.py
@@ -0,0 +1,38 @@
1from __future__ import absolute_import, division, unicode_literals
2
3import re
4
5from . import base
6from ..constants import rcdataElements, spaceCharacters
# Rebind the imported collection of space characters as one string so it can
# be interpolated into a regex character class below.
spaceCharacters = "".join(spaceCharacters)

# Matches one or more consecutive characters from ``spaceCharacters``.
SPACES_REGEX = re.compile("[%s]+" % spaceCharacters)
10
11
class Filter(base.Filter):
    """Collapses runs of whitespace in the token stream.

    Text inside ``pre``, ``textarea`` and any element named in
    ``rcdataElements`` (including everything nested within them) is left
    untouched.
    """
    spacePreserveElements = frozenset(["pre", "textarea"] + list(rcdataElements))

    def __iter__(self):
        # Number of currently open start tags at or below a
        # whitespace-preserving element; zero means "collapse whitespace".
        depth = 0
        for token in base.Filter.__iter__(self):
            kind = token["type"]
            if kind == "StartTag" and (
                    depth or token["name"] in self.spacePreserveElements):
                depth += 1
            elif kind == "EndTag" and depth:
                depth -= 1
            elif not depth:
                if kind == "SpaceCharacters" and token["data"]:
                    # Only non-empty data is replaced, so no space is
                    # introduced where there was none.
                    token["data"] = " "
                elif kind == "Characters":
                    token["data"] = collapse_spaces(token["data"])

            yield token
35
36
def collapse_spaces(text):
    """Return *text* with each match of ``SPACES_REGEX`` replaced by one space."""
    collapsed = SPACES_REGEX.sub(' ', text)
    return collapsed