From 68df54d6629ec019142eb149dd037774f2d11e7c Mon Sep 17 00:00:00 2001 From: Shubham Saini Date: Tue, 11 Dec 2018 15:31:23 +0530 Subject: First commit --- .../pip/_vendor/html5lib/filters/sanitizer.py | 896 +++++++++++++++++++++ 1 file changed, 896 insertions(+) create mode 100644 venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/sanitizer.py (limited to 'venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/sanitizer.py') diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/sanitizer.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/sanitizer.py new file mode 100644 index 0000000..c3199a5 --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/sanitizer.py @@ -0,0 +1,896 @@ +from __future__ import absolute_import, division, unicode_literals + +import re +from xml.sax.saxutils import escape, unescape + +from pip._vendor.six.moves import urllib_parse as urlparse + +from . import base +from ..constants import namespaces, prefixes + +__all__ = ["Filter"] + + +allowed_elements = frozenset(( + (namespaces['html'], 'a'), + (namespaces['html'], 'abbr'), + (namespaces['html'], 'acronym'), + (namespaces['html'], 'address'), + (namespaces['html'], 'area'), + (namespaces['html'], 'article'), + (namespaces['html'], 'aside'), + (namespaces['html'], 'audio'), + (namespaces['html'], 'b'), + (namespaces['html'], 'big'), + (namespaces['html'], 'blockquote'), + (namespaces['html'], 'br'), + (namespaces['html'], 'button'), + (namespaces['html'], 'canvas'), + (namespaces['html'], 'caption'), + (namespaces['html'], 'center'), + (namespaces['html'], 'cite'), + (namespaces['html'], 'code'), + (namespaces['html'], 'col'), + (namespaces['html'], 'colgroup'), + (namespaces['html'], 'command'), + (namespaces['html'], 'datagrid'), + (namespaces['html'], 'datalist'), + (namespaces['html'], 'dd'), + (namespaces['html'], 'del'), + (namespaces['html'], 'details'), + (namespaces['html'], 'dfn'), + (namespaces['html'], 'dialog'), + (namespaces['html'], 'dir'), + (namespaces['html'], 'div'), + (namespaces['html'], 'dl'), + (namespaces['html'], 'dt'), + (namespaces['html'], 'em'), + (namespaces['html'], 'event-source'), + (namespaces['html'], 'fieldset'), + (namespaces['html'], 'figcaption'), + (namespaces['html'], 'figure'), + (namespaces['html'], 'footer'), + (namespaces['html'], 'font'), + (namespaces['html'], 'form'), + (namespaces['html'], 'header'), + (namespaces['html'], 'h1'), + (namespaces['html'], 'h2'), + (namespaces['html'], 'h3'), + (namespaces['html'], 'h4'), + (namespaces['html'], 'h5'), + (namespaces['html'], 'h6'), + (namespaces['html'], 'hr'), + (namespaces['html'], 'i'), + (namespaces['html'], 'img'), + (namespaces['html'], 'input'), + (namespaces['html'], 'ins'), + (namespaces['html'], 'keygen'), + (namespaces['html'], 'kbd'), + (namespaces['html'], 'label'), + (namespaces['html'], 'legend'), + (namespaces['html'], 'li'), + (namespaces['html'], 'm'), + (namespaces['html'], 'map'), + (namespaces['html'], 'menu'), + (namespaces['html'], 'meter'), + (namespaces['html'], 'multicol'), + (namespaces['html'], 'nav'), + (namespaces['html'], 'nextid'), + (namespaces['html'], 'ol'), + (namespaces['html'], 'output'), + (namespaces['html'], 'optgroup'), + (namespaces['html'], 'option'), + (namespaces['html'], 'p'), + (namespaces['html'], 'pre'), + (namespaces['html'], 'progress'), + (namespaces['html'], 'q'), + (namespaces['html'], 's'), + (namespaces['html'], 'samp'), + (namespaces['html'], 'section'), + (namespaces['html'], 'select'), + (namespaces['html'], 'small'), + (namespaces['html'], 'sound'), + (namespaces['html'], 'source'), + (namespaces['html'], 'spacer'), + (namespaces['html'], 'span'), + (namespaces['html'], 'strike'), + (namespaces['html'], 'strong'), + (namespaces['html'], 'sub'), + (namespaces['html'], 'sup'), + (namespaces['html'], 'table'), + (namespaces['html'], 'tbody'), + (namespaces['html'], 'td'), + (namespaces['html'], 'textarea'), + (namespaces['html'], 'time'), + (namespaces['html'], 'tfoot'), + (namespaces['html'], 'th'), + (namespaces['html'], 'thead'), + (namespaces['html'], 'tr'), + (namespaces['html'], 'tt'), + (namespaces['html'], 'u'), + (namespaces['html'], 'ul'), + (namespaces['html'], 'var'), + (namespaces['html'], 'video'), + (namespaces['mathml'], 'maction'), + (namespaces['mathml'], 'math'), + (namespaces['mathml'], 'merror'), + (namespaces['mathml'], 'mfrac'), + (namespaces['mathml'], 'mi'), + (namespaces['mathml'], 'mmultiscripts'), + (namespaces['mathml'], 'mn'), + (namespaces['mathml'], 'mo'), + (namespaces['mathml'], 'mover'), + (namespaces['mathml'], 'mpadded'), + (namespaces['mathml'], 'mphantom'), + (namespaces['mathml'], 'mprescripts'), + (namespaces['mathml'], 'mroot'), + (namespaces['mathml'], 'mrow'), + (namespaces['mathml'], 'mspace'), + (namespaces['mathml'], 'msqrt'), + (namespaces['mathml'], 'mstyle'), + (namespaces['mathml'], 'msub'), + (namespaces['mathml'], 'msubsup'), + (namespaces['mathml'], 'msup'), + (namespaces['mathml'], 'mtable'), + (namespaces['mathml'], 'mtd'), + (namespaces['mathml'], 'mtext'), + (namespaces['mathml'], 'mtr'), + (namespaces['mathml'], 'munder'), + (namespaces['mathml'], 'munderover'), + (namespaces['mathml'], 'none'), + (namespaces['svg'], 'a'), + (namespaces['svg'], 'animate'), + (namespaces['svg'], 'animateColor'), + (namespaces['svg'], 'animateMotion'), + (namespaces['svg'], 'animateTransform'), + (namespaces['svg'], 'clipPath'), + (namespaces['svg'], 'circle'), + (namespaces['svg'], 'defs'), + (namespaces['svg'], 'desc'), + (namespaces['svg'], 'ellipse'), + (namespaces['svg'], 'font-face'), + (namespaces['svg'], 'font-face-name'), + (namespaces['svg'], 'font-face-src'), + (namespaces['svg'], 'g'), + (namespaces['svg'], 'glyph'), + (namespaces['svg'], 'hkern'), + (namespaces['svg'], 'linearGradient'), + (namespaces['svg'], 'line'), + (namespaces['svg'], 'marker'), + (namespaces['svg'], 'metadata'), + (namespaces['svg'], 'missing-glyph'), + (namespaces['svg'], 'mpath'), + (namespaces['svg'], 'path'), + (namespaces['svg'], 'polygon'), + (namespaces['svg'], 'polyline'), + (namespaces['svg'], 'radialGradient'), + (namespaces['svg'], 'rect'), + (namespaces['svg'], 'set'), + (namespaces['svg'], 'stop'), + (namespaces['svg'], 'svg'), + (namespaces['svg'], 'switch'), + (namespaces['svg'], 'text'), + (namespaces['svg'], 'title'), + (namespaces['svg'], 'tspan'), + (namespaces['svg'], 'use'), +)) + +allowed_attributes = frozenset(( + # HTML attributes + (None, 'abbr'), + (None, 'accept'), + (None, 'accept-charset'), + (None, 'accesskey'), + (None, 'action'), + (None, 'align'), + (None, 'alt'), + (None, 'autocomplete'), + (None, 'autofocus'), + (None, 'axis'), + (None, 'background'), + (None, 'balance'), + (None, 'bgcolor'), + (None, 'bgproperties'), + (None, 'border'), + (None, 'bordercolor'), + (None, 'bordercolordark'), + (None, 'bordercolorlight'), + (None, 'bottompadding'), + (None, 'cellpadding'), + (None, 'cellspacing'), + (None, 'ch'), + (None, 'challenge'), + (None, 'char'), + (None, 'charoff'), + (None, 'choff'), + (None, 'charset'), + (None, 'checked'), + (None, 'cite'), + (None, 'class'), + (None, 'clear'), + (None, 'color'), + (None, 'cols'), + (None, 'colspan'), + (None, 'compact'), + (None, 'contenteditable'), + (None, 'controls'), + (None, 'coords'), + (None, 'data'), + (None, 'datafld'), + (None, 'datapagesize'), + (None, 'datasrc'), + (None, 'datetime'), + (None, 'default'), + (None, 'delay'), + (None, 'dir'), + (None, 'disabled'), + (None, 'draggable'), + (None, 'dynsrc'), + (None, 'enctype'), + (None, 'end'), + (None, 'face'), + (None, 'for'), + (None, 'form'), + (None, 'frame'), + (None, 'galleryimg'), + (None, 'gutter'), + (None, 'headers'), + (None, 'height'), + (None, 'hidefocus'), + (None, 'hidden'), + (None, 'high'), + (None, 'href'), + (None, 'hreflang'), + (None, 'hspace'), + (None, 'icon'), + (None, 'id'), + (None, 'inputmode'), + (None, 'ismap'), + (None, 'keytype'), + (None, 'label'), + (None, 'leftspacing'), + (None, 'lang'), + (None, 'list'), + (None, 'longdesc'), + (None, 'loop'), + (None, 'loopcount'), + (None, 'loopend'), + (None, 'loopstart'), + (None, 'low'), + (None, 'lowsrc'), + (None, 'max'), + (None, 'maxlength'), + (None, 'media'), + (None, 'method'), + (None, 'min'), + (None, 'multiple'), + (None, 'name'), + (None, 'nohref'), + (None, 'noshade'), + (None, 'nowrap'), + (None, 'open'), + (None, 'optimum'), + (None, 'pattern'), + (None, 'ping'), + (None, 'point-size'), + (None, 'poster'), + (None, 'pqg'), + (None, 'preload'), + (None, 'prompt'), + (None, 'radiogroup'), + (None, 'readonly'), + (None, 'rel'), + (None, 'repeat-max'), + (None, 'repeat-min'), + (None, 'replace'), + (None, 'required'), + (None, 'rev'), + (None, 'rightspacing'), + (None, 'rows'), + (None, 'rowspan'), + (None, 'rules'), + (None, 'scope'), + (None, 'selected'), + (None, 'shape'), + (None, 'size'), + (None, 'span'), + (None, 'src'), + (None, 'start'), + (None, 'step'), + (None, 'style'), + (None, 'summary'), + (None, 'suppress'), + (None, 'tabindex'), + (None, 'target'), + (None, 'template'), + (None, 'title'), + (None, 'toppadding'), + (None, 'type'), + (None, 'unselectable'), + (None, 'usemap'), + (None, 'urn'), + (None, 'valign'), + (None, 'value'), + (None, 'variable'), + (None, 'volume'), + (None, 'vspace'), + (None, 'vrml'), + (None, 'width'), + (None, 'wrap'), + (namespaces['xml'], 'lang'), + # MathML attributes + (None, 'actiontype'), + (None, 'align'), + (None, 'columnalign'), + (None, 'columnalign'), + (None, 'columnalign'), + (None, 'columnlines'), + (None, 'columnspacing'), + (None, 'columnspan'), + (None, 'depth'), + (None, 'display'), + (None, 'displaystyle'), + (None, 'equalcolumns'), + (None, 'equalrows'), + (None, 'fence'), + (None, 'fontstyle'), + (None, 'fontweight'), + (None, 'frame'), + (None, 'height'), + (None, 'linethickness'), + (None, 'lspace'), + (None, 'mathbackground'), + (None, 'mathcolor'), + (None, 'mathvariant'), + (None, 'mathvariant'), + (None, 'maxsize'), + (None, 'minsize'), + (None, 'other'), + (None, 'rowalign'), + (None, 'rowalign'), + (None, 'rowalign'), + (None, 'rowlines'), + (None, 'rowspacing'), + (None, 'rowspan'), + (None, 'rspace'), + (None, 'scriptlevel'), + (None, 'selection'), + (None, 'separator'), + (None, 'stretchy'), + (None, 'width'), + (None, 'width'), + (namespaces['xlink'], 'href'), + (namespaces['xlink'], 'show'), + (namespaces['xlink'], 'type'), + # SVG attributes + (None, 'accent-height'), + (None, 'accumulate'), + (None, 'additive'), + (None, 'alphabetic'), + (None, 'arabic-form'), + (None, 'ascent'), + (None, 'attributeName'), + (None, 'attributeType'), + (None, 'baseProfile'), + (None, 'bbox'), + (None, 'begin'), + (None, 'by'), + (None, 'calcMode'), + (None, 'cap-height'), + (None, 'class'), + (None, 'clip-path'), + (None, 'color'), + (None, 'color-rendering'), + (None, 'content'), + (None, 'cx'), + (None, 'cy'), + (None, 'd'), + (None, 'dx'), + (None, 'dy'), + (None, 'descent'), + (None, 'display'), + (None, 'dur'), + (None, 'end'), + (None, 'fill'), + (None, 'fill-opacity'), + (None, 'fill-rule'), + (None, 'font-family'), + (None, 'font-size'), + (None, 'font-stretch'), + (None, 'font-style'), + (None, 'font-variant'), + (None, 'font-weight'), + (None, 'from'), + (None, 'fx'), + (None, 'fy'), + (None, 'g1'), + (None, 'g2'), + (None, 'glyph-name'), + (None, 'gradientUnits'), + (None, 'hanging'), + (None, 'height'), + (None, 'horiz-adv-x'), + (None, 'horiz-origin-x'), + (None, 'id'), + (None, 'ideographic'), + (None, 'k'), + (None, 'keyPoints'), + (None, 'keySplines'), + (None, 'keyTimes'), + (None, 'lang'), + (None, 'marker-end'), + (None, 'marker-mid'), + (None, 'marker-start'), + (None, 'markerHeight'), + (None, 'markerUnits'), + (None, 'markerWidth'), + (None, 'mathematical'), + (None, 'max'), + (None, 'min'), + (None, 'name'), + (None, 'offset'), + (None, 'opacity'), + (None, 'orient'), + (None, 'origin'), + (None, 'overline-position'), + (None, 'overline-thickness'), + (None, 'panose-1'), + (None, 'path'), + (None, 'pathLength'), + (None, 'points'), + (None, 'preserveAspectRatio'), + (None, 'r'), + (None, 'refX'), + (None, 'refY'), + (None, 'repeatCount'), + (None, 'repeatDur'), + (None, 'requiredExtensions'), + (None, 'requiredFeatures'), + (None, 'restart'), + (None, 'rotate'), + (None, 'rx'), + (None, 'ry'), + (None, 'slope'), + (None, 'stemh'), + (None, 'stemv'), + (None, 'stop-color'), + (None, 'stop-opacity'), + (None, 'strikethrough-position'), + (None, 'strikethrough-thickness'), + (None, 'stroke'), + (None, 'stroke-dasharray'), + (None, 'stroke-dashoffset'), + (None, 'stroke-linecap'), + (None, 'stroke-linejoin'), + (None, 'stroke-miterlimit'), + (None, 'stroke-opacity'), + (None, 'stroke-width'), + (None, 'systemLanguage'), + (None, 'target'), + (None, 'text-anchor'), + (None, 'to'), + (None, 'transform'), + (None, 'type'), + (None, 'u1'), + (None, 'u2'), + (None, 'underline-position'), + (None, 'underline-thickness'), + (None, 'unicode'), + (None, 'unicode-range'), + (None, 'units-per-em'), + (None, 'values'), + (None, 'version'), + (None, 'viewBox'), + (None, 'visibility'), + (None, 'width'), + (None, 'widths'), + (None, 'x'), + (None, 'x-height'), + (None, 'x1'), + (None, 'x2'), + (namespaces['xlink'], 'actuate'), + (namespaces['xlink'], 'arcrole'), + (namespaces['xlink'], 'href'), + (namespaces['xlink'], 'role'), + (namespaces['xlink'], 'show'), + (namespaces['xlink'], 'title'), + (namespaces['xlink'], 'type'), + (namespaces['xml'], 'base'), + (namespaces['xml'], 'lang'), + (namespaces['xml'], 'space'), + (None, 'y'), + (None, 'y1'), + (None, 'y2'), + (None, 'zoomAndPan'), +)) + +attr_val_is_uri = frozenset(( + (None, 'href'), + (None, 'src'), + (None, 'cite'), + (None, 'action'), + (None, 'longdesc'), + (None, 'poster'), + (None, 'background'), + (None, 'datasrc'), + (None, 'dynsrc'), + (None, 'lowsrc'), + (None, 'ping'), + (namespaces['xlink'], 'href'), + (namespaces['xml'], 'base'), +)) + +svg_attr_val_allows_ref = frozenset(( + (None, 'clip-path'), + (None, 'color-profile'), + (None, 'cursor'), + (None, 'fill'), + (None, 'filter'), + (None, 'marker'), + (None, 'marker-start'), + (None, 'marker-mid'), + (None, 'marker-end'), + (None, 'mask'), + (None, 'stroke'), +)) + +svg_allow_local_href = frozenset(( + (None, 'altGlyph'), + (None, 'animate'), + (None, 'animateColor'), + (None, 'animateMotion'), + (None, 'animateTransform'), + (None, 'cursor'), + (None, 'feImage'), + (None, 'filter'), + (None, 'linearGradient'), + (None, 'pattern'), + (None, 'radialGradient'), + (None, 'textpath'), + (None, 'tref'), + (None, 'set'), + (None, 'use') +)) + +allowed_css_properties = frozenset(( + 'azimuth', + 'background-color', + 'border-bottom-color', + 'border-collapse', + 'border-color', + 'border-left-color', + 'border-right-color', + 'border-top-color', + 'clear', + 'color', + 'cursor', + 'direction', + 'display', + 'elevation', + 'float', + 'font', + 'font-family', + 'font-size', + 'font-style', + 'font-variant', + 'font-weight', + 'height', + 'letter-spacing', + 'line-height', + 'overflow', + 'pause', + 'pause-after', + 'pause-before', + 'pitch', + 'pitch-range', + 'richness', + 'speak', + 'speak-header', + 'speak-numeral', + 'speak-punctuation', + 'speech-rate', + 'stress', + 'text-align', + 'text-decoration', + 'text-indent', + 'unicode-bidi', + 'vertical-align', + 'voice-family', + 'volume', + 'white-space', + 'width', +)) + +allowed_css_keywords = frozenset(( + 'auto', + 'aqua', + 'black', + 'block', + 'blue', + 'bold', + 'both', + 'bottom', + 'brown', + 'center', + 'collapse', + 'dashed', + 'dotted', + 'fuchsia', + 'gray', + 'green', + '!important', + 'italic', + 'left', + 'lime', + 'maroon', + 'medium', + 'none', + 'navy', + 'normal', + 'nowrap', + 'olive', + 'pointer', + 'purple', + 'red', + 'right', + 'solid', + 'silver', + 'teal', + 'top', + 'transparent', + 'underline', + 'white', + 'yellow', +)) + +allowed_svg_properties = frozenset(( + 'fill', + 'fill-opacity', + 'fill-rule', + 'stroke', + 'stroke-width', + 'stroke-linecap', + 'stroke-linejoin', + 'stroke-opacity', +)) + +allowed_protocols = frozenset(( + 'ed2k', + 'ftp', + 'http', + 'https', + 'irc', + 'mailto', + 'news', + 'gopher', + 'nntp', + 'telnet', + 'webcal', + 'xmpp', + 'callto', + 'feed', + 'urn', + 'aim', + 'rsync', + 'tag', + 'ssh', + 'sftp', + 'rtsp', + 'afs', + 'data', +)) + +allowed_content_types = frozenset(( + 'image/png', + 'image/jpeg', + 'image/gif', + 'image/webp', + 'image/bmp', + 'text/plain', +)) + + +data_content_type = re.compile(r''' + ^ + # Match a content type / + (?P[-a-zA-Z0-9.]+/[-a-zA-Z0-9.]+) + # Match any character set and encoding + (?:(?:;charset=(?:[-a-zA-Z0-9]+)(?:;(?:base64))?) + |(?:;(?:base64))?(?:;charset=(?:[-a-zA-Z0-9]+))?) + # Assume the rest is data + ,.* + $ + ''', + re.VERBOSE) + + +class Filter(base.Filter): + """Sanitizes token stream of XHTML+MathML+SVG and of inline style attributes""" + def __init__(self, + source, + allowed_elements=allowed_elements, + allowed_attributes=allowed_attributes, + allowed_css_properties=allowed_css_properties, + allowed_css_keywords=allowed_css_keywords, + allowed_svg_properties=allowed_svg_properties, + allowed_protocols=allowed_protocols, + allowed_content_types=allowed_content_types, + attr_val_is_uri=attr_val_is_uri, + svg_attr_val_allows_ref=svg_attr_val_allows_ref, + svg_allow_local_href=svg_allow_local_href): + """Creates a Filter + + :arg allowed_elements: set of elements to allow--everything else will + be escaped + + :arg allowed_attributes: set of attributes to allow in + elements--everything else will be stripped + + :arg allowed_css_properties: set of CSS properties to allow--everything + else will be stripped + + :arg allowed_css_keywords: set of CSS keywords to allow--everything + else will be stripped + + :arg allowed_svg_properties: set of SVG properties to allow--everything + else will be removed + + :arg allowed_protocols: set of allowed protocols for URIs + + :arg allowed_content_types: set of allowed content types for ``data`` URIs. + + :arg attr_val_is_uri: set of attributes that have URI values--values + that have a scheme not listed in ``allowed_protocols`` are removed + + :arg svg_attr_val_allows_ref: set of SVG attributes that can have + references + + :arg svg_allow_local_href: set of SVG elements that can have local + hrefs--these are removed + + """ + super(Filter, self).__init__(source) + self.allowed_elements = allowed_elements + self.allowed_attributes = allowed_attributes + self.allowed_css_properties = allowed_css_properties + self.allowed_css_keywords = allowed_css_keywords + self.allowed_svg_properties = allowed_svg_properties + self.allowed_protocols = allowed_protocols + self.allowed_content_types = allowed_content_types + self.attr_val_is_uri = attr_val_is_uri + self.svg_attr_val_allows_ref = svg_attr_val_allows_ref + self.svg_allow_local_href = svg_allow_local_href + + def __iter__(self): + for token in base.Filter.__iter__(self): + token = self.sanitize_token(token) + if token: + yield token + + # Sanitize the +html+, escaping all elements not in ALLOWED_ELEMENTS, and + # stripping out all attributes not in ALLOWED_ATTRIBUTES. Style attributes + # are parsed, and a restricted set, specified by ALLOWED_CSS_PROPERTIES and + # ALLOWED_CSS_KEYWORDS, are allowed through. attributes in ATTR_VAL_IS_URI + # are scanned, and only URI schemes specified in ALLOWED_PROTOCOLS are + # allowed. + # + # sanitize_html('') + # => <script> do_nasty_stuff() </script> + # sanitize_html('Click here for $100') + # => Click here for $100 + def sanitize_token(self, token): + + # accommodate filters which use token_type differently + token_type = token["type"] + if token_type in ("StartTag", "EndTag", "EmptyTag"): + name = token["name"] + namespace = token["namespace"] + if ((namespace, name) in self.allowed_elements or + (namespace is None and + (namespaces["html"], name) in self.allowed_elements)): + return self.allowed_token(token) + else: + return self.disallowed_token(token) + elif token_type == "Comment": + pass + else: + return token + + def allowed_token(self, token): + if "data" in token: + attrs = token["data"] + attr_names = set(attrs.keys()) + + # Remove forbidden attributes + for to_remove in (attr_names - self.allowed_attributes): + del token["data"][to_remove] + attr_names.remove(to_remove) + + # Remove attributes with disallowed URL values + for attr in (attr_names & self.attr_val_is_uri): + assert attr in attrs + # I don't have a clue where this regexp comes from or why it matches those + # characters, nor why we call unescape. I just know it's always been here. + # Should you be worried by this comment in a sanitizer? Yes. On the other hand, all + # this will do is remove *more* than it otherwise would. + val_unescaped = re.sub("[`\x00-\x20\x7f-\xa0\\s]+", '', + unescape(attrs[attr])).lower() + # remove replacement characters from unescaped characters + val_unescaped = val_unescaped.replace("\ufffd", "") + try: + uri = urlparse.urlparse(val_unescaped) + except ValueError: + uri = None + del attrs[attr] + if uri and uri.scheme: + if uri.scheme not in self.allowed_protocols: + del attrs[attr] + if uri.scheme == 'data': + m = data_content_type.match(uri.path) + if not m: + del attrs[attr] + elif m.group('content_type') not in self.allowed_content_types: + del attrs[attr] + + for attr in self.svg_attr_val_allows_ref: + if attr in attrs: + attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)', + ' ', + unescape(attrs[attr])) + if (token["name"] in self.svg_allow_local_href and + (namespaces['xlink'], 'href') in attrs and re.search(r'^\s*[^#\s].*', + attrs[(namespaces['xlink'], 'href')])): + del attrs[(namespaces['xlink'], 'href')] + if (None, 'style') in attrs: + attrs[(None, 'style')] = self.sanitize_css(attrs[(None, 'style')]) + token["data"] = attrs + return token + + def disallowed_token(self, token): + token_type = token["type"] + if token_type == "EndTag": + token["data"] = "" % token["name"] + elif token["data"]: + assert token_type in ("StartTag", "EmptyTag") + attrs = [] + for (ns, name), v in token["data"].items(): + attrs.append(' %s="%s"' % (name if ns is None else "%s:%s" % (prefixes[ns], name), escape(v))) + token["data"] = "<%s%s>" % (token["name"], ''.join(attrs)) + else: + token["data"] = "<%s>" % token["name"] + if token.get("selfClosing"): + token["data"] = token["data"][:-1] + "/>" + + token["type"] = "Characters" + + del token["name"] + return token + + def sanitize_css(self, style): + # disallow urls + style = re.compile(r'url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ', style) + + # gauntlet + if not re.match(r"""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style): + return '' + if not re.match(r"^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style): + return '' + + clean = [] + for prop, value in re.findall(r"([-\w]+)\s*:\s*([^:;]*)", style): + if not value: + continue + if prop.lower() in self.allowed_css_properties: + clean.append(prop + ': ' + value + ';') + elif prop.split('-')[0].lower() in ['background', 'border', 'margin', + 'padding']: + for keyword in value.split(): + if keyword not in self.allowed_css_keywords and \ + not re.match(r"^(#[0-9a-fA-F]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword): # noqa + break + else: + clean.append(prop + ': ' + value + ';') + elif prop.lower() in self.allowed_svg_properties: + clean.append(prop + ': ' + value + ';') + + return ' '.join(clean) -- cgit v1.2.3