summary | refs | log | tree | commit | diff
path: root/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/sanitizer.py
diff options
context:
space:
mode:
Diffstat (limited to 'venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/sanitizer.py')
-rw-r--r-- venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/sanitizer.py 896
1 files changed, 0 insertions, 896 deletions
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/sanitizer.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/sanitizer.py
deleted file mode 100644
index c3199a5..0000000
--- a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/filters/sanitizer.py
+++ /dev/null
@@ -1,896 +0,0 @@
1from __future__ import absolute_import, division, unicode_literals
2
3import re
4from xml.sax.saxutils import escape, unescape
5
6from pip._vendor.six.moves import urllib_parse as urlparse
7
8from . import base
9from ..constants import namespaces, prefixes
10
11__all__ = ["Filter"]
12
13
# Elements that pass through the sanitizer untouched, stored as
# (namespace URI, local name) pairs.  The names are grouped by the namespace
# prefix they belong to and expanded into pairs by the generator below.
allowed_elements = frozenset(
    (namespaces[prefix], local_name)
    for prefix, local_names in (
        ('html', (
            'a', 'abbr', 'acronym', 'address', 'area', 'article', 'aside',
            'audio', 'b', 'big', 'blockquote', 'br', 'button', 'canvas',
            'caption', 'center', 'cite', 'code', 'col', 'colgroup',
            'command', 'datagrid', 'datalist', 'dd', 'del', 'details',
            'dfn', 'dialog', 'dir', 'div', 'dl', 'dt', 'em', 'event-source',
            'fieldset', 'figcaption', 'figure', 'footer', 'font', 'form',
            'header', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'img',
            'input', 'ins', 'keygen', 'kbd', 'label', 'legend', 'li', 'm',
            'map', 'menu', 'meter', 'multicol', 'nav', 'nextid', 'ol',
            'output', 'optgroup', 'option', 'p', 'pre', 'progress', 'q',
            's', 'samp', 'section', 'select', 'small', 'sound', 'source',
            'spacer', 'span', 'strike', 'strong', 'sub', 'sup', 'table',
            'tbody', 'td', 'textarea', 'time', 'tfoot', 'th', 'thead', 'tr',
            'tt', 'u', 'ul', 'var', 'video',
        )),
        ('mathml', (
            'maction', 'math', 'merror', 'mfrac', 'mi', 'mmultiscripts',
            'mn', 'mo', 'mover', 'mpadded', 'mphantom', 'mprescripts',
            'mroot', 'mrow', 'mspace', 'msqrt', 'mstyle', 'msub', 'msubsup',
            'msup', 'mtable', 'mtd', 'mtext', 'mtr', 'munder', 'munderover',
            'none',
        )),
        ('svg', (
            'a', 'animate', 'animateColor', 'animateMotion',
            'animateTransform', 'clipPath', 'circle', 'defs', 'desc',
            'ellipse', 'font-face', 'font-face-name', 'font-face-src', 'g',
            'glyph', 'hkern', 'linearGradient', 'line', 'marker', 'metadata',
            'missing-glyph', 'mpath', 'path', 'polygon', 'polyline',
            'radialGradient', 'rect', 'set', 'stop', 'svg', 'switch', 'text',
            'title', 'tspan', 'use',
        )),
    )
    for local_name in local_names
)
177
# Attributes that pass through the sanitizer, stored as
# (namespace URI or None, local name) pairs.  A prefix of None means the
# attribute has no namespace; any other prefix is looked up in ``namespaces``.
# Names that occur in more than one vocabulary are simply listed in each
# group -- the frozenset collapses the duplicates.
allowed_attributes = frozenset(
    (None if prefix is None else namespaces[prefix], local_name)
    for prefix, local_names in (
        # HTML attributes
        (None, (
            'abbr', 'accept', 'accept-charset', 'accesskey', 'action',
            'align', 'alt', 'autocomplete', 'autofocus', 'axis',
            'background', 'balance', 'bgcolor', 'bgproperties', 'border',
            'bordercolor', 'bordercolordark', 'bordercolorlight',
            'bottompadding', 'cellpadding', 'cellspacing', 'ch', 'challenge',
            'char', 'charoff', 'choff', 'charset', 'checked', 'cite',
            'class', 'clear', 'color', 'cols', 'colspan', 'compact',
            'contenteditable', 'controls', 'coords', 'data', 'datafld',
            'datapagesize', 'datasrc', 'datetime', 'default', 'delay', 'dir',
            'disabled', 'draggable', 'dynsrc', 'enctype', 'end', 'face',
            'for', 'form', 'frame', 'galleryimg', 'gutter', 'headers',
            'height', 'hidefocus', 'hidden', 'high', 'href', 'hreflang',
            'hspace', 'icon', 'id', 'inputmode', 'ismap', 'keytype', 'label',
            'leftspacing', 'lang', 'list', 'longdesc', 'loop', 'loopcount',
            'loopend', 'loopstart', 'low', 'lowsrc', 'max', 'maxlength',
            'media', 'method', 'min', 'multiple', 'name', 'nohref',
            'noshade', 'nowrap', 'open', 'optimum', 'pattern', 'ping',
            'point-size', 'poster', 'pqg', 'preload', 'prompt', 'radiogroup',
            'readonly', 'rel', 'repeat-max', 'repeat-min', 'replace',
            'required', 'rev', 'rightspacing', 'rows', 'rowspan', 'rules',
            'scope', 'selected', 'shape', 'size', 'span', 'src', 'start',
            'step', 'style', 'summary', 'suppress', 'tabindex', 'target',
            'template', 'title', 'toppadding', 'type', 'unselectable',
            'usemap', 'urn', 'valign', 'value', 'variable', 'volume',
            'vspace', 'vrml', 'width', 'wrap',
        )),
        ('xml', ('lang',)),
        # MathML attributes
        (None, (
            'actiontype', 'align', 'columnalign', 'columnlines',
            'columnspacing', 'columnspan', 'depth', 'display',
            'displaystyle', 'equalcolumns', 'equalrows', 'fence',
            'fontstyle', 'fontweight', 'frame', 'height', 'linethickness',
            'lspace', 'mathbackground', 'mathcolor', 'mathvariant',
            'maxsize', 'minsize', 'other', 'rowalign', 'rowlines',
            'rowspacing', 'rowspan', 'rspace', 'scriptlevel', 'selection',
            'separator', 'stretchy', 'width',
        )),
        ('xlink', ('href', 'show', 'type')),
        # SVG attributes
        (None, (
            'accent-height', 'accumulate', 'additive', 'alphabetic',
            'arabic-form', 'ascent', 'attributeName', 'attributeType',
            'baseProfile', 'bbox', 'begin', 'by', 'calcMode', 'cap-height',
            'class', 'clip-path', 'color', 'color-rendering', 'content',
            'cx', 'cy', 'd', 'dx', 'dy', 'descent', 'display', 'dur', 'end',
            'fill', 'fill-opacity', 'fill-rule', 'font-family', 'font-size',
            'font-stretch', 'font-style', 'font-variant', 'font-weight',
            'from', 'fx', 'fy', 'g1', 'g2', 'glyph-name', 'gradientUnits',
            'hanging', 'height', 'horiz-adv-x', 'horiz-origin-x', 'id',
            'ideographic', 'k', 'keyPoints', 'keySplines', 'keyTimes',
            'lang', 'marker-end', 'marker-mid', 'marker-start',
            'markerHeight', 'markerUnits', 'markerWidth', 'mathematical',
            'max', 'min', 'name', 'offset', 'opacity', 'orient', 'origin',
            'overline-position', 'overline-thickness', 'panose-1', 'path',
            'pathLength', 'points', 'preserveAspectRatio', 'r', 'refX',
            'refY', 'repeatCount', 'repeatDur', 'requiredExtensions',
            'requiredFeatures', 'restart', 'rotate', 'rx', 'ry', 'slope',
            'stemh', 'stemv', 'stop-color', 'stop-opacity',
            'strikethrough-position', 'strikethrough-thickness', 'stroke',
            'stroke-dasharray', 'stroke-dashoffset', 'stroke-linecap',
            'stroke-linejoin', 'stroke-miterlimit', 'stroke-opacity',
            'stroke-width', 'systemLanguage', 'target', 'text-anchor', 'to',
            'transform', 'type', 'u1', 'u2', 'underline-position',
            'underline-thickness', 'unicode', 'unicode-range',
            'units-per-em', 'values', 'version', 'viewBox', 'visibility',
            'width', 'widths', 'x', 'x-height', 'x1', 'x2',
            'y', 'y1', 'y2', 'zoomAndPan',
        )),
        ('xlink', (
            'actuate', 'arcrole', 'href', 'role', 'show', 'title', 'type',
        )),
        ('xml', ('base', 'lang', 'space')),
    )
    for local_name in local_names
)
506
# Attributes whose value is a URI; Filter.allowed_token checks the scheme of
# these values against the allowed protocols.
attr_val_is_uri = frozenset(
    [(None, local_name) for local_name in (
        'href', 'src', 'cite', 'action', 'longdesc', 'poster',
        'background', 'datasrc', 'dynsrc', 'lowsrc', 'ping',
    )] +
    [
        (namespaces['xlink'], 'href'),
        (namespaces['xml'], 'base'),
    ]
)
522
# Un-namespaced SVG attributes whose value may contain a ``url(...)``
# reference; Filter.allowed_token blanks out references that do not start
# with '#'.
svg_attr_val_allows_ref = frozenset(
    (None, local_name)
    for local_name in (
        'clip-path', 'color-profile', 'cursor', 'fill', 'filter',
        'marker', 'marker-start', 'marker-mid', 'marker-end',
        'mask', 'stroke',
    )
)
536
# SVG elements, stored as (None, local name) pairs, that Filter.allowed_token
# consults before deciding whether to drop an xlink:href attribute.
svg_allow_local_href = frozenset(
    (None, local_name)
    for local_name in (
        'altGlyph', 'animate', 'animateColor', 'animateMotion',
        'animateTransform', 'cursor', 'feImage', 'filter',
        'linearGradient', 'pattern', 'radialGradient', 'textpath',
        'tref', 'set', 'use',
    )
)
554
# CSS property names that Filter.sanitize_css keeps in inline ``style``
# attributes.
allowed_css_properties = frozenset([
    'azimuth', 'background-color',
    'border-bottom-color', 'border-collapse', 'border-color',
    'border-left-color', 'border-right-color', 'border-top-color',
    'clear', 'color', 'cursor', 'direction', 'display', 'elevation',
    'float',
    'font', 'font-family', 'font-size', 'font-style', 'font-variant',
    'font-weight',
    'height', 'letter-spacing', 'line-height', 'overflow',
    'pause', 'pause-after', 'pause-before', 'pitch', 'pitch-range',
    'richness',
    'speak', 'speak-header', 'speak-numeral', 'speak-punctuation',
    'speech-rate', 'stress',
    'text-align', 'text-decoration', 'text-indent',
    'unicode-bidi', 'vertical-align', 'voice-family', 'volume',
    'white-space', 'width',
])
603
# Keywords accepted as values of the background/border/margin/padding
# families of CSS properties in Filter.sanitize_css.
allowed_css_keywords = frozenset([
    'auto', 'aqua', 'black', 'block', 'blue', 'bold', 'both', 'bottom',
    'brown', 'center', 'collapse', 'dashed', 'dotted', 'fuchsia', 'gray',
    'green', '!important', 'italic', 'left', 'lime', 'maroon', 'medium',
    'none', 'navy', 'normal', 'nowrap', 'olive', 'pointer', 'purple',
    'red', 'right', 'solid', 'silver', 'teal', 'top', 'transparent',
    'underline', 'white', 'yellow',
])
645
# SVG presentation properties that Filter.sanitize_css keeps in inline
# ``style`` attributes.
allowed_svg_properties = frozenset([
    'fill', 'fill-opacity', 'fill-rule',
    'stroke', 'stroke-width', 'stroke-linecap', 'stroke-linejoin',
    'stroke-opacity',
])
656
# URI schemes accepted in attributes that hold URIs.
allowed_protocols = frozenset([
    'ed2k', 'ftp', 'http', 'https', 'irc', 'mailto', 'news', 'gopher',
    'nntp', 'telnet', 'webcal', 'xmpp', 'callto', 'feed', 'urn', 'aim',
    'rsync', 'tag', 'ssh', 'sftp', 'rtsp', 'afs', 'data',
])
682
# Content types accepted inside ``data:`` URIs.
allowed_content_types = frozenset([
    'image/png', 'image/jpeg', 'image/gif', 'image/webp', 'image/bmp',
    'text/plain',
])
691
692
# Matches the path part of a ``data:`` URI (everything after "data:") and
# captures its content type in the ``content_type`` group.  The optional
# ``;charset=...`` and ``;base64`` parameters may appear in either order.
data_content_type = re.compile(
    r'''
    ^
    # Match a content type <application>/<type>
    (?P<content_type>[-a-zA-Z0-9.]+/[-a-zA-Z0-9.]+)
    # Match any character set and encoding
    (?:(?:;charset=(?:[-a-zA-Z0-9]+)(?:;(?:base64))?)
      |(?:;(?:base64))?(?:;charset=(?:[-a-zA-Z0-9]+))?)
    # Assume the rest is data
    ,.*
    $
    ''',
    flags=re.VERBOSE,
)
705
706
class Filter(base.Filter):
    """Sanitizes token stream of XHTML+MathML+SVG and of inline style attributes

    Tokens for elements that are not allowed are turned into ``Characters``
    tokens holding the tag's source text, comments are dropped, and the
    attributes of allowed elements are filtered (names, URI schemes, SVG
    references and inline CSS).
    """

    def __init__(self,
                 source,
                 allowed_elements=allowed_elements,
                 allowed_attributes=allowed_attributes,
                 allowed_css_properties=allowed_css_properties,
                 allowed_css_keywords=allowed_css_keywords,
                 allowed_svg_properties=allowed_svg_properties,
                 allowed_protocols=allowed_protocols,
                 allowed_content_types=allowed_content_types,
                 attr_val_is_uri=attr_val_is_uri,
                 svg_attr_val_allows_ref=svg_attr_val_allows_ref,
                 svg_allow_local_href=svg_allow_local_href):
        """Creates a Filter

        :arg source: the token stream to sanitize; handed to ``base.Filter``

        :arg allowed_elements: set of elements to allow--everything else will
            be escaped

        :arg allowed_attributes: set of attributes to allow in
            elements--everything else will be stripped

        :arg allowed_css_properties: set of CSS properties to allow--everything
            else will be stripped

        :arg allowed_css_keywords: set of CSS keywords to allow--everything
            else will be stripped

        :arg allowed_svg_properties: set of SVG properties to allow--everything
            else will be removed

        :arg allowed_protocols: set of allowed protocols for URIs

        :arg allowed_content_types: set of allowed content types for ``data`` URIs.

        :arg attr_val_is_uri: set of attributes that have URI values--values
            that have a scheme not listed in ``allowed_protocols`` are removed

        :arg svg_attr_val_allows_ref: set of SVG attributes that can have
            references

        :arg svg_allow_local_href: set of SVG elements that may only carry
            local (``#fragment``) ``xlink:href`` values--any other
            ``xlink:href`` on these elements is removed.  Entries may be
            ``(namespace, name)`` tuples or plain element names.

        """
        super(Filter, self).__init__(source)
        self.allowed_elements = allowed_elements
        self.allowed_attributes = allowed_attributes
        self.allowed_css_properties = allowed_css_properties
        self.allowed_css_keywords = allowed_css_keywords
        self.allowed_svg_properties = allowed_svg_properties
        self.allowed_protocols = allowed_protocols
        self.allowed_content_types = allowed_content_types
        self.attr_val_is_uri = attr_val_is_uri
        self.svg_attr_val_allows_ref = svg_attr_val_allows_ref
        self.svg_allow_local_href = svg_allow_local_href

    def __iter__(self):
        """Yield the sanitized tokens, skipping those sanitize_token drops."""
        for token in base.Filter.__iter__(self):
            token = self.sanitize_token(token)
            if token:
                yield token

    # Sanitize the +html+, escaping all elements not in ALLOWED_ELEMENTS, and
    # stripping out all attributes not in ALLOWED_ATTRIBUTES. Style attributes
    # are parsed, and a restricted set, specified by ALLOWED_CSS_PROPERTIES and
    # ALLOWED_CSS_KEYWORDS, are allowed through. Attributes in ATTR_VAL_IS_URI
    # are scanned, and only URI schemes specified in ALLOWED_PROTOCOLS are
    # allowed.
    #
    #   sanitize_html('<script> do_nasty_stuff() </script>')
    #    => &lt;script> do_nasty_stuff() &lt;/script>
    #   sanitize_html('<a href="javascript: sucker();">Click here for $100</a>')
    #    => <a>Click here for $100</a>
    def sanitize_token(self, token):
        """Return the sanitized form of ``token``, or None to drop it.

        Tag tokens are routed to allowed_token / disallowed_token depending
        on whether the element is in ``allowed_elements``; comments are
        dropped; every other token type is returned unchanged.
        """
        # accommodate filters which use token_type differently
        token_type = token["type"]
        if token_type in ("StartTag", "EndTag", "EmptyTag"):
            name = token["name"]
            namespace = token["namespace"]
            # A tag without a namespace is looked up as an HTML element.
            if ((namespace, name) in self.allowed_elements or
                (namespace is None and
                 (namespaces["html"], name) in self.allowed_elements)):
                return self.allowed_token(token)
            return self.disallowed_token(token)
        if token_type == "Comment":
            # Comments are removed from the stream.
            return None
        return token

    def _is_allowed_uri(self, value):
        """Return True if ``value`` is acceptable as a URI attribute value."""
        # I don't have a clue where this regexp comes from or why it matches those
        # characters, nor why we call unescape. I just know it's always been here.
        # Should you be worried by this comment in a sanitizer? Yes. On the other hand, all
        # this will do is remove *more* than it otherwise would.
        val_unescaped = re.sub("[`\x00-\x20\x7f-\xa0\\s]+", '',
                               unescape(value)).lower()
        # remove replacement characters from unescaped characters
        val_unescaped = val_unescaped.replace("\ufffd", "")
        try:
            uri = urlparse.urlparse(val_unescaped)
        except ValueError:
            # Unparseable values are rejected.
            return False
        if not uri.scheme:
            # Scheme-less (relative) references are always kept.
            return True
        if uri.scheme not in self.allowed_protocols:
            return False
        if uri.scheme == 'data':
            # data: URIs must additionally declare an allowed content type.
            m = data_content_type.match(uri.path)
            return bool(m) and m.group('content_type') in self.allowed_content_types
        return True

    def _restricts_href_to_local(self, token):
        """Return True if the token's element is listed in svg_allow_local_href."""
        name = token["name"]
        restricted = self.svg_allow_local_href
        # The default set holds (None, name) tuples; plain names and
        # (namespace, name) tuples are accepted too so that custom sets work.
        return (name in restricted or
                (None, name) in restricted or
                (token.get("namespace"), name) in restricted)

    def allowed_token(self, token):
        """Filter the attributes of an allowed element's token in place.

        Removes attributes that are not in ``allowed_attributes``, URI
        attributes whose value is rejected, non-local ``url(...)`` references
        in SVG attributes and non-local ``xlink:href`` values on restricted
        SVG elements, and sanitizes the ``style`` attribute.  Returns the
        same token object.
        """
        if "data" in token:
            attrs = token["data"]
            attr_names = set(attrs.keys())

            # Remove forbidden attributes
            forbidden = attr_names - self.allowed_attributes
            for to_remove in forbidden:
                del attrs[to_remove]
            attr_names -= forbidden

            # Remove attributes with disallowed URL values.  The decision is
            # made once per attribute so it can never be deleted twice.
            for attr in (attr_names & self.attr_val_is_uri):
                if not self._is_allowed_uri(attrs[attr]):
                    del attrs[attr]

            # Blank out url(...) references that do not point at a local
            # '#fragment'.
            for attr in self.svg_attr_val_allows_ref:
                if attr in attrs:
                    attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)',
                                         ' ',
                                         unescape(attrs[attr]))

            # Drop xlink:href values that are not local '#fragment' references
            # on the SVG elements restricted to local hrefs.
            xlink_href = (namespaces['xlink'], 'href')
            if (self._restricts_href_to_local(token) and
                    xlink_href in attrs and
                    re.search(r'^\s*[^#\s].*', attrs[xlink_href])):
                del attrs[xlink_href]

            if (None, 'style') in attrs:
                attrs[(None, 'style')] = self.sanitize_css(attrs[(None, 'style')])
            token["data"] = attrs
        return token

    def disallowed_token(self, token):
        """Turn a disallowed tag token into a ``Characters`` token.

        The token is modified in place: its ``data`` becomes the tag's source
        text (attribute values escaped), its ``type`` becomes ``Characters``
        and its ``name`` key is removed.
        """
        token_type = token["type"]
        if token_type == "EndTag":
            token["data"] = "</%s>" % token["name"]
        elif token["data"]:
            assert token_type in ("StartTag", "EmptyTag")
            attrs = []
            for (ns, name), v in token["data"].items():
                # Namespaced attributes are written back as prefix:name.
                qualified = name if ns is None else "%s:%s" % (prefixes[ns], name)
                attrs.append(' %s="%s"' % (qualified, escape(v)))
            token["data"] = "<%s%s>" % (token["name"], ''.join(attrs))
        else:
            token["data"] = "<%s>" % token["name"]
        if token.get("selfClosing"):
            # Replace the closing ">" with "/>".
            token["data"] = token["data"][:-1] + "/>"

        token["type"] = "Characters"

        del token["name"]
        return token

    def sanitize_css(self, style):
        """Return ``style`` reduced to the allowed CSS declarations.

        Returns an empty string when the style text contains characters or a
        structure the sanitizer does not recognise.
        """
        # disallow urls
        style = re.sub(r'url\s*\(\s*[^\s)]+?\s*\)\s*', ' ', style)

        # gauntlet
        if not re.match(r"""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style):
            return ''
        if not re.match(r"^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style):
            return ''

        clean = []
        for prop, value in re.findall(r"([-\w]+)\s*:\s*([^:;]*)", style):
            if not value:
                continue
            if prop.lower() in self.allowed_css_properties:
                clean.append(prop + ': ' + value + ';')
            elif prop.split('-')[0].lower() in ('background', 'border', 'margin',
                                                'padding'):
                # Shorthand families are kept only if every word of the value
                # is an allowed keyword, a colour or a short length.
                for keyword in value.split():
                    if keyword not in self.allowed_css_keywords and \
                            not re.match(r"^(#[0-9a-fA-F]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword):  # noqa
                        break
                else:
                    clean.append(prop + ': ' + value + ';')
            elif prop.lower() in self.allowed_svg_properties:
                clean.append(prop + ': ' + value + ';')

        return ' '.join(clean)