diff options
Diffstat (limited to 'venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/html5parser.py')
-rw-r--r-- | venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/html5parser.py | 2791 |
1 files changed, 2791 insertions, 0 deletions
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/html5parser.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/html5parser.py new file mode 100644 index 0000000..b185971 --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/html5lib/html5parser.py | |||
@@ -0,0 +1,2791 @@ | |||
1 | from __future__ import absolute_import, division, unicode_literals | ||
2 | from pip._vendor.six import with_metaclass, viewkeys | ||
3 | |||
4 | import types | ||
5 | from collections import OrderedDict | ||
6 | |||
7 | from . import _inputstream | ||
8 | from . import _tokenizer | ||
9 | |||
10 | from . import treebuilders | ||
11 | from .treebuilders.base import Marker | ||
12 | |||
13 | from . import _utils | ||
14 | from .constants import ( | ||
15 | spaceCharacters, asciiUpper2Lower, | ||
16 | specialElements, headingElements, cdataElements, rcdataElements, | ||
17 | tokenTypes, tagTokenTypes, | ||
18 | namespaces, | ||
19 | htmlIntegrationPointElements, mathmlTextIntegrationPointElements, | ||
20 | adjustForeignAttributes as adjustForeignAttributesMap, | ||
21 | adjustMathMLAttributes, adjustSVGAttributes, | ||
22 | E, | ||
23 | _ReparseException | ||
24 | ) | ||
25 | |||
26 | |||
def parse(doc, treebuilder="etree", namespaceHTMLElements=True, **kwargs):
    """Parse a complete HTML document and return the resulting tree

    Convenience wrapper: looks up the named treebuilder, creates an
    :class:`HTMLParser` around it and parses ``doc`` with it.

    :arg doc: the document to parse as a string or file-like object

    :arg treebuilder: name of the treebuilder, passed to
        ``treebuilders.getTreeBuilder``

    :arg namespaceHTMLElements: whether or not to namespace HTML elements

    Any additional keyword arguments are forwarded to
    :meth:`HTMLParser.parse`.

    :returns: parsed tree

    Example:

    >>> from html5lib.html5parser import parse
    >>> parse('<html><body><p>This is a doc</p></body></html>')
    <Element u'{http://www.w3.org/1999/xhtml}html' at 0x7feac4909db0>

    """
    builder = treebuilders.getTreeBuilder(treebuilder)
    parser = HTMLParser(builder, namespaceHTMLElements=namespaceHTMLElements)
    return parser.parse(doc, **kwargs)
48 | |||
49 | |||
def parseFragment(doc, container="div", treebuilder="etree", namespaceHTMLElements=True, **kwargs):
    """Parse an HTML fragment and return the resulting tree fragment

    Convenience wrapper: looks up the named treebuilder, creates an
    :class:`HTMLParser` around it and parses ``doc`` as a fragment.

    :arg doc: the fragment to parse as a string or file-like object

    :arg container: the container context to parse the fragment in

    :arg treebuilder: name of the treebuilder, passed to
        ``treebuilders.getTreeBuilder``

    :arg namespaceHTMLElements: whether or not to namespace HTML elements

    Any additional keyword arguments are forwarded to
    :meth:`HTMLParser.parseFragment`.

    :returns: parsed tree

    Example:

    >>> from html5lib.html5parser import parseFragment
    >>> parseFragment('<b>this is a fragment</b>')
    <Element u'DOCUMENT_FRAGMENT' at 0x7feac484b090>

    """
    builder = treebuilders.getTreeBuilder(treebuilder)
    parser = HTMLParser(builder, namespaceHTMLElements=namespaceHTMLElements)
    return parser.parseFragment(doc, container=container, **kwargs)
73 | |||
74 | |||
def method_decorator_metaclass(function):
    """Build a metaclass that wraps every plain function of a class

    :arg function: decorator applied to each attribute of the class body
        that is a ``types.FunctionType``

    :returns: a ``type`` subclass usable as a metaclass; non-function
        attributes are left untouched
    """
    class Decorated(type):
        def __new__(meta, classname, bases, classDict):
            # Snapshot the names first; only existing keys are rebound, and
            # the namespace dict is updated in place.
            for memberName in list(classDict):
                member = classDict[memberName]
                if isinstance(member, types.FunctionType):
                    classDict[memberName] = function(member)
            return type.__new__(meta, classname, bases, classDict)
    return Decorated
85 | |||
86 | |||
class HTMLParser(object):
    """HTML parser

    Builds a tree structure from a stream of (possibly malformed) HTML by
    feeding tokenizer output through a set of phase objects.

    """

    def __init__(self, tree=None, strict=False, namespaceHTMLElements=True, debug=False):
        """
        :arg tree: a treebuilder class controlling the type of tree that will be
            returned. Built in treebuilders can be accessed through
            html5lib.treebuilders.getTreeBuilder(treeType). When omitted the
            "etree" treebuilder is used.

        :arg strict: raise an exception when a parse error is encountered

        :arg namespaceHTMLElements: whether or not to namespace HTML elements

        :arg debug: whether or not to enable debug mode which logs things

        Example:

        >>> from html5lib import treebuilders
        >>> from html5lib.html5parser import HTMLParser
        >>> parser = HTMLParser()                     # generates parser with etree builder
        >>> builder = treebuilders.getTreeBuilder('lxml')
        >>> parser = HTMLParser(builder, strict=True) # strict parser with another builder

        """

        # Raise an exception on the first error encountered
        self.strict = strict

        builderClass = treebuilders.getTreeBuilder("etree") if tree is None else tree
        # ``tree`` is a class (or other callable), instantiated here.
        self.tree = builderClass(namespaceHTMLElements)
        self.errors = []

        # One phase instance per phase name, all sharing this parser and tree.
        self.phases = {name: cls(self, self.tree)
                       for name, cls in getPhases(debug).items()}

    def _parse(self, stream, innerHTML=False, container="div", scripting=False, **kwargs):
        """Shared driver behind :meth:`parse` and :meth:`parseFragment`

        Stores the parse settings on the instance, creates the tokenizer
        (extra keyword arguments go to ``_tokenizer.HTMLTokenizer``), resets
        the parser state and runs the main loop.
        """
        self.innerHTMLMode = innerHTML
        self.container = container
        self.scripting = scripting
        self.tokenizer = _tokenizer.HTMLTokenizer(stream, parser=self, **kwargs)
        self.reset()

        try:
            self.mainLoop()
        except _ReparseException:
            # A reparse was requested while running: start again from a
            # clean state, once.
            self.reset()
            self.mainLoop()

    def reset(self):
        """Return the parser (and its tree) to the start-of-parse state"""
        self.tree.reset()
        self.firstStartTag = False
        self.errors = []
        self.log = []  # only used with debug mode
        # "quirks" / "limited quirks" / "no quirks"
        self.compatMode = "no quirks"

        if not self.innerHTMLMode:
            self.innerHTML = False  # pylint:disable=redefined-variable-type
            self.phase = self.phases["initial"]
        else:
            contextName = self.container.lower()
            self.innerHTML = contextName

            tokenizer = self.tokenizer
            # NOTE(review): cdataElements selects the RCDATA state and
            # rcdataElements the RAWTEXT state; presumably this follows how
            # the constants are named - confirm against constants.py.
            if contextName in cdataElements:
                tokenizer.state = tokenizer.rcdataState
            elif contextName in rcdataElements:
                tokenizer.state = tokenizer.rawtextState
            elif contextName == 'plaintext':
                tokenizer.state = tokenizer.plaintextState
            # Otherwise nothing to do: state already is data state.

            self.phase = self.phases["beforeHtml"]
            self.phase.insertHtmlElement()
            self.resetInsertionMode()

        self.lastPhase = None

        self.beforeRCDataPhase = None

        self.framesetOK = True

    @property
    def documentEncoding(self):
        """Name of the character encoding that was used to decode the input stream, or
        :obj:`None` if that is not determined yet

        """
        if hasattr(self, 'tokenizer'):
            return self.tokenizer.stream.charEncoding[0].name
        # No tokenizer yet, i.e. nothing has been parsed with this instance.
        return None

    def isHTMLIntegrationPoint(self, element):
        """Tell whether ``element`` is treated as an HTML integration point"""
        isAnnotationXml = (element.name == "annotation-xml" and
                           element.namespace == namespaces["mathml"])
        if not isAnnotationXml:
            return (element.namespace, element.name) in htmlIntegrationPointElements

        # MathML annotation-xml only qualifies with an HTML-ish encoding
        # attribute (compared ASCII case-insensitively).
        if "encoding" not in element.attributes:
            return False
        encoding = element.attributes["encoding"].translate(asciiUpper2Lower)
        return encoding in ("text/html", "application/xhtml+xml")

    def isMathMLTextIntegrationPoint(self, element):
        """Tell whether ``element`` is a MathML text integration point"""
        key = (element.namespace, element.name)
        return key in mathmlTextIntegrationPointElements

    def mainLoop(self):
        """Feed every token to the appropriate phase, then process EOF"""
        CharactersToken = tokenTypes["Characters"]
        SpaceCharactersToken = tokenTypes["SpaceCharacters"]
        StartTagToken = tokenTypes["StartTag"]
        ParseErrorToken = tokenTypes["ParseError"]

        # Token type -> name of the phase method that handles it.
        handlerNames = {
            CharactersToken: "processCharacters",
            SpaceCharactersToken: "processSpaceCharacters",
            StartTagToken: "processStartTag",
            tokenTypes["EndTag"]: "processEndTag",
            tokenTypes["Comment"]: "processComment",
            tokenTypes["Doctype"]: "processDoctype",
        }
        textTokens = (CharactersToken, SpaceCharactersToken)
        htmlContentTokens = (StartTagToken, CharactersToken, SpaceCharactersToken)
        mathmlExemptTags = frozenset(["mglyph", "malignmark"])

        for token in self.normalizedTokens():
            prev_token = None
            new_token = token
            # A phase returns a token when it wants it reprocessed (usually
            # after switching self.phase) and None when it is consumed.
            while new_token is not None:
                prev_token = new_token
                openElements = self.tree.openElements
                currentNode = openElements[-1] if openElements else None
                currentNodeNamespace = currentNode.namespace if currentNode else None
                currentNodeName = currentNode.name if currentNode else None

                tokenType = new_token["type"]

                if tokenType == ParseErrorToken:
                    self.parseError(new_token["data"], new_token.get("datavars", {}))
                    new_token = None
                    continue

                # Decide between the current insertion-mode phase and the
                # foreign content phase.
                useCurrentPhase = (
                    len(openElements) == 0 or
                    currentNodeNamespace == self.tree.defaultNamespace or
                    (self.isMathMLTextIntegrationPoint(currentNode) and
                     ((tokenType == StartTagToken and
                       token["name"] not in mathmlExemptTags) or
                      tokenType in textTokens)) or
                    (currentNodeNamespace == namespaces["mathml"] and
                     currentNodeName == "annotation-xml" and
                     tokenType == StartTagToken and
                     token["name"] == "svg") or
                    (self.isHTMLIntegrationPoint(currentNode) and
                     tokenType in htmlContentTokens))
                if useCurrentPhase:
                    phase = self.phase
                else:
                    phase = self.phases["inForeignContent"]

                handlerName = handlerNames.get(tokenType)
                if handlerName is not None:
                    new_token = getattr(phase, handlerName)(new_token)

                if (tokenType == StartTagToken and prev_token["selfClosing"] and
                        not prev_token["selfClosingAcknowledged"]):
                    self.parseError("non-void-element-with-trailing-solidus",
                                    {"name": prev_token["name"]})

        # When the loop finishes it's EOF
        seenPhases = []
        while True:
            seenPhases.append(self.phase)
            if not self.phase.processEOF():
                break
            # A reprocess request must come with a switch to a new phase.
            assert self.phase not in seenPhases

    def normalizedTokens(self):
        """Yield each tokenizer token after :meth:`normalizeToken`"""
        for rawToken in self.tokenizer:
            yield self.normalizeToken(rawToken)

    def parse(self, stream, *args, **kwargs):
        """Parse a HTML document into a well-formed tree

        :arg stream: a file-like object or string containing the HTML to be parsed

            The optional encoding parameter must be a string that indicates
            the encoding.  If specified, that encoding will be used,
            regardless of any BOM or later declaration (such as in a meta
            element).

        :arg scripting: treat noscript elements as if JavaScript was turned on

        :returns: parsed tree

        Example:

        >>> from html5lib.html5parser import HTMLParser
        >>> parser = HTMLParser()
        >>> parser.parse('<html><body><p>This is a doc</p></body></html>')
        <Element u'{http://www.w3.org/1999/xhtml}html' at 0x7feac4909db0>

        """
        # innerHTML=False, container=None; further positionals start at
        # ``scripting``.
        self._parse(stream, False, None, *args, **kwargs)
        return self.tree.getDocument()

    def parseFragment(self, stream, *args, **kwargs):
        """Parse a HTML fragment into a well-formed tree fragment

        :arg container: name of the element we're setting the innerHTML
            property of; "div" is used when it is not given

        :arg stream: a file-like object or string containing the HTML to be parsed

            The optional encoding parameter must be a string that indicates
            the encoding.  If specified, that encoding will be used,
            regardless of any BOM or later declaration (such as in a meta
            element)

        :arg scripting: treat noscript elements as if JavaScript was turned on

        :returns: parsed tree

        Example:

        >>> from html5lib.html5parser import HTMLParser
        >>> parser = HTMLParser()
        >>> parser.parseFragment('<b>this is a fragment</b>')
        <Element u'DOCUMENT_FRAGMENT' at 0x7feac484b090>

        """
        self._parse(stream, True, *args, **kwargs)
        return self.tree.getFragment()

    def parseError(self, errorcode="XXX-undefined-error", datavars=None):
        """Record a parse error; raise ``ParseError`` in strict mode

        Each entry appended to ``self.errors`` is a tuple of
        ``(stream position, errorcode, datavars)``.
        """
        # XXX The idea is to make errorcode mandatory.
        datavars = {} if datavars is None else datavars
        position = self.tokenizer.stream.position()
        self.errors.append((position, errorcode, datavars))
        if self.strict:
            raise ParseError(E[errorcode] % datavars)

    def normalizeToken(self, token):
        """Apply HTML5 specific normalizations to a token, in place"""
        if token["type"] != tokenTypes["StartTag"]:
            return token

        rawAttributes = token["data"]
        attributes = OrderedDict(rawAttributes)
        token["data"] = attributes
        if len(rawAttributes) > len(attributes):
            # we had some duplicated attribute, fix so first wins
            attributes.update(rawAttributes[::-1])
        return token

    def adjustMathMLAttributes(self, token):
        """Apply the MathML attribute adjustment table to ``token``"""
        adjust_attributes(token, adjustMathMLAttributes)

    def adjustSVGAttributes(self, token):
        """Apply the SVG attribute adjustment table to ``token``"""
        adjust_attributes(token, adjustSVGAttributes)

    def adjustForeignAttributes(self, token):
        """Apply the foreign attribute adjustment table to ``token``"""
        adjust_attributes(token, adjustForeignAttributesMap)

    def reparseTokenNormal(self, token):
        # pylint:disable=unused-argument
        # NOTE(review): nothing in this class sets ``self.parser``, so this
        # looks like it raises AttributeError if ever called - confirm
        # whether the method is still used before relying on it.
        self.parser.phase()

    def resetInsertionMode(self):
        """Select ``self.phase`` from the stack of open elements"""
        # The name of this method is mostly historical. (It's also used in the
        # specification.)
        newModes = {
            "select": "inSelect",
            "td": "inCell",
            "th": "inCell",
            "tr": "inRow",
            "tbody": "inTableBody",
            "thead": "inTableBody",
            "tfoot": "inTableBody",
            "caption": "inCaption",
            "colgroup": "inColumnGroup",
            "table": "inTable",
            "head": "inBody",
            "body": "inBody",
            "frameset": "inFrameset",
            "html": "beforeHead"
        }
        innerHTMLOnlyNames = ("select", "colgroup", "head", "html")
        last = False
        # Walk the open elements from the innermost one outwards.
        for node in self.tree.openElements[::-1]:
            nodeName = node.name
            new_phase = None
            if node == self.tree.openElements[0]:
                # Reaching the root is only expected for fragment parsing;
                # the context element's name stands in for the root's.
                assert self.innerHTML
                last = True
                nodeName = self.innerHTML
            # Check for conditions that should only happen in the innerHTML
            # case
            if nodeName in innerHTMLOnlyNames:
                assert self.innerHTML

            if not last and node.namespace != self.tree.defaultNamespace:
                # Elements outside the default namespace never pick the mode.
                continue

            modeName = newModes.get(nodeName)
            if modeName is not None:
                new_phase = self.phases[modeName]
                break
            if last:
                new_phase = self.phases["inBody"]
                break

        self.phase = new_phase

    def parseRCDataRawtext(self, token, contentType):
        """Insert the element for ``token`` and switch to text processing

        :arg token: start tag token of the element to insert

        :arg contentType: ``"RAWTEXT"`` or ``"RCDATA"``; selects the
            tokenizer state used for the element's content
        """
        # Generic RCDATA/RAWTEXT Parsing algorithm
        assert contentType in ("RAWTEXT", "RCDATA")

        self.tree.insertElement(token)

        tokenizer = self.tokenizer
        tokenizer.state = (tokenizer.rawtextState if contentType == "RAWTEXT"
                           else tokenizer.rcdataState)

        # Remember the current phase before handing over to the text phase.
        self.originalPhase = self.phase

        self.phase = self.phases["text"]
410 | |||
411 | |||
412 | @_utils.memoize | ||
413 | def getPhases(debug): | ||
def log(function):
    """Logger that records which phase processes each token

    Returns a wrapper around ``function``. Calls to functions whose name
    starts with "process" and that receive at least one positional argument
    append an entry to ``self.parser.log`` before delegating; every other
    call is passed straight through.
    """
    # Reverse lookup: token type value -> token type name.
    type_names = {value: key for key, value in tokenTypes.items()}

    def wrapped(self, *args, **kwargs):
        if not (function.__name__.startswith("process") and len(args) > 0):
            return function(self, *args, **kwargs)

        token = args[0]
        token_type = token['type']
        info = {"type": type_names[token_type]}
        if token_type in tagTokenTypes:
            info["name"] = token['name']

        parser = self.parser
        parser.log.append((parser.tokenizer.state.__name__,
                           parser.phase.__class__.__name__,
                           self.__class__.__name__,
                           function.__name__,
                           info))
        return function(self, *args, **kwargs)
    return wrapped
438 | |||
def getMetaclass(use_metaclass, metaclass_func):
    """Pick the metaclass for the phase classes

    :arg use_metaclass: when true, build a decorating metaclass from
        ``metaclass_func``; otherwise plain ``type`` is used

    :arg metaclass_func: decorator handed to ``method_decorator_metaclass``
    """
    if not use_metaclass:
        return type
    return method_decorator_metaclass(metaclass_func)
444 | |||
445 | # pylint:disable=unused-argument | ||
class Phase(with_metaclass(getMetaclass(debug, log))):
    """Base class for helper object that implements each phase of processing

    Subclasses are expected to provide ``startTagHandler`` and
    ``endTagHandler`` lookups (or override the tag processing methods) and to
    implement :meth:`processEOF`.
    """

    def __init__(self, parser, tree):
        self.parser = parser
        self.tree = tree

    def processEOF(self):
        # Every concrete phase has to decide what end-of-file means.
        raise NotImplementedError

    def processComment(self, token):
        # For most phases the following is correct. Where it's not it will be
        # overridden.
        currentNode = self.tree.openElements[-1]
        self.tree.insertComment(token, currentNode)

    def processDoctype(self, token):
        self.parser.parseError("unexpected-doctype")

    def processCharacters(self, token):
        text = token["data"]
        self.tree.insertText(text)

    def processSpaceCharacters(self, token):
        text = token["data"]
        self.tree.insertText(text)

    def processStartTag(self, token):
        # Dispatch on the tag name.
        handler = self.startTagHandler[token["name"]]
        return handler(token)

    def startTagHtml(self, token):
        if token["name"] == "html" and not self.parser.firstStartTag:
            self.parser.parseError("non-html-root")
        # XXX Need a check here to see if the first start tag token emitted is
        # this token... If it's not, invoke self.parser.parseError().
        # Copy over only the attributes the root element does not have yet.
        for attrName, attrValue in token["data"].items():
            if attrName not in self.tree.openElements[0].attributes:
                self.tree.openElements[0].attributes[attrName] = attrValue
        self.parser.firstStartTag = False

    def processEndTag(self, token):
        # Dispatch on the tag name.
        handler = self.endTagHandler[token["name"]]
        return handler(token)
486 | |||
class InitialPhase(Phase):
    """Phase in effect before anything but whitespace/comments was seen

    Handles the doctype (deciding the document's compatibility mode) and
    hands every other token on to the "beforeHtml" phase.
    """

    def processSpaceCharacters(self, token):
        # Whitespace is dropped in this phase.
        pass

    def processComment(self, token):
        # Comments seen here are attached to the document itself.
        self.tree.insertComment(token, self.tree.document)

    def processDoctype(self, token):
        name = token["name"]
        publicId = token["publicId"]
        systemId = token["systemId"]
        correct = token["correct"]

        hasUnexpectedSystemId = (systemId is not None and
                                 systemId != "about:legacy-compat")
        if name != "html" or publicId is not None or hasUnexpectedSystemId:
            self.parser.parseError("unknown-doctype")

        if publicId is None:
            publicId = ""

        self.tree.insertDoctype(token)

        if publicId != "":
            # Public identifiers are matched ASCII case-insensitively.
            publicId = publicId.translate(asciiUpper2Lower)

        # Public identifier prefixes that trigger quirks mode.
        quirksPrefixes = (
            "+//silmaril//dtd html pro v0r11 19970101//",
            "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
            "-//as//dtd html 3.0 aswedit + extensions//",
            "-//ietf//dtd html 2.0 level 1//",
            "-//ietf//dtd html 2.0 level 2//",
            "-//ietf//dtd html 2.0 strict level 1//",
            "-//ietf//dtd html 2.0 strict level 2//",
            "-//ietf//dtd html 2.0 strict//",
            "-//ietf//dtd html 2.0//",
            "-//ietf//dtd html 2.1e//",
            "-//ietf//dtd html 3.0//",
            "-//ietf//dtd html 3.2 final//",
            "-//ietf//dtd html 3.2//",
            "-//ietf//dtd html 3//",
            "-//ietf//dtd html level 0//",
            "-//ietf//dtd html level 1//",
            "-//ietf//dtd html level 2//",
            "-//ietf//dtd html level 3//",
            "-//ietf//dtd html strict level 0//",
            "-//ietf//dtd html strict level 1//",
            "-//ietf//dtd html strict level 2//",
            "-//ietf//dtd html strict level 3//",
            "-//ietf//dtd html strict//",
            "-//ietf//dtd html//",
            "-//metrius//dtd metrius presentational//",
            "-//microsoft//dtd internet explorer 2.0 html strict//",
            "-//microsoft//dtd internet explorer 2.0 html//",
            "-//microsoft//dtd internet explorer 2.0 tables//",
            "-//microsoft//dtd internet explorer 3.0 html strict//",
            "-//microsoft//dtd internet explorer 3.0 html//",
            "-//microsoft//dtd internet explorer 3.0 tables//",
            "-//netscape comm. corp.//dtd html//",
            "-//netscape comm. corp.//dtd strict html//",
            "-//o'reilly and associates//dtd html 2.0//",
            "-//o'reilly and associates//dtd html extended 1.0//",
            "-//o'reilly and associates//dtd html extended relaxed 1.0//",
            "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
            "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
            "-//spyglass//dtd html 2.0 extended//",
            "-//sq//dtd html 2.0 hotmetal + extensions//",
            "-//sun microsystems corp.//dtd hotjava html//",
            "-//sun microsystems corp.//dtd hotjava strict html//",
            "-//w3c//dtd html 3 1995-03-24//",
            "-//w3c//dtd html 3.2 draft//",
            "-//w3c//dtd html 3.2 final//",
            "-//w3c//dtd html 3.2//",
            "-//w3c//dtd html 3.2s draft//",
            "-//w3c//dtd html 4.0 frameset//",
            "-//w3c//dtd html 4.0 transitional//",
            "-//w3c//dtd html experimental 19960712//",
            "-//w3c//dtd html experimental 970421//",
            "-//w3c//dtd w3 html//",
            "-//w3o//dtd w3 html 3.0//",
            "-//webtechs//dtd mozilla html 2.0//",
            "-//webtechs//dtd mozilla html//")
        # Public identifiers that trigger quirks mode on an exact match.
        quirksPublicIds = ("-//w3o//dtd w3 html strict 3.0//en//",
                           "-/w3c/dtd html 4.0 transitional/en",
                           "html")
        # HTML 4.01 frameset/transitional: quirks without a system
        # identifier, limited quirks with one.
        html401Prefixes = ("-//w3c//dtd html 4.01 frameset//",
                           "-//w3c//dtd html 4.01 transitional//")
        xhtml10Prefixes = ("-//w3c//dtd xhtml 1.0 frameset//",
                           "-//w3c//dtd xhtml 1.0 transitional//")
        ibmSystemId = "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"

        isHtml401 = publicId.startswith(html401Prefixes)

        isQuirks = (
            not correct or
            token["name"] != "html" or
            publicId.startswith(quirksPrefixes) or
            publicId in quirksPublicIds or
            (isHtml401 and systemId is None) or
            (systemId and systemId.lower() == ibmSystemId))

        if isQuirks:
            self.parser.compatMode = "quirks"
        elif (publicId.startswith(xhtml10Prefixes) or
                (isHtml401 and systemId is not None)):
            self.parser.compatMode = "limited quirks"

        self.parser.phase = self.parser.phases["beforeHtml"]

    def anythingElse(self):
        # No doctype was seen first: quirks mode, then move on.
        parser = self.parser
        parser.compatMode = "quirks"
        parser.phase = parser.phases["beforeHtml"]

    def processCharacters(self, token):
        self.parser.parseError("expected-doctype-but-got-chars")
        self.anythingElse()
        # Returning the token asks for it to be reprocessed.
        return token

    def processStartTag(self, token):
        details = {"name": token["name"]}
        self.parser.parseError("expected-doctype-but-got-start-tag", details)
        self.anythingElse()
        return token

    def processEndTag(self, token):
        details = {"name": token["name"]}
        self.parser.parseError("expected-doctype-but-got-end-tag", details)
        self.anythingElse()
        return token

    def processEOF(self):
        self.parser.parseError("expected-doctype-but-got-eof")
        self.anythingElse()
        return True
614 | |||
class BeforeHtmlPhase(Phase):
    """Phase that creates the root ``html`` element

    Most tokens cause an implied root to be inserted and are then handed on
    for reprocessing in the "beforeHead" phase.
    """

    # helper methods
    def insertHtmlElement(self):
        rootToken = impliedTagToken("html", "StartTag")
        self.tree.insertRoot(rootToken)
        self.parser.phase = self.parser.phases["beforeHead"]

    # other
    def processEOF(self):
        self.insertHtmlElement()
        return True

    def processComment(self, token):
        # Comments seen here are attached to the document itself.
        self.tree.insertComment(token, self.tree.document)

    def processSpaceCharacters(self, token):
        # Whitespace is dropped in this phase.
        pass

    def processCharacters(self, token):
        self.insertHtmlElement()
        return token

    def processStartTag(self, token):
        isHtmlTag = token["name"] == "html"
        if isHtmlTag:
            self.parser.firstStartTag = True
        self.insertHtmlElement()
        return token

    def processEndTag(self, token):
        tagName = token["name"]
        if tagName in ("head", "body", "html", "br"):
            self.insertHtmlElement()
            return token
        # Any other end tag is reported and not passed on.
        self.parser.parseError("unexpected-end-tag-before-html",
                               {"name": tagName})
649 | |||
class BeforeHeadPhase(Phase):
    """Phase between the root element and the ``head`` element

    A ``head`` start tag opens the head; most other tokens imply one and are
    then reprocessed in the "inHead" phase.
    """

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        startTags = _utils.MethodDispatcher([
            ("html", self.startTagHtml),
            ("head", self.startTagHead)
        ])
        startTags.default = self.startTagOther
        self.startTagHandler = startTags

        endTags = _utils.MethodDispatcher([
            (("head", "body", "html", "br"), self.endTagImplyHead)
        ])
        endTags.default = self.endTagOther
        self.endTagHandler = endTags

    def _implyHead(self):
        # Act as if a <head> start tag had been seen.
        self.startTagHead(impliedTagToken("head", "StartTag"))

    def processEOF(self):
        self._implyHead()
        return True

    def processSpaceCharacters(self, token):
        # Whitespace is dropped in this phase.
        pass

    def processCharacters(self, token):
        self._implyHead()
        return token

    def startTagHtml(self, token):
        inBody = self.parser.phases["inBody"]
        return inBody.processStartTag(token)

    def startTagHead(self, token):
        self.tree.insertElement(token)
        # The element just inserted is the head; remember it on the tree.
        self.tree.headPointer = self.tree.openElements[-1]
        self.parser.phase = self.parser.phases["inHead"]

    def startTagOther(self, token):
        self._implyHead()
        return token

    def endTagImplyHead(self, token):
        self._implyHead()
        return token

    def endTagOther(self, token):
        details = {"name": token["name"]}
        self.parser.parseError("end-tag-after-implied-root", details)
695 | |||
696 | class InHeadPhase(Phase): | ||
697 | def __init__(self, parser, tree): | ||
698 | Phase.__init__(self, parser, tree) | ||
699 | |||
700 | self.startTagHandler = _utils.MethodDispatcher([ | ||
701 | ("html", self.startTagHtml), | ||
702 | ("title", self.startTagTitle), | ||
703 | (("noframes", "style"), self.startTagNoFramesStyle), | ||
704 | ("noscript", self.startTagNoscript), | ||
705 | ("script", self.startTagScript), | ||
706 | (("base", "basefont", "bgsound", "command", "link"), | ||
707 | self.startTagBaseLinkCommand), | ||
708 | ("meta", self.startTagMeta), | ||
709 | ("head", self.startTagHead) | ||
710 | ]) | ||
711 | self.startTagHandler.default = self.startTagOther | ||
712 | |||
713 | self.endTagHandler = _utils.MethodDispatcher([ | ||
714 | ("head", self.endTagHead), | ||
715 | (("br", "html", "body"), self.endTagHtmlBodyBr) | ||
716 | ]) | ||
717 | self.endTagHandler.default = self.endTagOther | ||
718 | |||
719 | # the real thing | ||
720 | def processEOF(self): | ||
721 | self.anythingElse() | ||
722 | return True | ||
723 | |||
724 | def processCharacters(self, token): | ||
725 | self.anythingElse() | ||
726 | return token | ||
727 | |||
728 | def startTagHtml(self, token): | ||
729 | return self.parser.phases["inBody"].processStartTag(token) | ||
730 | |||
731 | def startTagHead(self, token): | ||
732 | self.parser.parseError("two-heads-are-not-better-than-one") | ||
733 | |||
734 | def startTagBaseLinkCommand(self, token): | ||
735 | self.tree.insertElement(token) | ||
736 | self.tree.openElements.pop() | ||
737 | token["selfClosingAcknowledged"] = True | ||
738 | |||
739 | def startTagMeta(self, token): | ||
740 | self.tree.insertElement(token) | ||
741 | self.tree.openElements.pop() | ||
742 | token["selfClosingAcknowledged"] = True | ||
743 | |||
744 | attributes = token["data"] | ||
745 | if self.parser.tokenizer.stream.charEncoding[1] == "tentative": | ||
746 | if "charset" in attributes: | ||
747 | self.parser.tokenizer.stream.changeEncoding(attributes["charset"]) | ||
748 | elif ("content" in attributes and | ||
749 | "http-equiv" in attributes and | ||
750 | attributes["http-equiv"].lower() == "content-type"): | ||
751 | # Encoding it as UTF-8 here is a hack, as really we should pass | ||
752 | # the abstract Unicode string, and just use the | ||
753 | # ContentAttrParser on that, but using UTF-8 allows all chars | ||
754 | # to be encoded and as a ASCII-superset works. | ||
755 | data = _inputstream.EncodingBytes(attributes["content"].encode("utf-8")) | ||
756 | parser = _inputstream.ContentAttrParser(data) | ||
757 | codec = parser.parse() | ||
758 | self.parser.tokenizer.stream.changeEncoding(codec) | ||
759 | |||
760 | def startTagTitle(self, token): | ||
761 | self.parser.parseRCDataRawtext(token, "RCDATA") | ||
762 | |||
763 | def startTagNoFramesStyle(self, token): | ||
764 | # Need to decide whether to implement the scripting-disabled case | ||
765 | self.parser.parseRCDataRawtext(token, "RAWTEXT") | ||
766 | |||
767 | def startTagNoscript(self, token): | ||
768 | if self.parser.scripting: | ||
769 | self.parser.parseRCDataRawtext(token, "RAWTEXT") | ||
770 | else: | ||
771 | self.tree.insertElement(token) | ||
772 | self.parser.phase = self.parser.phases["inHeadNoscript"] | ||
773 | |||
774 | def startTagScript(self, token): | ||
775 | self.tree.insertElement(token) | ||
776 | self.parser.tokenizer.state = self.parser.tokenizer.scriptDataState | ||
777 | self.parser.originalPhase = self.parser.phase | ||
778 | self.parser.phase = self.parser.phases["text"] | ||
779 | |||
780 | def startTagOther(self, token): | ||
781 | self.anythingElse() | ||
782 | return token | ||
783 | |||
784 | def endTagHead(self, token): | ||
785 | node = self.parser.tree.openElements.pop() | ||
786 | assert node.name == "head", "Expected head got %s" % node.name | ||
787 | self.parser.phase = self.parser.phases["afterHead"] | ||
788 | |||
def endTagHtmlBodyBr(self, token):
    """Run the fallback handling for ``html``/``body``/``br`` end tags.

    Returns *token* unchanged.
    """
    self.anythingElse()
    unhandled = token
    return unhandled
792 | |||
def endTagOther(self, token):
    """Report any other end tag as an "unexpected-end-tag" parse error."""
    error_vars = {"name": token["name"]}
    self.parser.parseError("unexpected-end-tag", error_vars)
795 | |||
def anythingElse(self):
    """Act as if a ``head`` end tag had been seen."""
    implied_end = impliedTagToken("head")
    self.endTagHead(implied_end)
798 | |||
class InHeadNoscriptPhase(Phase):
    """Phase handling tokens seen inside a ``noscript`` element in the head."""

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        start_table = [
            ("html", self.startTagHtml),
            (("basefont", "bgsound", "link", "meta", "noframes", "style"), self.startTagBaseLinkCommand),
            (("head", "noscript"), self.startTagHeadNoscript),
        ]
        self.startTagHandler = _utils.MethodDispatcher(start_table)
        self.startTagHandler.default = self.startTagOther

        end_table = [
            ("noscript", self.endTagNoscript),
            ("br", self.endTagBr),
        ]
        self.endTagHandler = _utils.MethodDispatcher(end_table)
        self.endTagHandler.default = self.endTagOther

    def processEOF(self):
        """Report EOF inside noscript, close it, and return True."""
        self.parser.parseError("eof-in-head-noscript")
        self.anythingElse()
        return True

    def processComment(self, token):
        """Delegate comments to the "inHead" phase."""
        in_head = self.parser.phases["inHead"]
        return in_head.processComment(token)

    def processCharacters(self, token):
        """Report the characters as an error, close noscript, return the token."""
        self.parser.parseError("char-in-head-noscript")
        self.anythingElse()
        return token

    def processSpaceCharacters(self, token):
        """Delegate whitespace to the "inHead" phase."""
        in_head = self.parser.phases["inHead"]
        return in_head.processSpaceCharacters(token)

    def startTagHtml(self, token):
        """Delegate an ``html`` start tag to the "inBody" phase."""
        in_body = self.parser.phases["inBody"]
        return in_body.processStartTag(token)

    def startTagBaseLinkCommand(self, token):
        """Delegate head-level start tags to the "inHead" phase."""
        in_head = self.parser.phases["inHead"]
        return in_head.processStartTag(token)

    def startTagHeadNoscript(self, token):
        """Report a nested ``head``/``noscript`` start tag as a parse error."""
        error_vars = {"name": token["name"]}
        self.parser.parseError("unexpected-start-tag", error_vars)

    def startTagOther(self, token):
        """Report the tag, close noscript, and return the token."""
        error_vars = {"name": token["name"]}
        self.parser.parseError("unexpected-inhead-noscript-tag", error_vars)
        self.anythingElse()
        return token

    def endTagNoscript(self, token):
        """Pop the ``noscript`` element and return to the "inHead" phase."""
        popped = self.parser.tree.openElements.pop()
        assert popped.name == "noscript", "Expected noscript got %s" % popped.name
        self.parser.phase = self.parser.phases["inHead"]

    def endTagBr(self, token):
        """Report the ``br`` end tag, close noscript, and return the token."""
        error_vars = {"name": token["name"]}
        self.parser.parseError("unexpected-inhead-noscript-tag", error_vars)
        self.anythingElse()
        return token

    def endTagOther(self, token):
        """Report any other end tag as a parse error."""
        error_vars = {"name": token["name"]}
        self.parser.parseError("unexpected-end-tag", error_vars)

    def anythingElse(self):
        """Act as if a ``noscript`` end tag had been seen."""
        # Caller must raise parse error first!
        implied_end = impliedTagToken("noscript")
        self.endTagNoscript(implied_end)
862 | |||
class AfterHeadPhase(Phase):
    """Phase handling tokens seen after the head has been closed."""

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        head_level_tags = ("base", "basefont", "bgsound", "link", "meta",
                           "noframes", "script", "style", "title")
        start_table = [
            ("html", self.startTagHtml),
            ("body", self.startTagBody),
            ("frameset", self.startTagFrameset),
            (head_level_tags, self.startTagFromHead),
            ("head", self.startTagHead),
        ]
        self.startTagHandler = _utils.MethodDispatcher(start_table)
        self.startTagHandler.default = self.startTagOther

        end_table = [(("body", "html", "br"), self.endTagHtmlBodyBr)]
        self.endTagHandler = _utils.MethodDispatcher(end_table)
        self.endTagHandler.default = self.endTagOther

    def processEOF(self):
        """Insert an implied body and return True."""
        self.anythingElse()
        return True

    def processCharacters(self, token):
        """Insert an implied body and return the token."""
        self.anythingElse()
        return token

    def startTagHtml(self, token):
        """Delegate an ``html`` start tag to the "inBody" phase."""
        in_body = self.parser.phases["inBody"]
        return in_body.processStartTag(token)

    def startTagBody(self, token):
        """Insert the ``body`` element and move to the "inBody" phase."""
        self.parser.framesetOK = False
        self.tree.insertElement(token)
        self.parser.phase = self.parser.phases["inBody"]

    def startTagFrameset(self, token):
        """Insert the ``frameset`` element and move to the "inFrameset" phase."""
        self.tree.insertElement(token)
        self.parser.phase = self.parser.phases["inFrameset"]

    def startTagFromHead(self, token):
        """Process a head-level tag that appeared after the head was closed.

        The head element is pushed back onto the stack of open elements,
        the token is handled by the "inHead" phase, and then the
        bottom-most ``head`` entry is removed from the stack again.
        """
        error_vars = {"name": token["name"]}
        self.parser.parseError("unexpected-start-tag-out-of-my-head", error_vars)
        stack = self.tree.openElements
        stack.append(self.tree.headPointer)
        self.parser.phases["inHead"].processStartTag(token)
        # Search from the most recently opened element backwards.
        for candidate in reversed(self.tree.openElements):
            if candidate.name == "head":
                self.tree.openElements.remove(candidate)
                break

    def startTagHead(self, token):
        """Report a second ``head`` start tag as a parse error."""
        error_vars = {"name": token["name"]}
        self.parser.parseError("unexpected-start-tag", error_vars)

    def startTagOther(self, token):
        """Insert an implied body and return the token."""
        self.anythingElse()
        return token

    def endTagHtmlBodyBr(self, token):
        """Insert an implied body and return the token."""
        self.anythingElse()
        return token

    def endTagOther(self, token):
        """Report any other end tag as a parse error."""
        error_vars = {"name": token["name"]}
        self.parser.parseError("unexpected-end-tag", error_vars)

    def anythingElse(self):
        """Insert an implied ``body`` element and switch to "inBody"."""
        implied_body = impliedTagToken("body", "StartTag")
        self.tree.insertElement(implied_body)
        self.parser.phase = self.parser.phases["inBody"]
        self.parser.framesetOK = True
929 | |||
930 | class InBodyPhase(Phase): | ||
931 | # http://www.whatwg.org/specs/web-apps/current-work/#parsing-main-inbody | ||
932 | # the really-really-really-very crazy mode | ||
def __init__(self, parser, tree):
    """Set up the start-tag and end-tag dispatch tables for this phase."""
    Phase.__init__(self, parser, tree)

    # Set this to the default handler
    self.processSpaceCharacters = self.processSpaceCharactersNonPre

    head_tags = ("base", "basefont", "bgsound", "command", "link", "meta",
                 "script", "style", "title")
    close_p_tags = ("address", "article", "aside", "blockquote", "center", "details",
                    "dir", "div", "dl", "fieldset", "figcaption", "figure",
                    "footer", "header", "hgroup", "main", "menu", "nav", "ol", "p",
                    "section", "summary", "ul")
    formatting_start_tags = ("b", "big", "code", "em", "font", "i", "s", "small", "strike",
                             "strong", "tt", "u")
    misplaced_tags = ("caption", "col", "colgroup", "frame", "head",
                      "tbody", "td", "tfoot", "th", "thead",
                      "tr")
    start_table = [
        ("html", self.startTagHtml),
        (head_tags, self.startTagProcessInHead),
        ("body", self.startTagBody),
        ("frameset", self.startTagFrameset),
        (close_p_tags, self.startTagCloseP),
        (headingElements, self.startTagHeading),
        (("pre", "listing"), self.startTagPreListing),
        ("form", self.startTagForm),
        (("li", "dd", "dt"), self.startTagListItem),
        ("plaintext", self.startTagPlaintext),
        ("a", self.startTagA),
        (formatting_start_tags, self.startTagFormatting),
        ("nobr", self.startTagNobr),
        ("button", self.startTagButton),
        (("applet", "marquee", "object"), self.startTagAppletMarqueeObject),
        ("xmp", self.startTagXmp),
        ("table", self.startTagTable),
        (("area", "br", "embed", "img", "keygen", "wbr"),
         self.startTagVoidFormatting),
        (("param", "source", "track"), self.startTagParamSource),
        ("input", self.startTagInput),
        ("hr", self.startTagHr),
        ("image", self.startTagImage),
        ("isindex", self.startTagIsIndex),
        ("textarea", self.startTagTextarea),
        ("iframe", self.startTagIFrame),
        ("noscript", self.startTagNoscript),
        (("noembed", "noframes"), self.startTagRawtext),
        ("select", self.startTagSelect),
        (("rp", "rt"), self.startTagRpRt),
        (("option", "optgroup"), self.startTagOpt),
        ("math", self.startTagMath),
        ("svg", self.startTagSvg),
        (misplaced_tags, self.startTagMisplaced),
    ]
    self.startTagHandler = _utils.MethodDispatcher(start_table)
    self.startTagHandler.default = self.startTagOther

    block_end_tags = ("address", "article", "aside", "blockquote", "button", "center",
                      "details", "dialog", "dir", "div", "dl", "fieldset", "figcaption", "figure",
                      "footer", "header", "hgroup", "listing", "main", "menu", "nav", "ol", "pre",
                      "section", "summary", "ul")
    formatting_end_tags = ("a", "b", "big", "code", "em", "font", "i", "nobr", "s", "small",
                           "strike", "strong", "tt", "u")
    end_table = [
        ("body", self.endTagBody),
        ("html", self.endTagHtml),
        (block_end_tags, self.endTagBlock),
        ("form", self.endTagForm),
        ("p", self.endTagP),
        (("dd", "dt", "li"), self.endTagListItem),
        (headingElements, self.endTagHeading),
        (formatting_end_tags, self.endTagFormatting),
        (("applet", "marquee", "object"), self.endTagAppletMarqueeObject),
        ("br", self.endTagBr),
    ]
    self.endTagHandler = _utils.MethodDispatcher(end_table)
    self.endTagHandler.default = self.endTagOther
1003 | |||
def isMatchingFormattingElement(self, node1, node2):
    """Return whether two nodes share name, namespace and attributes."""
    same_name = node1.name == node2.name
    if not same_name:
        return same_name
    same_namespace = node1.namespace == node2.namespace
    if not same_namespace:
        return same_namespace
    return node1.attributes == node2.attributes
1008 | |||
1009 | # helper | ||
def addFormattingElement(self, token):
    """Insert *token* as an element and record it as an active formatting element.

    Entries after the last Marker that match the new element are counted;
    when three already exist, the earliest of them is dropped before the
    new element is appended.
    """
    self.tree.insertElement(token)
    new_element = self.tree.openElements[-1]

    duplicates = []
    for candidate in reversed(self.tree.activeFormattingElements):
        if candidate is Marker:
            break
        if self.isMatchingFormattingElement(candidate, new_element):
            duplicates.append(candidate)

    assert len(duplicates) <= 3
    if len(duplicates) == 3:
        # The list was collected newest-first, so the last entry is the oldest.
        self.tree.activeFormattingElements.remove(duplicates[-1])
    self.tree.activeFormattingElements.append(new_element)
1025 | |||
1026 | # the real deal | ||
def processEOF(self):
    """Report a parse error at EOF if an unexpected element is still open.

    At most one "expected-closing-tag-but-got-eof" error is reported.
    """
    implicitly_closable = frozenset(("dd", "dt", "li", "p", "tbody", "td",
                                     "tfoot", "th", "thead", "tr", "body",
                                     "html"))
    still_open = reversed(self.tree.openElements)
    if any(node.name not in implicitly_closable for node in still_open):
        self.parser.parseError("expected-closing-tag-but-got-eof")
    # Stop parsing
1036 | |||
def processSpaceCharactersDropNewline(self, token):
    """Insert whitespace, dropping one leading newline in an empty pre-like element.

    The whitespace handler is reset to ``processSpaceCharactersNonPre``
    before anything else is done.
    """
    # Sometimes (start of <pre>, <listing>, and <textarea> blocks) we
    # want to drop leading newlines
    text = token["data"]
    self.processSpaceCharacters = self.processSpaceCharactersNonPre
    if text.startswith("\n"):
        current = self.tree.openElements[-1]
        if current.name in ("pre", "listing", "textarea") and not current.hasContent():
            text = text[1:]
    if not text:
        return
    self.tree.reconstructActiveFormattingElements()
    self.tree.insertText(text)
1049 | |||
def processCharacters(self, token):
    """Insert character data into the tree.

    A token consisting solely of U+0000 is ignored. If the data contains
    any character that is not in ``spaceCharacters``, ``framesetOK`` is
    cleared.
    """
    if token["data"] == "\u0000":
        # The tokenizer should always emit null on its own
        return
    self.tree.reconstructActiveFormattingElements()
    self.tree.insertText(token["data"])
    # A generator lets any() stop at the first non-space character
    # instead of scanning (and materialising a list for) the whole string.
    if (self.parser.framesetOK and
            any(char not in spaceCharacters
                for char in token["data"])):
        self.parser.framesetOK = False
1061 | |||
def processSpaceCharactersNonPre(self, token):
    """Insert whitespace unchanged after reconstructing formatting elements."""
    tree = self.tree
    tree.reconstructActiveFormattingElements()
    tree.insertText(token["data"])
1065 | |||
def startTagProcessInHead(self, token):
    """Delegate the start tag to the "inHead" phase and return its result."""
    in_head = self.parser.phases["inHead"]
    return in_head.processStartTag(token)
1068 | |||
def startTagBody(self, token):
    """Handle a stray ``body`` start tag.

    Always a parse error. When the second open element is a body, the
    token's attributes that the body does not already have are copied
    onto it and ``framesetOK`` is cleared.
    """
    self.parser.parseError("unexpected-start-tag", {"name": "body"})
    stack = self.tree.openElements
    if len(stack) == 1 or stack[1].name != "body":
        assert self.parser.innerHTML
        return
    self.parser.framesetOK = False
    for key, val in token["data"].items():
        existing = self.tree.openElements[1].attributes
        if key not in existing:
            existing[key] = val
1079 | |||
def startTagFrameset(self, token):
    """Handle a ``frameset`` start tag seen in the body.

    Always a parse error. When the second open element is a body and
    ``framesetOK`` is still set, the body is detached from its parent,
    the stack is popped back to ``html``, the frameset is inserted and
    the parser moves to the "inFrameset" phase.
    """
    self.parser.parseError("unexpected-start-tag", {"name": "frameset"})
    stack = self.tree.openElements
    if len(stack) == 1 or stack[1].name != "body":
        assert self.parser.innerHTML
        return
    if not self.parser.framesetOK:
        return
    if self.tree.openElements[1].parent:
        self.tree.openElements[1].parent.removeChild(self.tree.openElements[1])
    while self.tree.openElements[-1].name != "html":
        self.tree.openElements.pop()
    self.tree.insertElement(token)
    self.parser.phase = self.parser.phases["inFrameset"]
1093 | |||
def startTagCloseP(self, token):
    """Insert the element, first closing any ``p`` in button scope."""
    has_open_p = self.tree.elementInScope("p", variant="button")
    if has_open_p:
        self.endTagP(impliedTagToken("p"))
    self.tree.insertElement(token)
1098 | |||
def startTagPreListing(self, token):
    """Insert a ``pre``/``listing`` element.

    Any ``p`` in button scope is closed first; afterwards ``framesetOK``
    is cleared and the newline-dropping whitespace handler is installed.
    """
    has_open_p = self.tree.elementInScope("p", variant="button")
    if has_open_p:
        self.endTagP(impliedTagToken("p"))
    self.tree.insertElement(token)
    self.parser.framesetOK = False
    self.processSpaceCharacters = self.processSpaceCharactersDropNewline
1105 | |||
def startTagForm(self, token):
    """Insert a ``form`` element unless a form pointer is already set.

    With a form pointer already set the tag is only reported as a parse
    error. Otherwise any ``p`` in button scope is closed, the form is
    inserted, and it becomes the form pointer.
    """
    if self.tree.formPointer:
        self.parser.parseError("unexpected-start-tag", {"name": "form"})
        return
    if self.tree.elementInScope("p", variant="button"):
        self.endTagP(impliedTagToken("p"))
    self.tree.insertElement(token)
    self.tree.formPointer = self.tree.openElements[-1]
1114 | |||
def startTagListItem(self, token):
    """Insert an ``li``/``dd``/``dt`` element, closing an open sibling first.

    The stack of open elements is walked from the top. The first matching
    list item found is closed via an implied end tag; the walk also stops
    at any special element other than ``address``, ``div`` or ``p``.
    """
    self.parser.framesetOK = False

    if token["name"] == "li":
        closes = ["li"]
    else:
        # Raises KeyError for any other name, like the lookup it replaces.
        closes = {"dt": ["dt", "dd"], "dd": ["dt", "dd"]}[token["name"]]

    for open_node in reversed(self.tree.openElements):
        if open_node.name in closes:
            implied_end = impliedTagToken(open_node.name, "EndTag")
            self.parser.phase.processEndTag(implied_end)
            break
        is_special = open_node.nameTuple in specialElements
        if is_special and open_node.name not in ("address", "div", "p"):
            break

    if self.tree.elementInScope("p", variant="button"):
        implied_p_end = impliedTagToken("p", "EndTag")
        self.parser.phase.processEndTag(implied_p_end)

    self.tree.insertElement(token)
1136 | |||
def startTagPlaintext(self, token):
    """Insert a ``plaintext`` element and switch the tokenizer to plaintext state."""
    has_open_p = self.tree.elementInScope("p", variant="button")
    if has_open_p:
        self.endTagP(impliedTagToken("p"))
    self.tree.insertElement(token)
    tokenizer = self.parser.tokenizer
    tokenizer.state = tokenizer.plaintextState
1142 | |||
def startTagHeading(self, token):
    """Insert a heading element.

    Any ``p`` in button scope is closed first. If the current node is
    itself a heading, that is a parse error and the node is popped.
    """
    has_open_p = self.tree.elementInScope("p", variant="button")
    if has_open_p:
        self.endTagP(impliedTagToken("p"))
    current_name = self.tree.openElements[-1].name
    if current_name in headingElements:
        self.parser.parseError("unexpected-start-tag", {"name": token["name"]})
        self.tree.openElements.pop()
    self.tree.insertElement(token)
1150 | |||
def startTagA(self, token):
    """Insert an ``a`` element as a formatting element.

    If an ``a`` is already in the active formatting elements, that is a
    parse error: an implied ``a`` end tag is processed and the old element
    is removed from both lists if it is still present.
    """
    previous_a = self.tree.elementInActiveFormattingElements("a")
    if previous_a:
        self.parser.parseError("unexpected-start-tag-implies-end-tag",
                               {"startName": "a", "endName": "a"})
        self.endTagFormatting(impliedTagToken("a"))
        # The implied end tag may or may not have removed the old element.
        for element_list in (self.tree.openElements,
                             self.tree.activeFormattingElements):
            if previous_a in element_list:
                element_list.remove(previous_a)
    self.tree.reconstructActiveFormattingElements()
    self.addFormattingElement(token)
1163 | |||
def startTagFormatting(self, token):
    """Reconstruct active formatting elements, then add *token* as one."""
    tree = self.tree
    tree.reconstructActiveFormattingElements()
    self.addFormattingElement(token)
1167 | |||
def startTagNobr(self, token):
    """Insert a ``nobr`` formatting element, closing an open one first.

    A ``nobr`` already in scope is a parse error and is closed through an
    implied end tag before the new element is added.
    """
    self.tree.reconstructActiveFormattingElements()
    already_open = self.tree.elementInScope("nobr")
    if already_open:
        error_vars = {"startName": "nobr", "endName": "nobr"}
        self.parser.parseError("unexpected-start-tag-implies-end-tag", error_vars)
        self.processEndTag(impliedTagToken("nobr"))
        # XXX Need tests that trigger the following
        self.tree.reconstructActiveFormattingElements()
    self.addFormattingElement(token)
1177 | |||
def startTagButton(self, token):
    """Insert a ``button`` element, or close an open one and return the token.

    When a button is already in scope, a parse error is reported, an
    implied ``button`` end tag is processed, and *token* is returned.
    Otherwise the element is inserted and ``framesetOK`` is cleared.
    """
    if not self.tree.elementInScope("button"):
        self.tree.reconstructActiveFormattingElements()
        self.tree.insertElement(token)
        self.parser.framesetOK = False
        return None
    error_vars = {"startName": "button", "endName": "button"}
    self.parser.parseError("unexpected-start-tag-implies-end-tag", error_vars)
    self.processEndTag(impliedTagToken("button"))
    return token
1188 | |||
def startTagAppletMarqueeObject(self, token):
    """Insert the element and push a Marker onto the active formatting elements."""
    tree = self.tree
    tree.reconstructActiveFormattingElements()
    tree.insertElement(token)
    tree.activeFormattingElements.append(Marker)
    self.parser.framesetOK = False
1194 | |||
def startTagXmp(self, token):
    """Handle an ``xmp`` start tag by parsing its content as RAWTEXT.

    Any ``p`` in button scope is closed first and ``framesetOK`` is cleared.
    """
    has_open_p = self.tree.elementInScope("p", variant="button")
    if has_open_p:
        self.endTagP(impliedTagToken("p"))
    self.tree.reconstructActiveFormattingElements()
    parser = self.parser
    parser.framesetOK = False
    parser.parseRCDataRawtext(token, "RAWTEXT")
1201 | |||
def startTagTable(self, token):
    """Insert a ``table`` element and move to the "inTable" phase.

    Outside quirks mode, a ``p`` in button scope is closed first.
    """
    not_quirks = self.parser.compatMode != "quirks"
    if not_quirks and self.tree.elementInScope("p", variant="button"):
        self.processEndTag(impliedTagToken("p"))
    self.tree.insertElement(token)
    self.parser.framesetOK = False
    self.parser.phase = self.parser.phases["inTable"]
1209 | |||
def startTagVoidFormatting(self, token):
    """Insert a void element and immediately pop it off the open elements.

    The token's self-closing flag is acknowledged and ``framesetOK`` is
    cleared.
    """
    tree = self.tree
    tree.reconstructActiveFormattingElements()
    tree.insertElement(token)
    tree.openElements.pop()
    token["selfClosingAcknowledged"] = True
    self.parser.framesetOK = False
1216 | |||
def startTagInput(self, token):
    """Insert an ``input`` element as a void element.

    ``framesetOK`` is restored to its previous value when the ``type``
    attribute, ASCII-lowercased, equals "hidden".
    """
    previous_frameset_ok = self.parser.framesetOK
    self.startTagVoidFormatting(token)
    attrs = token["data"]
    is_hidden = ("type" in attrs and
                 attrs["type"].translate(asciiUpper2Lower) == "hidden")
    if is_hidden:
        # input type=hidden doesn't change framesetOK
        self.parser.framesetOK = previous_frameset_ok
1224 | |||
def startTagParamSource(self, token):
    """Insert a ``param``/``source``/``track`` element and pop it right away."""
    tree = self.tree
    tree.insertElement(token)
    tree.openElements.pop()
    token["selfClosingAcknowledged"] = True
1229 | |||
def startTagHr(self, token):
    """Insert an ``hr`` element as a void element.

    Any ``p`` in button scope is closed first; ``framesetOK`` is cleared.
    """
    has_open_p = self.tree.elementInScope("p", variant="button")
    if has_open_p:
        self.endTagP(impliedTagToken("p"))
    tree = self.tree
    tree.insertElement(token)
    tree.openElements.pop()
    token["selfClosingAcknowledged"] = True
    self.parser.framesetOK = False
1237 | |||
def startTagImage(self, token):
    """Treat an ``image`` start tag as ``img``, reporting a parse error."""
    # No really...
    error_vars = {"originalName": "image", "newName": "img"}
    self.parser.parseError("unexpected-start-tag-treated-as", error_vars)
    img_token = impliedTagToken("img", "StartTag",
                                attributes=token["data"],
                                selfClosing=token["selfClosing"])
    self.processStartTag(img_token)
1245 | |||
def startTagIsIndex(self, token):
    """Expand a deprecated ``isindex`` tag into form/hr/label/input markup.

    Always a parse error; nothing further happens when a form pointer is
    already set. The ``action`` attribute goes to the generated form, the
    ``prompt`` attribute (or a default text) becomes the label text, and
    the remaining attributes go to the generated input named "isindex".
    """
    self.parser.parseError("deprecated-tag", {"name": "isindex"})
    if self.tree.formPointer:
        return

    source_attrs = token["data"]
    form_attrs = {}
    if "action" in source_attrs:
        form_attrs["action"] = source_attrs["action"]
    self.processStartTag(impliedTagToken("form", "StartTag",
                                         attributes=form_attrs))
    self.processStartTag(impliedTagToken("hr", "StartTag"))
    self.processStartTag(impliedTagToken("label", "StartTag"))

    # XXX Localization ...
    if "prompt" in token["data"]:
        prompt = token["data"]["prompt"]
    else:
        prompt = "This is a searchable index. Enter search keywords: "
    self.processCharacters(
        {"type": tokenTypes["Characters"], "data": prompt})

    input_attrs = token["data"].copy()
    for consumed in ("action", "prompt"):
        if consumed in input_attrs:
            del input_attrs[consumed]
    input_attrs["name"] = "isindex"
    self.processStartTag(impliedTagToken("input", "StartTag",
                                         attributes=input_attrs,
                                         selfClosing=token["selfClosing"]))
    self.processEndTag(impliedTagToken("label"))
    self.processStartTag(impliedTagToken("hr", "StartTag"))
    self.processEndTag(impliedTagToken("form"))
1276 | |||
def startTagTextarea(self, token):
    """Insert a ``textarea`` element and switch the tokenizer to RCDATA.

    The newline-dropping whitespace handler is installed and
    ``framesetOK`` is cleared.
    """
    self.tree.insertElement(token)
    tokenizer = self.parser.tokenizer
    tokenizer.state = tokenizer.rcdataState
    self.processSpaceCharacters = self.processSpaceCharactersDropNewline
    self.parser.framesetOK = False
1282 | |||
def startTagIFrame(self, token):
    """Clear ``framesetOK`` and handle the ``iframe`` via ``startTagRawtext``."""
    parser = self.parser
    parser.framesetOK = False
    self.startTagRawtext(token)
1286 | |||
def startTagNoscript(self, token):
    """Handle ``noscript`` in the body.

    With scripting enabled it goes to ``startTagRawtext``; otherwise it is
    handled by ``startTagOther``.
    """
    handler = self.startTagRawtext if self.parser.scripting else self.startTagOther
    handler(token)
1292 | |||
def startTagRawtext(self, token):
    """iframe, noembed noframes, noscript(if scripting enabled)"""
    content_model = "RAWTEXT"
    self.parser.parseRCDataRawtext(token, content_model)
1296 | |||
def startTagOpt(self, token):
    """Insert an ``option``/``optgroup`` element, closing an open ``option`` first."""
    current = self.tree.openElements[-1]
    if current.name == "option":
        implied_end = impliedTagToken("option")
        self.parser.phase.processEndTag(implied_end)
    self.tree.reconstructActiveFormattingElements()
    self.parser.tree.insertElement(token)
1302 | |||
def startTagSelect(self, token):
    """Insert a ``select`` element and pick the follow-up phase.

    If the parser's current phase is one of the table-related phases the
    next phase is "inSelectInTable", otherwise "inSelect".
    """
    self.tree.reconstructActiveFormattingElements()
    self.tree.insertElement(token)
    parser = self.parser
    parser.framesetOK = False
    table_phase_names = ("inTable", "inCaption", "inColumnGroup",
                         "inTableBody", "inRow", "inCell")
    table_phases = tuple(parser.phases[name] for name in table_phase_names)
    if parser.phase in table_phases:
        next_phase = "inSelectInTable"
    else:
        next_phase = "inSelect"
    parser.phase = parser.phases[next_phase]
1316 | |||
def startTagRpRt(self, token):
    """Insert an ``rp``/``rt`` element.

    With a ``ruby`` in scope, implied end tags are generated first, and a
    parse error is reported if the current node is then not ``ruby``.
    """
    tree = self.tree
    if tree.elementInScope("ruby"):
        tree.generateImpliedEndTags()
        current_name = tree.openElements[-1].name
        if current_name != "ruby":
            self.parser.parseError()
    tree.insertElement(token)
1323 | |||
def startTagMath(self, token):
    """Insert a ``math`` element in the MathML namespace.

    Attributes are adjusted first. A self-closing token is popped straight
    away and its self-closing flag acknowledged.
    """
    self.tree.reconstructActiveFormattingElements()
    parser = self.parser
    parser.adjustMathMLAttributes(token)
    parser.adjustForeignAttributes(token)
    token["namespace"] = namespaces["mathml"]
    self.tree.insertElement(token)
    # Need to get the parse error right for the case where the token
    # has a namespace not equal to the xmlns attribute
    if not token["selfClosing"]:
        return
    self.tree.openElements.pop()
    token["selfClosingAcknowledged"] = True
1335 | |||
def startTagSvg(self, token):
    """Insert an ``svg`` element in the SVG namespace.

    Attributes are adjusted first. A self-closing token is popped straight
    away and its self-closing flag acknowledged.
    """
    self.tree.reconstructActiveFormattingElements()
    parser = self.parser
    parser.adjustSVGAttributes(token)
    parser.adjustForeignAttributes(token)
    token["namespace"] = namespaces["svg"]
    self.tree.insertElement(token)
    # Need to get the parse error right for the case where the token
    # has a namespace not equal to the xmlns attribute
    if not token["selfClosing"]:
        return
    self.tree.openElements.pop()
    token["selfClosingAcknowledged"] = True
1347 | |||
def startTagMisplaced(self, token):
    """Ignore a start tag that belongs to a different insertion mode.

    The start-tag dispatcher routes these tags here:
    "caption", "col", "colgroup", "frame", "head", "tbody", "td",
    "tfoot", "th", "thead", "tr".

    The token is dropped; only an "unexpected-start-tag-ignored" parse
    error is reported.
    """
    self.parser.parseError("unexpected-start-tag-ignored", {"name": token["name"]})
1356 | |||
def startTagOther(self, token):
    """Insert any other element after reconstructing formatting elements."""
    tree = self.tree
    tree.reconstructActiveFormattingElements()
    tree.insertElement(token)
1360 | |||
def endTagP(self, token):
    """Close the nearest ``p`` element in button scope.

    With no ``p`` in button scope, an implied ``p`` start tag is inserted,
    a parse error is reported, and the method calls itself with an implied
    end tag. Otherwise elements are popped up to and including the ``p``,
    with a parse error if the current node was not that ``p``.
    """
    if self.tree.elementInScope("p", variant="button"):
        self.tree.generateImpliedEndTags("p")
        if self.tree.openElements[-1].name != "p":
            self.parser.parseError("unexpected-end-tag", {"name": "p"})
        # Pop until the p itself has been removed.
        while self.tree.openElements.pop().name != "p":
            pass
        return
    self.startTagCloseP(impliedTagToken("p", "StartTag"))
    self.parser.parseError("unexpected-end-tag", {"name": "p"})
    self.endTagP(impliedTagToken("p", "EndTag"))
1373 | |||
def endTagBody(self, token):
    """Handle a ``body`` end tag by moving to the "afterBody" phase.

    With no ``body`` in scope, a parse error is reported and the phase is
    left unchanged. If the current node is not ``body``, the open elements
    from index 2 onwards are scanned and one parse error is reported for
    the first element that is not in the allowed set.
    """
    if not self.tree.elementInScope("body"):
        self.parser.parseError()
        return
    elif self.tree.openElements[-1].name != "body":
        # Built once, outside the loop, instead of once per open element.
        allowed_open = frozenset(("dd", "dt", "li", "optgroup",
                                  "option", "p", "rp", "rt",
                                  "tbody", "td", "tfoot",
                                  "th", "thead", "tr", "body",
                                  "html"))
        for node in self.tree.openElements[2:]:
            if node.name not in allowed_open:
                # Not sure this is the correct name for the parse error
                self.parser.parseError(
                    "expected-one-end-tag-but-got-another",
                    {"gotName": "body", "expectedName": node.name})
                break
    self.parser.phase = self.parser.phases["afterBody"]
1391 | |||
def endTagHtml(self, token):
    """Handle an ``html`` end tag as an implied ``body`` end tag.

    Returns *token* when a ``body`` is in scope, otherwise None.
    """
    # We repeat the test for the body end tag token being ignored here
    if not self.tree.elementInScope("body"):
        return None
    self.endTagBody(impliedTagToken("body"))
    return token
1397 | |||
def endTagBlock(self, token):
    """Close a block-level element named by *token*.

    When the element is in scope, implied end tags are generated and
    elements are popped up to and including it. A parse error is reported
    whenever the current node does not carry the token's name.
    """
    tag_name = token["name"]
    # Put us back in the right whitespace handling mode
    if tag_name == "pre":
        self.processSpaceCharacters = self.processSpaceCharactersNonPre
    in_scope = self.tree.elementInScope(tag_name)
    if in_scope:
        self.tree.generateImpliedEndTags()
    if self.tree.openElements[-1].name != tag_name:
        self.parser.parseError("end-tag-too-early", {"name": tag_name})
    if in_scope:
        while self.tree.openElements.pop().name != tag_name:
            pass
1411 | |||
def endTagForm(self, token):
    """Close the form referenced by the form pointer.

    The form pointer is always cleared. If there was no form, or it is not
    in scope, a parse error is reported. Otherwise implied end tags are
    generated and the form node is removed from the open elements, with a
    parse error if it was not the current node.
    """
    form_node = self.tree.formPointer
    self.tree.formPointer = None
    if form_node is None or not self.tree.elementInScope(form_node):
        self.parser.parseError("unexpected-end-tag",
                               {"name": "form"})
        return
    self.tree.generateImpliedEndTags()
    current = self.tree.openElements[-1]
    if current != form_node:
        self.parser.parseError("end-tag-too-early-ignored",
                               {"name": "form"})
    self.tree.openElements.remove(form_node)
1424 | |||
def endTagListItem(self, token):
    """Close an ``li``/``dd``/``dt`` element.

    ``li`` is looked up with the "list" scope variant, the others with the
    default scope. If the element is not in scope only a parse error is
    reported; otherwise elements are popped up to and including it.
    """
    tag_name = token["name"]
    scope_variant = "list" if tag_name == "li" else None
    if not self.tree.elementInScope(tag_name, variant=scope_variant):
        self.parser.parseError("unexpected-end-tag", {"name": tag_name})
        return
    self.tree.generateImpliedEndTags(exclude=tag_name)
    if self.tree.openElements[-1].name != tag_name:
        self.parser.parseError(
            "end-tag-too-early",
            {"name": tag_name})
    while self.tree.openElements.pop().name != tag_name:
        pass
1441 | |||
def endTagHeading(self, token):
    """Close the nearest open heading element.

    If any heading is in scope, implied end tags are generated. A parse
    error is reported when the current node's name differs from the
    token's. If a heading is in scope, elements are then popped until a
    heading has been removed.
    """
    if any(self.tree.elementInScope(level) for level in headingElements):
        self.tree.generateImpliedEndTags()
    if self.tree.openElements[-1].name != token["name"]:
        self.parser.parseError("end-tag-too-early", {"name": token["name"]})

    # The scope check is repeated rather than cached, as before.
    if any(self.tree.elementInScope(level) for level in headingElements):
        popped = self.tree.openElements.pop()
        while popped.name not in headingElements:
            popped = self.tree.openElements.pop()
1456 | |||
1457 | def endTagFormatting(self, token): | ||
1458 | """The much-feared adoption agency algorithm""" | ||
1459 | # http://svn.whatwg.org/webapps/complete.html#adoptionAgency revision 7867 | ||
1460 | # XXX Better parseError messages appreciated. | ||
1461 | |||
1462 | # Step 1 | ||
1463 | outerLoopCounter = 0 | ||
1464 | |||
1465 | # Step 2 | ||
1466 | while outerLoopCounter < 8: | ||
1467 | |||
1468 | # Step 3 | ||
1469 | outerLoopCounter += 1 | ||
1470 | |||
1471 | # Step 4: | ||
1472 | |||
1473 | # Let the formatting element be the last element in | ||
1474 | # the list of active formatting elements that: | ||
1475 | # - is between the end of the list and the last scope | ||
1476 | # marker in the list, if any, or the start of the list | ||
1477 | # otherwise, and | ||
1478 | # - has the same tag name as the token. | ||
1479 | formattingElement = self.tree.elementInActiveFormattingElements( | ||
1480 | token["name"]) | ||
1481 | if (not formattingElement or | ||
1482 | (formattingElement in self.tree.openElements and | ||
1483 | not self.tree.elementInScope(formattingElement.name))): | ||
1484 | # If there is no such node, then abort these steps | ||
1485 | # and instead act as described in the "any other | ||
1486 | # end tag" entry below. | ||
1487 | self.endTagOther(token) | ||
1488 | return | ||
1489 | |||
1490 | # Otherwise, if there is such a node, but that node is | ||
1491 | # not in the stack of open elements, then this is a | ||
1492 | # parse error; remove the element from the list, and | ||
1493 | # abort these steps. | ||
1494 | elif formattingElement not in self.tree.openElements: | ||
1495 | self.parser.parseError("adoption-agency-1.2", {"name": token["name"]}) | ||
1496 | self.tree.activeFormattingElements.remove(formattingElement) | ||
1497 | return | ||
1498 | |||
1499 | # Otherwise, if there is such a node, and that node is | ||
1500 | # also in the stack of open elements, but the element | ||
1501 | # is not in scope, then this is a parse error; ignore | ||
1502 | # the token, and abort these steps. | ||
1503 | elif not self.tree.elementInScope(formattingElement.name): | ||
1504 | self.parser.parseError("adoption-agency-4.4", {"name": token["name"]}) | ||
1505 | return | ||
1506 | |||
1507 | # Otherwise, there is a formatting element and that | ||
1508 | # element is in the stack and is in scope. If the | ||
1509 | # element is not the current node, this is a parse | ||
1510 | # error. In any case, proceed with the algorithm as | ||
1511 | # written in the following steps. | ||
1512 | else: | ||
1513 | if formattingElement != self.tree.openElements[-1]: | ||
1514 | self.parser.parseError("adoption-agency-1.3", {"name": token["name"]}) | ||
1515 | |||
1516 | # Step 5: | ||
1517 | |||
1518 | # Let the furthest block be the topmost node in the | ||
1519 | # stack of open elements that is lower in the stack | ||
1520 | # than the formatting element, and is an element in | ||
1521 | # the special category. There might not be one. | ||
1522 | afeIndex = self.tree.openElements.index(formattingElement) | ||
1523 | furthestBlock = None | ||
1524 | for element in self.tree.openElements[afeIndex:]: | ||
1525 | if element.nameTuple in specialElements: | ||
1526 | furthestBlock = element | ||
1527 | break | ||
1528 | |||
1529 | # Step 6: | ||
1530 | |||
1531 | # If there is no furthest block, then the UA must | ||
1532 | # first pop all the nodes from the bottom of the stack | ||
1533 | # of open elements, from the current node up to and | ||
1534 | # including the formatting element, then remove the | ||
1535 | # formatting element from the list of active | ||
1536 | # formatting elements, and finally abort these steps. | ||
1537 | if furthestBlock is None: | ||
1538 | element = self.tree.openElements.pop() | ||
1539 | while element != formattingElement: | ||
1540 | element = self.tree.openElements.pop() | ||
1541 | self.tree.activeFormattingElements.remove(element) | ||
1542 | return | ||
1543 | |||
1544 | # Step 7 | ||
1545 | commonAncestor = self.tree.openElements[afeIndex - 1] | ||
1546 | |||
1547 | # Step 8: | ||
1548 | # The bookmark is supposed to help us identify where to reinsert | ||
1549 | # nodes in step 15. We have to ensure that we reinsert nodes after | ||
1550 | # the node before the active formatting element. Note the bookmark | ||
1551 | # can move in step 9.7 | ||
1552 | bookmark = self.tree.activeFormattingElements.index(formattingElement) | ||
1553 | |||
1554 | # Step 9 | ||
1555 | lastNode = node = furthestBlock | ||
1556 | innerLoopCounter = 0 | ||
1557 | |||
1558 | index = self.tree.openElements.index(node) | ||
1559 | while innerLoopCounter < 3: | ||
1560 | innerLoopCounter += 1 | ||
1561 | # Node is element before node in open elements | ||
1562 | index -= 1 | ||
1563 | node = self.tree.openElements[index] | ||
1564 | if node not in self.tree.activeFormattingElements: | ||
1565 | self.tree.openElements.remove(node) | ||
1566 | continue | ||
1567 | # Step 9.6 | ||
1568 | if node == formattingElement: | ||
1569 | break | ||
1570 | # Step 9.7 | ||
1571 | if lastNode == furthestBlock: | ||
1572 | bookmark = self.tree.activeFormattingElements.index(node) + 1 | ||
1573 | # Step 9.8 | ||
1574 | clone = node.cloneNode() | ||
1575 | # Replace node with clone | ||
1576 | self.tree.activeFormattingElements[ | ||
1577 | self.tree.activeFormattingElements.index(node)] = clone | ||
1578 | self.tree.openElements[ | ||
1579 | self.tree.openElements.index(node)] = clone | ||
1580 | node = clone | ||
1581 | # Step 9.9 | ||
1582 | # Remove lastNode from its parents, if any | ||
1583 | if lastNode.parent: | ||
1584 | lastNode.parent.removeChild(lastNode) | ||
1585 | node.appendChild(lastNode) | ||
1586 | # Step 9.10 | ||
1587 | lastNode = node | ||
1588 | |||
1589 | # Step 10 | ||
1590 | # Foster parent lastNode if commonAncestor is a | ||
1591 | # table, tbody, tfoot, thead, or tr we need to foster | ||
1592 | # parent the lastNode | ||
1593 | if lastNode.parent: | ||
1594 | lastNode.parent.removeChild(lastNode) | ||
1595 | |||
1596 | if commonAncestor.name in frozenset(("table", "tbody", "tfoot", "thead", "tr")): | ||
1597 | parent, insertBefore = self.tree.getTableMisnestedNodePosition() | ||
1598 | parent.insertBefore(lastNode, insertBefore) | ||
1599 | else: | ||
1600 | commonAncestor.appendChild(lastNode) | ||
1601 | |||
1602 | # Step 11 | ||
1603 | clone = formattingElement.cloneNode() | ||
1604 | |||
1605 | # Step 12 | ||
1606 | furthestBlock.reparentChildren(clone) | ||
1607 | |||
1608 | # Step 13 | ||
1609 | furthestBlock.appendChild(clone) | ||
1610 | |||
1611 | # Step 14 | ||
1612 | self.tree.activeFormattingElements.remove(formattingElement) | ||
1613 | self.tree.activeFormattingElements.insert(bookmark, clone) | ||
1614 | |||
1615 | # Step 15 | ||
1616 | self.tree.openElements.remove(formattingElement) | ||
1617 | self.tree.openElements.insert( | ||
1618 | self.tree.openElements.index(furthestBlock) + 1, clone) | ||
1619 | |||
def endTagAppletMarqueeObject(self, token):
    """Handle an end tag for ``applet``, ``marquee`` or ``object``.

    :arg token: the end tag token; only ``token["name"]`` is read

    When an element with the token's name is in scope, implied end tags
    are generated, the stack of open elements is popped up to and
    including that element, and the list of active formatting elements
    is cleared. A parse error is reported whenever the current node does
    not carry the token's name.
    """
    tagName = token["name"]
    tree = self.tree

    if tree.elementInScope(tagName):
        tree.generateImpliedEndTags()
    if tree.openElements[-1].name != tagName:
        self.parser.parseError("end-tag-too-early", {"name": tagName})

    # The scope check is deliberately repeated after the implied end
    # tags have been generated, exactly as the handler always did.
    if not tree.elementInScope(tagName):
        return

    # Pop until the element that was just removed is the named one.
    while tree.openElements.pop().name != tagName:
        pass
    tree.clearActiveFormattingElements()
1631 | |||
def endTagBr(self, token):
    """Handle a stray ``</br>`` by treating it as a ``<br>`` start tag.

    :arg token: the end tag token (not inspected)

    A parse error is reported, the active formatting elements are
    reconstructed, and an implied ``br`` element is inserted and then
    immediately popped off the stack of open elements.
    """
    errorData = {"originalName": "br", "newName": "br element"}
    self.parser.parseError("unexpected-end-tag-treated-as", errorData)

    tree = self.tree
    tree.reconstructActiveFormattingElements()
    brToken = impliedTagToken("br", "StartTag")
    tree.insertElement(brToken)
    # The br element never stays open.
    tree.openElements.pop()
1638 | |||
def endTagOther(self, token):
    """Handle any end tag that has no dedicated handler.

    :arg token: the end tag token; only ``token["name"]`` is read

    The stack of open elements is scanned from the current node upwards.
    The first element with the token's name is closed (together with
    everything above it); reaching an element in ``specialElements``
    first is a parse error and the token is ignored.
    """
    tagName = token["name"]

    # Iterate over a reversed snapshot: the live stack is popped below.
    for candidate in list(reversed(self.tree.openElements)):
        if candidate.name != tagName:
            if candidate.nameTuple in specialElements:
                self.parser.parseError("unexpected-end-tag", {"name": tagName})
                break
            continue

        self.tree.generateImpliedEndTags(exclude=tagName)
        if self.tree.openElements[-1].name != tagName:
            self.parser.parseError("unexpected-end-tag", {"name": tagName})

        # Pop up to and including the matched element.
        popped = self.tree.openElements.pop()
        while popped != candidate:
            popped = self.tree.openElements.pop()
        break
1652 | |||
class TextPhase(Phase):
    """Phase used while the text content of an element is being read.

    Every handler that ends the element pops it from the stack of open
    elements and switches the parser back to ``parser.originalPhase``.
    """

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        # No start tag has a dedicated handler in this phase.
        startDispatch = _utils.MethodDispatcher([])
        startDispatch.default = self.startTagOther
        self.startTagHandler = startDispatch

        endDispatch = _utils.MethodDispatcher([("script", self.endTagScript)])
        endDispatch.default = self.endTagOther
        self.endTagHandler = endDispatch

    def processCharacters(self, token):
        """Append the token's character data to the tree."""
        self.tree.insertText(token["data"])

    def processEOF(self):
        """Report the unclosed element, pop it and leave the phase.

        :returns: ``True``
        """
        openElements = self.tree.openElements
        self.parser.parseError("expected-named-closing-tag-but-got-eof",
                               {"name": openElements[-1].name})
        openElements.pop()
        self.parser.phase = self.parser.originalPhase
        return True

    def startTagOther(self, token):
        """Fail loudly: start tags are not expected in this phase."""
        assert False, "Tried to process start tag %s in RCDATA/RAWTEXT mode" % token['name']

    def endTagScript(self, token):
        """Close the current ``script`` element and leave the phase."""
        closed = self.tree.openElements.pop()
        assert closed.name == "script"
        self.parser.phase = self.parser.originalPhase
        # The rest of this method is all stuff that only happens if
        # document.write works

    def endTagOther(self, token):
        """Close the current element and leave the phase."""
        self.tree.openElements.pop()
        self.parser.phase = self.parser.originalPhase
1685 | |||
class InTablePhase(Phase):
    """Handlers for the "in table" insertion mode.

    Tokens that may not appear directly inside a table are delegated to
    the "inBody" phase while ``tree.insertFromTable`` is set ("table
    magic"). The flag is always cleared again, even when the delegated
    handler raises, so an exception cannot leave the tree builder in
    that mode.
    """
    # http://www.whatwg.org/specs/web-apps/current-work/#in-table
    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)
        self.startTagHandler = _utils.MethodDispatcher([
            ("html", self.startTagHtml),
            ("caption", self.startTagCaption),
            ("colgroup", self.startTagColgroup),
            ("col", self.startTagCol),
            (("tbody", "tfoot", "thead"), self.startTagRowGroup),
            (("td", "th", "tr"), self.startTagImplyTbody),
            ("table", self.startTagTable),
            (("style", "script"), self.startTagStyleScript),
            ("input", self.startTagInput),
            ("form", self.startTagForm)
        ])
        self.startTagHandler.default = self.startTagOther

        self.endTagHandler = _utils.MethodDispatcher([
            ("table", self.endTagTable),
            (("body", "caption", "col", "colgroup", "html", "tbody", "td",
              "tfoot", "th", "thead", "tr"), self.endTagIgnore)
        ])
        self.endTagHandler.default = self.endTagOther

    # helper methods
    def clearStackToTableContext(self):
        """Pop open elements until the current node is ``table`` or ``html``."""
        # "clear the stack back to a table context"
        while self.tree.openElements[-1].name not in ("table", "html"):
            # self.parser.parseError("unexpected-implied-end-tag-in-table",
            #  {"name":  self.tree.openElements[-1].name})
            self.tree.openElements.pop()
        # When the current node is <html> it's an innerHTML case

    # processing methods
    def processEOF(self):
        """Report EOF inside a table unless only ``html`` is left open."""
        if self.tree.openElements[-1].name != "html":
            self.parser.parseError("eof-in-table")
        else:
            assert self.parser.innerHTML
        # Stop parsing

    def processSpaceCharacters(self, token):
        """Switch to the "inTableText" phase and let it handle the token."""
        originalPhase = self.parser.phase
        self.parser.phase = self.parser.phases["inTableText"]
        # Remember where to come back to once the text run ends.
        self.parser.phase.originalPhase = originalPhase
        self.parser.phase.processSpaceCharacters(token)

    def processCharacters(self, token):
        """Switch to the "inTableText" phase and let it handle the token."""
        originalPhase = self.parser.phase
        self.parser.phase = self.parser.phases["inTableText"]
        # Remember where to come back to once the text run ends.
        self.parser.phase.originalPhase = originalPhase
        self.parser.phase.processCharacters(token)

    def insertText(self, token):
        """Insert character data via "inBody" with table magic enabled.

        :arg token: a Characters token
        """
        # If we get here there must be at least one non-whitespace character
        # Do the table magic!
        self.tree.insertFromTable = True
        try:
            self.parser.phases["inBody"].processCharacters(token)
        finally:
            # Never leave the flag set, even if the handler raised.
            self.tree.insertFromTable = False

    def startTagCaption(self, token):
        """Open a ``caption`` and switch to the "inCaption" phase."""
        self.clearStackToTableContext()
        self.tree.activeFormattingElements.append(Marker)
        self.tree.insertElement(token)
        self.parser.phase = self.parser.phases["inCaption"]

    def startTagColgroup(self, token):
        """Open a ``colgroup`` and switch to the "inColumnGroup" phase."""
        self.clearStackToTableContext()
        self.tree.insertElement(token)
        self.parser.phase = self.parser.phases["inColumnGroup"]

    def startTagCol(self, token):
        """Imply a ``colgroup`` start tag, then hand the token back.

        :returns: the unchanged ``col`` token
        """
        self.startTagColgroup(impliedTagToken("colgroup", "StartTag"))
        return token

    def startTagRowGroup(self, token):
        """Open a row group and switch to the "inTableBody" phase."""
        self.clearStackToTableContext()
        self.tree.insertElement(token)
        self.parser.phase = self.parser.phases["inTableBody"]

    def startTagImplyTbody(self, token):
        """Imply a ``tbody`` start tag, then hand the token back.

        :returns: the unchanged token
        """
        self.startTagRowGroup(impliedTagToken("tbody", "StartTag"))
        return token

    def startTagTable(self, token):
        """Treat a nested ``<table>`` as an implied ``</table>``.

        :returns: the token, unless ``parser.innerHTML`` is set
        """
        self.parser.parseError("unexpected-start-tag-implies-end-tag",
                               {"startName": "table", "endName": "table"})
        self.parser.phase.processEndTag(impliedTagToken("table"))
        if not self.parser.innerHTML:
            return token

    def startTagStyleScript(self, token):
        """Delegate ``style`` and ``script`` start tags to "inHead"."""
        return self.parser.phases["inHead"].processStartTag(token)

    def startTagInput(self, token):
        """Insert hidden inputs in place; treat others as any other tag."""
        if ("type" in token["data"] and
                token["data"]["type"].translate(asciiUpper2Lower) == "hidden"):
            self.parser.parseError("unexpected-hidden-input-in-table")
            self.tree.insertElement(token)
            # XXX associate with form
            self.tree.openElements.pop()
        else:
            self.startTagOther(token)

    def startTagForm(self, token):
        """Insert a ``form`` only when no form pointer is set yet."""
        self.parser.parseError("unexpected-form-in-table")
        if self.tree.formPointer is None:
            self.tree.insertElement(token)
            self.tree.formPointer = self.tree.openElements[-1]
            self.tree.openElements.pop()

    def startTagOther(self, token):
        """Process any other start tag via "inBody" with table magic.

        :arg token: the start tag token
        """
        self.parser.parseError("unexpected-start-tag-implies-table-voodoo", {"name": token["name"]})
        # Do the table magic!
        self.tree.insertFromTable = True
        try:
            self.parser.phases["inBody"].processStartTag(token)
        finally:
            # Never leave the flag set, even if the handler raised.
            self.tree.insertFromTable = False

    def endTagTable(self, token):
        """Close the table that is in table scope and reset the mode."""
        if self.tree.elementInScope("table", variant="table"):
            self.tree.generateImpliedEndTags()
            if self.tree.openElements[-1].name != "table":
                self.parser.parseError("end-tag-too-early-named",
                                       {"gotName": "table",
                                        "expectedName": self.tree.openElements[-1].name})
            while self.tree.openElements[-1].name != "table":
                self.tree.openElements.pop()
            self.tree.openElements.pop()
            self.parser.resetInsertionMode()
        else:
            # innerHTML case
            assert self.parser.innerHTML
            self.parser.parseError()

    def endTagIgnore(self, token):
        """Report the end tag as unexpected and ignore it."""
        self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

    def endTagOther(self, token):
        """Process any other end tag via "inBody" with table magic.

        :arg token: the end tag token
        """
        self.parser.parseError("unexpected-end-tag-implies-table-voodoo", {"name": token["name"]})
        # Do the table magic!
        self.tree.insertFromTable = True
        try:
            self.parser.phases["inBody"].processEndTag(token)
        finally:
            # Never leave the flag set, even if the handler raised.
            self.tree.insertFromTable = False
1830 | |||
class InTableTextPhase(Phase):
    """Buffers character tokens seen inside a table.

    Tokens are collected in ``characterTokens`` until a non-character
    token arrives; the buffer is then flushed and control returns to
    ``originalPhase``, which the phase that switched here is expected
    to have set.
    """

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)
        self.characterTokens = []
        self.originalPhase = None

    def flushCharacters(self):
        """Emit the buffered text and empty the buffer.

        Text containing any character outside ``spaceCharacters`` goes
        through the "inTable" phase's ``insertText``; non-empty
        whitespace-only text is inserted into the tree directly.
        """
        text = "".join(pending["data"] for pending in self.characterTokens)
        hasNonSpace = any(char not in spaceCharacters for char in text)
        if hasNonSpace:
            self.parser.phases["inTable"].insertText(
                {"type": tokenTypes["Characters"], "data": text})
        elif text:
            self.tree.insertText(text)
        self.characterTokens = []

    def processComment(self, token):
        """Flush, restore the original phase and hand the token back."""
        self.flushCharacters()
        self.parser.phase = self.originalPhase
        return token

    def processEOF(self):
        """Flush, restore the original phase and return ``True``."""
        self.flushCharacters()
        self.parser.phase = self.originalPhase
        return True

    def processCharacters(self, token):
        """Buffer the token, dropping a lone NUL character."""
        if token["data"] != "\u0000":
            self.characterTokens.append(token)

    def processSpaceCharacters(self, token):
        """Buffer a whitespace token."""
        # pretty sure we should never reach here
        self.characterTokens.append(token)
        # assert False

    def processStartTag(self, token):
        """Flush, restore the original phase and hand the token back."""
        self.flushCharacters()
        self.parser.phase = self.originalPhase
        return token

    def processEndTag(self, token):
        """Flush, restore the original phase and hand the token back."""
        self.flushCharacters()
        self.parser.phase = self.originalPhase
        return token
1875 | |||
class InCaptionPhase(Phase):
    """Handlers for the "in caption" insertion mode."""
    # http://www.whatwg.org/specs/web-apps/current-work/#in-caption

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        startDispatch = _utils.MethodDispatcher([
            ("html", self.startTagHtml),
            (("caption", "col", "colgroup", "tbody", "td", "tfoot", "th",
              "thead", "tr"), self.startTagTableElement)
        ])
        startDispatch.default = self.startTagOther
        self.startTagHandler = startDispatch

        endDispatch = _utils.MethodDispatcher([
            ("caption", self.endTagCaption),
            ("table", self.endTagTable),
            (("body", "col", "colgroup", "html", "tbody", "td", "tfoot", "th",
              "thead", "tr"), self.endTagIgnore)
        ])
        endDispatch.default = self.endTagOther
        self.endTagHandler = endDispatch

    def ignoreEndTagCaption(self):
        """Return True when no ``caption`` is in table scope."""
        captionOpen = self.tree.elementInScope("caption", variant="table")
        return not captionOpen

    def processEOF(self):
        """Delegate end-of-file handling to the "inBody" phase."""
        self.parser.phases["inBody"].processEOF()

    def processCharacters(self, token):
        """Delegate character tokens to the "inBody" phase."""
        bodyPhase = self.parser.phases["inBody"]
        return bodyPhase.processCharacters(token)

    def startTagTableElement(self, token):
        """Imply ``</caption>`` before a table-structure start tag.

        :returns: the token when the implied end tag was not ignored,
            otherwise ``None``
        """
        self.parser.parseError()
        # XXX Have to duplicate logic here to find out if the tag is ignored
        captionMissing = self.ignoreEndTagCaption()
        self.parser.phase.processEndTag(impliedTagToken("caption"))
        return None if captionMissing else token

    def startTagOther(self, token):
        """Delegate any other start tag to the "inBody" phase."""
        bodyPhase = self.parser.phases["inBody"]
        return bodyPhase.processStartTag(token)

    def endTagCaption(self, token):
        """Close the open ``caption`` and return to the "inTable" phase."""
        if self.ignoreEndTagCaption():
            # innerHTML case
            assert self.parser.innerHTML
            self.parser.parseError()
            return

        # AT this code is quite similar to endTagTable in "InTable"
        self.tree.generateImpliedEndTags()
        currentName = self.tree.openElements[-1].name
        if currentName != "caption":
            self.parser.parseError("expected-one-end-tag-but-got-another",
                                   {"gotName": "caption",
                                    "expectedName": currentName})
        # Pop up to and including the caption element.
        while self.tree.openElements.pop().name != "caption":
            pass
        self.tree.clearActiveFormattingElements()
        self.parser.phase = self.parser.phases["inTable"]

    def endTagTable(self, token):
        """Imply ``</caption>`` before handling ``</table>``.

        :returns: the token when the implied end tag was not ignored,
            otherwise ``None``
        """
        self.parser.parseError()
        captionMissing = self.ignoreEndTagCaption()
        self.parser.phase.processEndTag(impliedTagToken("caption"))
        return None if captionMissing else token

    def endTagIgnore(self, token):
        """Report the end tag as unexpected and ignore it."""
        self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

    def endTagOther(self, token):
        """Delegate any other end tag to the "inBody" phase."""
        bodyPhase = self.parser.phases["inBody"]
        return bodyPhase.processEndTag(token)
1946 | |||
class InColumnGroupPhase(Phase):
    """Handlers for the "in column group" insertion mode."""
    # http://www.whatwg.org/specs/web-apps/current-work/#in-column

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        startDispatch = _utils.MethodDispatcher([
            ("html", self.startTagHtml),
            ("col", self.startTagCol)
        ])
        startDispatch.default = self.startTagOther
        self.startTagHandler = startDispatch

        endDispatch = _utils.MethodDispatcher([
            ("colgroup", self.endTagColgroup),
            ("col", self.endTagCol)
        ])
        endDispatch.default = self.endTagOther
        self.endTagHandler = endDispatch

    def ignoreEndTagColgroup(self):
        """Return True when the current node is the ``html`` element."""
        currentNode = self.tree.openElements[-1]
        return currentNode.name == "html"

    def processEOF(self):
        """Close the column group at end of file.

        :returns: ``True`` when a ``colgroup`` was closed, otherwise
            ``None``
        """
        if self.tree.openElements[-1].name == "html":
            assert self.parser.innerHTML
            return None
        colgroupIgnored = self.ignoreEndTagColgroup()
        self.endTagColgroup(impliedTagToken("colgroup"))
        return None if colgroupIgnored else True

    def processCharacters(self, token):
        """Imply ``</colgroup>``; hand the token back unless it was ignored."""
        colgroupIgnored = self.ignoreEndTagColgroup()
        self.endTagColgroup(impliedTagToken("colgroup"))
        return None if colgroupIgnored else token

    def startTagCol(self, token):
        """Insert a ``col`` element and close it immediately."""
        tree = self.tree
        tree.insertElement(token)
        tree.openElements.pop()
        token["selfClosingAcknowledged"] = True

    def startTagOther(self, token):
        """Imply ``</colgroup>``; hand the token back unless it was ignored."""
        colgroupIgnored = self.ignoreEndTagColgroup()
        self.endTagColgroup(impliedTagToken("colgroup"))
        return None if colgroupIgnored else token

    def endTagColgroup(self, token):
        """Pop the ``colgroup`` and return to the "inTable" phase."""
        if not self.ignoreEndTagColgroup():
            self.tree.openElements.pop()
            self.parser.phase = self.parser.phases["inTable"]
            return
        # innerHTML case
        assert self.parser.innerHTML
        self.parser.parseError()

    def endTagCol(self, token):
        """Report that ``col`` takes no end tag."""
        self.parser.parseError("no-end-tag", {"name": "col"})

    def endTagOther(self, token):
        """Imply ``</colgroup>``; hand the token back unless it was ignored."""
        colgroupIgnored = self.ignoreEndTagColgroup()
        self.endTagColgroup(impliedTagToken("colgroup"))
        return None if colgroupIgnored else token
2012 | |||
class InTableBodyPhase(Phase):
    """Handlers for the "in table body" insertion mode."""
    # http://www.whatwg.org/specs/web-apps/current-work/#in-table0

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        startDispatch = _utils.MethodDispatcher([
            ("html", self.startTagHtml),
            ("tr", self.startTagTr),
            (("td", "th"), self.startTagTableCell),
            (("caption", "col", "colgroup", "tbody", "tfoot", "thead"),
             self.startTagTableOther)
        ])
        startDispatch.default = self.startTagOther
        self.startTagHandler = startDispatch

        endDispatch = _utils.MethodDispatcher([
            (("tbody", "tfoot", "thead"), self.endTagTableRowGroup),
            ("table", self.endTagTable),
            (("body", "caption", "col", "colgroup", "html", "td", "th",
              "tr"), self.endTagIgnore)
        ])
        endDispatch.default = self.endTagOther
        self.endTagHandler = endDispatch

    # helper methods
    def clearStackToTableBodyContext(self):
        """Pop open elements until a row group or ``html`` is current."""
        stopNames = ("tbody", "tfoot", "thead", "html")
        while self.tree.openElements[-1].name not in stopNames:
            # self.parser.parseError("unexpected-implied-end-tag-in-table",
            #  {"name": self.tree.openElements[-1].name})
            self.tree.openElements.pop()
        if self.tree.openElements[-1].name == "html":
            assert self.parser.innerHTML

    # the rest
    def processEOF(self):
        """Delegate end-of-file handling to the "inTable" phase."""
        self.parser.phases["inTable"].processEOF()

    def processSpaceCharacters(self, token):
        """Delegate whitespace tokens to the "inTable" phase."""
        tablePhase = self.parser.phases["inTable"]
        return tablePhase.processSpaceCharacters(token)

    def processCharacters(self, token):
        """Delegate character tokens to the "inTable" phase."""
        tablePhase = self.parser.phases["inTable"]
        return tablePhase.processCharacters(token)

    def startTagTr(self, token):
        """Open a ``tr`` and switch to the "inRow" phase."""
        self.clearStackToTableBodyContext()
        self.tree.insertElement(token)
        self.parser.phase = self.parser.phases["inRow"]

    def startTagTableCell(self, token):
        """Imply a ``tr`` start tag before a cell, then hand the token back."""
        self.parser.parseError("unexpected-cell-in-table-body",
                               {"name": token["name"]})
        self.startTagTr(impliedTagToken("tr", "StartTag"))
        return token

    def startTagTableOther(self, token):
        """Close the current row group before a table-structure start tag.

        :returns: the token when a row group was closed, otherwise
            ``None``
        """
        # XXX AT Any ideas on how to share this with endTagTable?
        rowGroupOpen = any(self.tree.elementInScope(groupName, variant="table")
                           for groupName in ("tbody", "thead", "tfoot"))
        if not rowGroupOpen:
            # innerHTML case
            assert self.parser.innerHTML
            self.parser.parseError()
            return None
        self.clearStackToTableBodyContext()
        currentName = self.tree.openElements[-1].name
        self.endTagTableRowGroup(impliedTagToken(currentName))
        return token

    def startTagOther(self, token):
        """Delegate any other start tag to the "inTable" phase."""
        tablePhase = self.parser.phases["inTable"]
        return tablePhase.processStartTag(token)

    def endTagTableRowGroup(self, token):
        """Close the named row group and return to the "inTable" phase."""
        groupName = token["name"]
        if not self.tree.elementInScope(groupName, variant="table"):
            self.parser.parseError("unexpected-end-tag-in-table-body",
                                   {"name": groupName})
            return
        self.clearStackToTableBodyContext()
        self.tree.openElements.pop()
        self.parser.phase = self.parser.phases["inTable"]

    def endTagTable(self, token):
        """Close the current row group before handling ``</table>``.

        :returns: the token when a row group was closed, otherwise
            ``None``
        """
        rowGroupOpen = any(self.tree.elementInScope(groupName, variant="table")
                           for groupName in ("tbody", "thead", "tfoot"))
        if not rowGroupOpen:
            # innerHTML case
            assert self.parser.innerHTML
            self.parser.parseError()
            return None
        self.clearStackToTableBodyContext()
        currentName = self.tree.openElements[-1].name
        self.endTagTableRowGroup(impliedTagToken(currentName))
        return token

    def endTagIgnore(self, token):
        """Report the end tag as unexpected and ignore it."""
        self.parser.parseError("unexpected-end-tag-in-table-body",
                               {"name": token["name"]})

    def endTagOther(self, token):
        """Delegate any other end tag to the "inTable" phase."""
        tablePhase = self.parser.phases["inTable"]
        return tablePhase.processEndTag(token)
2110 | |||
class InRowPhase(Phase):
    """Handlers for the "in row" insertion mode."""
    # http://www.whatwg.org/specs/web-apps/current-work/#in-row

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        startDispatch = _utils.MethodDispatcher([
            ("html", self.startTagHtml),
            (("td", "th"), self.startTagTableCell),
            (("caption", "col", "colgroup", "tbody", "tfoot", "thead",
              "tr"), self.startTagTableOther)
        ])
        startDispatch.default = self.startTagOther
        self.startTagHandler = startDispatch

        endDispatch = _utils.MethodDispatcher([
            ("tr", self.endTagTr),
            ("table", self.endTagTable),
            (("tbody", "tfoot", "thead"), self.endTagTableRowGroup),
            (("body", "caption", "col", "colgroup", "html", "td", "th"),
             self.endTagIgnore)
        ])
        endDispatch.default = self.endTagOther
        self.endTagHandler = endDispatch

    # helper methods (XXX unify this with other table helper methods)
    def clearStackToTableRowContext(self):
        """Pop open elements until ``tr`` or ``html`` is current.

        Each element removed this way is reported as a parse error.
        """
        while True:
            currentName = self.tree.openElements[-1].name
            if currentName in ("tr", "html"):
                break
            self.parser.parseError("unexpected-implied-end-tag-in-table-row",
                                   {"name": currentName})
            self.tree.openElements.pop()

    def ignoreEndTagTr(self):
        """Return True when no ``tr`` is in table scope."""
        rowOpen = self.tree.elementInScope("tr", variant="table")
        return not rowOpen

    # the rest
    def processEOF(self):
        """Delegate end-of-file handling to the "inTable" phase."""
        self.parser.phases["inTable"].processEOF()

    def processSpaceCharacters(self, token):
        """Delegate whitespace tokens to the "inTable" phase."""
        tablePhase = self.parser.phases["inTable"]
        return tablePhase.processSpaceCharacters(token)

    def processCharacters(self, token):
        """Delegate character tokens to the "inTable" phase."""
        tablePhase = self.parser.phases["inTable"]
        return tablePhase.processCharacters(token)

    def startTagTableCell(self, token):
        """Open a cell, switch to "inCell" and push a formatting marker."""
        self.clearStackToTableRowContext()
        self.tree.insertElement(token)
        self.parser.phase = self.parser.phases["inCell"]
        self.tree.activeFormattingElements.append(Marker)

    def startTagTableOther(self, token):
        """Imply ``</tr>`` before a table-structure start tag.

        :returns: the token when the implied end tag was not ignored,
            otherwise ``None``
        """
        rowMissing = self.ignoreEndTagTr()
        self.endTagTr(impliedTagToken("tr"))
        # XXX how are we sure it's always ignored in the innerHTML case?
        return None if rowMissing else token

    def startTagOther(self, token):
        """Delegate any other start tag to the "inTable" phase."""
        tablePhase = self.parser.phases["inTable"]
        return tablePhase.processStartTag(token)

    def endTagTr(self, token):
        """Close the open ``tr`` and return to the "inTableBody" phase."""
        if self.ignoreEndTagTr():
            # innerHTML case
            assert self.parser.innerHTML
            self.parser.parseError()
            return
        self.clearStackToTableRowContext()
        self.tree.openElements.pop()
        self.parser.phase = self.parser.phases["inTableBody"]

    def endTagTable(self, token):
        """Imply ``</tr>`` before handling ``</table>``.

        :returns: the token when the implied end tag was not ignored,
            otherwise ``None``
        """
        rowMissing = self.ignoreEndTagTr()
        self.endTagTr(impliedTagToken("tr"))
        # Reprocess the current tag if the tr end tag was not ignored
        # XXX how are we sure it's always ignored in the innerHTML case?
        return None if rowMissing else token

    def endTagTableRowGroup(self, token):
        """Imply ``</tr>`` before a row-group end tag that is in scope.

        :returns: the token when the row group is in table scope,
            otherwise ``None`` after reporting a parse error
        """
        if not self.tree.elementInScope(token["name"], variant="table"):
            self.parser.parseError()
            return None
        self.endTagTr(impliedTagToken("tr"))
        return token

    def endTagIgnore(self, token):
        """Report the end tag as unexpected and ignore it."""
        self.parser.parseError("unexpected-end-tag-in-table-row",
                               {"name": token["name"]})

    def endTagOther(self, token):
        """Delegate any other end tag to the "inTable" phase."""
        tablePhase = self.parser.phases["inTable"]
        return tablePhase.processEndTag(token)
2199 | |||
2200 | class InCellPhase(Phase): | ||
2201 | # http://www.whatwg.org/specs/web-apps/current-work/#in-cell | ||
2202 | def __init__(self, parser, tree): | ||
2203 | Phase.__init__(self, parser, tree) | ||
2204 | self.startTagHandler = _utils.MethodDispatcher([ | ||
2205 | ("html", self.startTagHtml), | ||
2206 | (("caption", "col", "colgroup", "tbody", "td", "tfoot", "th", | ||
2207 | "thead", "tr"), self.startTagTableOther) | ||
2208 | ]) | ||
2209 | self.startTagHandler.default = self.startTagOther | ||
2210 | |||
2211 | self.endTagHandler = _utils.MethodDispatcher([ | ||
2212 | (("td", "th"), self.endTagTableCell), | ||
2213 | (("body", "caption", "col", "colgroup", "html"), self.endTagIgnore), | ||
2214 | (("table", "tbody", "tfoot", "thead", "tr"), self.endTagImply) | ||
2215 | ]) | ||
2216 | self.endTagHandler.default = self.endTagOther | ||
2217 | |||
2218 | # helper | ||
2219 | def closeCell(self): | ||
2220 | if self.tree.elementInScope("td", variant="table"): | ||
2221 | self.endTagTableCell(impliedTagToken("td")) | ||
2222 | elif self.tree.elementInScope("th", variant="table"): | ||
2223 | self.endTagTableCell(impliedTagToken("th")) | ||
2224 | |||
2225 | # the rest | ||
2226 | def processEOF(self): | ||
2227 | self.parser.phases["inBody"].processEOF() | ||
2228 | |||
2229 | def processCharacters(self, token): | ||
2230 | return self.parser.phases["inBody"].processCharacters(token) | ||
2231 | |||
2232 | def startTagTableOther(self, token): | ||
2233 | if (self.tree.elementInScope("td", variant="table") or | ||
2234 | self.tree.elementInScope("th", variant="table")): | ||
2235 | self.closeCell() | ||
2236 | return token | ||
2237 | else: | ||
2238 | # innerHTML case | ||
2239 | assert self.parser.innerHTML | ||
2240 | self.parser.parseError() | ||
2241 | |||
2242 | def startTagOther(self, token): | ||
2243 | return self.parser.phases["inBody"].processStartTag(token) | ||
2244 | |||
def endTagTableCell(self, token):
    """Handle a ``td``/``th`` end tag.

    When the named cell is in table scope, implied end tags are
    generated, the stack of open elements is popped up to and including
    the cell, the active formatting elements are cleared and the parser
    moves to the "inRow" phase.  Otherwise only a parse error is
    reported.

    :arg token: the end tag token
    """
    cellName = token["name"]
    if not self.tree.elementInScope(cellName, variant="table"):
        self.parser.parseError("unexpected-end-tag", {"name": cellName})
        return

    self.tree.generateImpliedEndTags(cellName)
    stack = self.tree.openElements
    if stack[-1].name == cellName:
        stack.pop()
    else:
        # Something other than the cell is still open on top of it:
        # report it, then unwind until the cell itself has been popped.
        self.parser.parseError("unexpected-cell-end-tag",
                               {"name": cellName})
        while stack.pop().name != cellName:
            pass
    self.tree.clearActiveFormattingElements()
    self.parser.phase = self.parser.phases["inRow"]
2261 | |||
def endTagIgnore(self, token):
    """Report the end tag as unexpected and otherwise ignore it.

    :arg token: the end tag token
    """
    errorData = {"name": token["name"]}
    self.parser.parseError("unexpected-end-tag", errorData)
2264 | |||
def endTagImply(self, token):
    """Handle a table-structure end tag seen while inside a cell.

    :arg token: the end tag token

    :returns: the token itself once the open cell has been closed
        (presumably so the caller reprocesses it), otherwise ``None``
    """
    if not self.tree.elementInScope(token["name"], variant="table"):
        # sometimes innerHTML case
        self.parser.parseError()
        return None

    self.closeCell()
    return token
2272 | |||
def endTagOther(self, token):
    """Handle any other end tag by delegating to the "inBody" phase.

    :arg token: the end tag token

    :returns: whatever the "inBody" handler returns
    """
    inBody = self.parser.phases["inBody"]
    return inBody.processEndTag(token)
2275 | |||
class InSelectPhase(Phase):
    """Token handlers for the "inSelect" phase.

    http://www.whatwg.org/specs/web-apps/current-work/#in-select
    """

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        # Start tags with a dedicated handler; every other start tag
        # goes to startTagOther.
        startTagTable = [
            ("html", self.startTagHtml),
            ("option", self.startTagOption),
            ("optgroup", self.startTagOptgroup),
            ("select", self.startTagSelect),
            (("input", "keygen", "textarea"), self.startTagInput),
            ("script", self.startTagScript),
        ]
        self.startTagHandler = _utils.MethodDispatcher(startTagTable)
        self.startTagHandler.default = self.startTagOther

        # End tags with a dedicated handler; every other end tag goes to
        # endTagOther.
        endTagTable = [
            ("option", self.endTagOption),
            ("optgroup", self.endTagOptgroup),
            ("select", self.endTagSelect),
        ]
        self.endTagHandler = _utils.MethodDispatcher(endTagTable)
        self.endTagHandler.default = self.endTagOther

    def processEOF(self):
        """Report EOF inside select unless only ``html`` is left open."""
        if self.tree.openElements[-1].name == "html":
            assert self.parser.innerHTML
        else:
            self.parser.parseError("eof-in-select")

    def processCharacters(self, token):
        """Insert the token's text, silently dropping a lone NUL."""
        text = token["data"]
        if text != "\u0000":
            self.tree.insertText(text)

    def startTagOption(self, token):
        """Insert an ``option``, first closing an ``option`` that is the
        current node."""
        stack = self.tree.openElements
        # We need to imply </option> if <option> is the current node.
        if stack[-1].name == "option":
            stack.pop()
        self.tree.insertElement(token)

    def startTagOptgroup(self, token):
        """Insert an ``optgroup``, first closing an open ``option`` and
        then an open ``optgroup`` if each is the current node in turn."""
        stack = self.tree.openElements
        for impliedClose in ("option", "optgroup"):
            if stack[-1].name == impliedClose:
                stack.pop()
        self.tree.insertElement(token)

    def startTagSelect(self, token):
        """Treat a nested ``<select>`` as an (erroneous) ``</select>``."""
        self.parser.parseError("unexpected-select-in-select")
        self.endTagSelect(impliedTagToken("select"))

    def startTagInput(self, token):
        """Report ``input``/``keygen``/``textarea`` inside select.

        :returns: the token after closing the select when one is in
            select scope (presumably so the caller reprocesses it),
            otherwise ``None``
        """
        self.parser.parseError("unexpected-input-in-select")
        if not self.tree.elementInScope("select", variant="select"):
            assert self.parser.innerHTML
            return None
        self.endTagSelect(impliedTagToken("select"))
        return token

    def startTagScript(self, token):
        """Delegate ``script`` start tags to the "inHead" phase."""
        inHead = self.parser.phases["inHead"]
        return inHead.processStartTag(token)

    def startTagOther(self, token):
        """Report any other start tag as a parse error and drop it."""
        errorData = {"name": token["name"]}
        self.parser.parseError("unexpected-start-tag-in-select", errorData)

    def endTagOption(self, token):
        """Pop ``option`` if it is the current node, else report an
        error."""
        if self.tree.openElements[-1].name != "option":
            self.parser.parseError("unexpected-end-tag-in-select",
                                   {"name": "option"})
        else:
            self.tree.openElements.pop()

    def endTagOptgroup(self, token):
        """Close an ``optgroup`` (and an ``option`` directly inside it)."""
        stack = self.tree.openElements
        # </optgroup> implicitly closes <option>
        optionInsideOptgroup = (stack[-1].name == "option" and
                                stack[-2].name == "optgroup")
        if optionInsideOptgroup:
            stack.pop()
        # It also closes </optgroup>, but nothing else
        if stack[-1].name != "optgroup":
            self.parser.parseError("unexpected-end-tag-in-select",
                                   {"name": "optgroup"})
            return
        stack.pop()

    def endTagSelect(self, token):
        """Pop the stack through ``select`` and reset the insertion
        mode."""
        if not self.tree.elementInScope("select", variant="select"):
            # innerHTML case
            assert self.parser.innerHTML
            self.parser.parseError()
            return
        stack = self.tree.openElements
        while stack.pop().name != "select":
            pass
        self.parser.resetInsertionMode()

    def endTagOther(self, token):
        """Report any other end tag as a parse error and drop it."""
        errorData = {"name": token["name"]}
        self.parser.parseError("unexpected-end-tag-in-select", errorData)
2375 | |||
class InSelectInTablePhase(Phase):
    """Token handlers for the "inSelectInTable" phase.

    Table-structure tags get special treatment; everything else is
    delegated to the "inSelect" phase.
    """

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        # The same tag names are special-cased for start and end tags.
        tableTagNames = ("caption", "table", "tbody", "tfoot", "thead",
                         "tr", "td", "th")

        self.startTagHandler = _utils.MethodDispatcher([
            (tableTagNames, self.startTagTable),
        ])
        self.startTagHandler.default = self.startTagOther

        self.endTagHandler = _utils.MethodDispatcher([
            (tableTagNames, self.endTagTable),
        ])
        self.endTagHandler.default = self.endTagOther

    def processEOF(self):
        """Delegate EOF to "inSelect"; its result is not propagated."""
        inSelect = self.parser.phases["inSelect"]
        inSelect.processEOF()

    def processCharacters(self, token):
        """Delegate character tokens to the "inSelect" phase."""
        inSelect = self.parser.phases["inSelect"]
        return inSelect.processCharacters(token)

    def startTagTable(self, token):
        """Report the table start tag, close the select via an implied
        ``</select>`` and hand the token back to the caller."""
        errorData = {"name": token["name"]}
        self.parser.parseError("unexpected-table-element-start-tag-in-select-in-table", errorData)
        self.endTagOther(impliedTagToken("select"))
        return token

    def startTagOther(self, token):
        """Delegate any other start tag to the "inSelect" phase."""
        inSelect = self.parser.phases["inSelect"]
        return inSelect.processStartTag(token)

    def endTagTable(self, token):
        """Report the table end tag; when the named element is in table
        scope, also close the select and hand the token back."""
        tagName = token["name"]
        self.parser.parseError("unexpected-table-element-end-tag-in-select-in-table", {"name": tagName})
        if not self.tree.elementInScope(tagName, variant="table"):
            return None
        self.endTagOther(impliedTagToken("select"))
        return token

    def endTagOther(self, token):
        """Delegate any other end tag to the "inSelect" phase."""
        inSelect = self.parser.phases["inSelect"]
        return inSelect.processEndTag(token)
2414 | |||
class InForeignContentPhase(Phase):
    """Token handlers for the "inForeignContent" phase.

    Used while the current node is in a non-default namespace (the code
    below distinguishes the MathML and SVG namespaces).
    """

    # Start tags that break out of foreign content back into HTML.
    breakoutElements = frozenset(["b", "big", "blockquote", "body", "br",
                                  "center", "code", "dd", "div", "dl", "dt",
                                  "em", "embed", "h1", "h2", "h3",
                                  "h4", "h5", "h6", "head", "hr", "i", "img",
                                  "li", "listing", "menu", "meta", "nobr",
                                  "ol", "p", "pre", "ruby", "s", "small",
                                  "span", "strong", "strike", "sub", "sup",
                                  "table", "tt", "u", "ul", "var"])

    # A ``font`` start tag only breaks out when it carries at least one of
    # these attributes.
    fontBreakoutAttributes = frozenset(["color", "face", "size"])

    # Lower-case SVG tag name -> mixed-case spelling.  Kept at class level
    # so the table is built once rather than on every SVG start tag.
    svgTagNameReplacements = {
        "altglyph": "altGlyph",
        "altglyphdef": "altGlyphDef",
        "altglyphitem": "altGlyphItem",
        "animatecolor": "animateColor",
        "animatemotion": "animateMotion",
        "animatetransform": "animateTransform",
        "clippath": "clipPath",
        "feblend": "feBlend",
        "fecolormatrix": "feColorMatrix",
        "fecomponenttransfer": "feComponentTransfer",
        "fecomposite": "feComposite",
        "feconvolvematrix": "feConvolveMatrix",
        "fediffuselighting": "feDiffuseLighting",
        "fedisplacementmap": "feDisplacementMap",
        "fedistantlight": "feDistantLight",
        "feflood": "feFlood",
        "fefunca": "feFuncA",
        "fefuncb": "feFuncB",
        "fefuncg": "feFuncG",
        "fefuncr": "feFuncR",
        "fegaussianblur": "feGaussianBlur",
        "feimage": "feImage",
        "femerge": "feMerge",
        "femergenode": "feMergeNode",
        "femorphology": "feMorphology",
        "feoffset": "feOffset",
        "fepointlight": "fePointLight",
        "fespecularlighting": "feSpecularLighting",
        "fespotlight": "feSpotLight",
        "fetile": "feTile",
        "feturbulence": "feTurbulence",
        "foreignobject": "foreignObject",
        "glyphref": "glyphRef",
        "lineargradient": "linearGradient",
        "radialgradient": "radialGradient",
        "textpath": "textPath",
    }

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

    def adjustSVGTagNames(self, token):
        """Rewrite ``token["name"]`` in place to its mixed-case SVG form.

        Names without an entry in ``svgTagNameReplacements`` are left
        untouched.

        :arg token: the start tag token to adjust
        """
        fixedName = self.svgTagNameReplacements.get(token["name"])
        if fixedName is not None:
            token["name"] = fixedName

    def processCharacters(self, token):
        """Handle character data inside foreign content.

        A lone NUL is replaced by U+FFFD; otherwise any non-space
        character clears ``parser.framesetOK``.  The token is then passed
        to the base ``Phase`` implementation.

        :arg token: the character token
        """
        if token["data"] == "\u0000":
            token["data"] = "\uFFFD"
        elif (self.parser.framesetOK and
              any(char not in spaceCharacters for char in token["data"])):
            self.parser.framesetOK = False
        Phase.processCharacters(self, token)

    def processStartTag(self, token):
        """Handle a start tag inside foreign content.

        Breakout tags pop the stack back to an HTML element or an
        integration point and are handed back to the caller.  Any other
        tag is inserted in the current node's namespace after attribute
        and tag-name fix-ups.

        :arg token: the start tag token

        :returns: the token for breakout tags (presumably so the caller
            reprocesses it), otherwise ``None``
        """
        currentNode = self.tree.openElements[-1]
        tagName = token["name"]
        breaksOut = (
            tagName in self.breakoutElements or
            (tagName == "font" and
             not self.fontBreakoutAttributes.isdisjoint(token["data"])))
        if breaksOut:
            self.parser.parseError("unexpected-html-element-in-foreign-content",
                                   {"name": tagName})
            # Unwind until the current node is HTML or an integration point.
            while (self.tree.openElements[-1].namespace !=
                   self.tree.defaultNamespace and
                   not self.parser.isHTMLIntegrationPoint(self.tree.openElements[-1]) and
                   not self.parser.isMathMLTextIntegrationPoint(self.tree.openElements[-1])):
                self.tree.openElements.pop()
            return token

        if currentNode.namespace == namespaces["mathml"]:
            self.parser.adjustMathMLAttributes(token)
        elif currentNode.namespace == namespaces["svg"]:
            self.adjustSVGTagNames(token)
            self.parser.adjustSVGAttributes(token)
        self.parser.adjustForeignAttributes(token)
        # The new element inherits the namespace of the node it goes into.
        token["namespace"] = currentNode.namespace
        self.tree.insertElement(token)
        if token["selfClosing"]:
            self.tree.openElements.pop()
            token["selfClosingAcknowledged"] = True

    def processEndTag(self, token):
        """Handle an end tag inside foreign content.

        Walks down the stack of open elements from the current node.  If
        a node whose lower-cased name matches the token is found first,
        the stack is popped through that node.  If a node in the default
        namespace is reached first, the token is passed to the parser's
        current phase instead.

        :arg token: the end tag token

        :returns: ``None`` when a matching node was popped, otherwise
            whatever the current phase's ``processEndTag`` returns
        """
        nodeIndex = len(self.tree.openElements) - 1
        node = self.tree.openElements[-1]
        if node.name.translate(asciiUpper2Lower) != token["name"]:
            self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

        while True:
            if node.name.translate(asciiUpper2Lower) == token["name"]:
                # XXX this isn't in the spec but it seems necessary
                if self.parser.phase == self.parser.phases["inTableText"]:
                    self.parser.phase.flushCharacters()
                    self.parser.phase = self.parser.phase.originalPhase
                # Pop everything above the match, then the match itself.
                while self.tree.openElements.pop() != node:
                    assert self.tree.openElements
                new_token = None
                break
            nodeIndex -= 1

            node = self.tree.openElements[nodeIndex]
            if node.namespace != self.tree.defaultNamespace:
                continue
            else:
                new_token = self.parser.phase.processEndTag(token)
                break
        return new_token
2529 | |||
class AfterBodyPhase(Phase):
    """Token handlers for the "afterBody" phase."""

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        startTagTable = [("html", self.startTagHtml)]
        self.startTagHandler = _utils.MethodDispatcher(startTagTable)
        self.startTagHandler.default = self.startTagOther

        endTagTable = [("html", self.endTagHtml)]
        self.endTagHandler = _utils.MethodDispatcher(endTagTable)
        self.endTagHandler.default = self.endTagOther

    def processEOF(self):
        """Stop parsing: nothing is left to do at end-of-file."""

    def processComment(self, token):
        """Attach the comment to the first open element.

        This is needed because data is to be appended to the <html>
        element here and not to whatever is currently open.
        """
        htmlElement = self.tree.openElements[0]
        self.tree.insertComment(token, htmlElement)

    def processCharacters(self, token):
        """Report stray text, fall back to "inBody" and hand the token
        back to the caller."""
        self.parser.parseError("unexpected-char-after-body")
        self.parser.phase = self.parser.phases["inBody"]
        return token

    def startTagHtml(self, token):
        """Delegate ``html`` start tags to the "inBody" phase."""
        inBody = self.parser.phases["inBody"]
        return inBody.processStartTag(token)

    def startTagOther(self, token):
        """Report a stray start tag, fall back to "inBody" and hand the
        token back to the caller."""
        errorData = {"name": token["name"]}
        self.parser.parseError("unexpected-start-tag-after-body", errorData)
        self.parser.phase = self.parser.phases["inBody"]
        return token

    def endTagHtml(self, name):
        """Move to "afterAfterBody", or report an error in innerHTML mode.

        The argument is not used.
        """
        if not self.parser.innerHTML:
            self.parser.phase = self.parser.phases["afterAfterBody"]
        else:
            self.parser.parseError("unexpected-end-tag-after-body-innerhtml")

    def endTagOther(self, token):
        """Report a stray end tag, fall back to "inBody" and hand the
        token back to the caller."""
        errorData = {"name": token["name"]}
        self.parser.parseError("unexpected-end-tag-after-body", errorData)
        self.parser.phase = self.parser.phases["inBody"]
        return token
2576 | |||
class InFramesetPhase(Phase):
    """Token handlers for the "inFrameset" phase.

    http://www.whatwg.org/specs/web-apps/current-work/#in-frameset
    """

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        startTagTable = [
            ("html", self.startTagHtml),
            ("frameset", self.startTagFrameset),
            ("frame", self.startTagFrame),
            ("noframes", self.startTagNoframes),
        ]
        self.startTagHandler = _utils.MethodDispatcher(startTagTable)
        self.startTagHandler.default = self.startTagOther

        endTagTable = [("frameset", self.endTagFrameset)]
        self.endTagHandler = _utils.MethodDispatcher(endTagTable)
        self.endTagHandler.default = self.endTagOther

    def processEOF(self):
        """Report EOF inside frameset unless only ``html`` is left open."""
        if self.tree.openElements[-1].name == "html":
            assert self.parser.innerHTML
        else:
            self.parser.parseError("eof-in-frameset")

    def processCharacters(self, token):
        """Report character data as a parse error and drop it."""
        self.parser.parseError("unexpected-char-in-frameset")

    def startTagFrameset(self, token):
        """Insert a nested ``frameset`` element."""
        self.tree.insertElement(token)

    def startTagFrame(self, token):
        """Insert a ``frame`` element and immediately pop it again."""
        self.tree.insertElement(token)
        self.tree.openElements.pop()

    def startTagNoframes(self, token):
        """Delegate ``noframes`` start tags to the "inBody" phase."""
        inBody = self.parser.phases["inBody"]
        return inBody.processStartTag(token)

    def startTagOther(self, token):
        """Report any other start tag as a parse error and drop it."""
        errorData = {"name": token["name"]}
        self.parser.parseError("unexpected-start-tag-in-frameset", errorData)

    def endTagFrameset(self, token):
        """Close the current ``frameset`` and possibly leave this phase."""
        stack = self.tree.openElements
        if stack[-1].name != "html":
            stack.pop()
        else:
            # innerHTML case
            self.parser.parseError("unexpected-frameset-in-frameset-innerhtml")

        if self.parser.innerHTML:
            return
        if stack[-1].name != "frameset":
            # If we're not in innerHTML mode and the current node is not a
            # "frameset" element (anymore) then switch.
            self.parser.phase = self.parser.phases["afterFrameset"]

    def endTagOther(self, token):
        """Report any other end tag as a parse error and drop it."""
        errorData = {"name": token["name"]}
        self.parser.parseError("unexpected-end-tag-in-frameset", errorData)
2633 | |||
class AfterFramesetPhase(Phase):
    """Token handlers for the "afterFrameset" phase.

    http://www.whatwg.org/specs/web-apps/current-work/#after3
    """

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        startTagTable = [
            ("html", self.startTagHtml),
            ("noframes", self.startTagNoframes),
        ]
        self.startTagHandler = _utils.MethodDispatcher(startTagTable)
        self.startTagHandler.default = self.startTagOther

        endTagTable = [("html", self.endTagHtml)]
        self.endTagHandler = _utils.MethodDispatcher(endTagTable)
        self.endTagHandler.default = self.endTagOther

    def processEOF(self):
        """Stop parsing: nothing is left to do at end-of-file."""

    def processCharacters(self, token):
        """Report character data as a parse error and drop it."""
        self.parser.parseError("unexpected-char-after-frameset")

    def startTagNoframes(self, token):
        """Delegate ``noframes`` start tags to the "inHead" phase."""
        inHead = self.parser.phases["inHead"]
        return inHead.processStartTag(token)

    def startTagOther(self, token):
        """Report any other start tag as a parse error and drop it."""
        errorData = {"name": token["name"]}
        self.parser.parseError("unexpected-start-tag-after-frameset",
                               errorData)

    def endTagHtml(self, token):
        """Move the parser to the "afterAfterFrameset" phase."""
        nextPhase = self.parser.phases["afterAfterFrameset"]
        self.parser.phase = nextPhase

    def endTagOther(self, token):
        """Report any other end tag as a parse error and drop it."""
        errorData = {"name": token["name"]}
        self.parser.parseError("unexpected-end-tag-after-frameset",
                               errorData)
2670 | |||
class AfterAfterBodyPhase(Phase):
    """Token handlers for the "afterAfterBody" phase.

    Apart from comments, whitespace and ``html`` start tags, every token
    is a parse error that sends the parser back to "inBody".
    """

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        startTagTable = [("html", self.startTagHtml)]
        self.startTagHandler = _utils.MethodDispatcher(startTagTable)
        self.startTagHandler.default = self.startTagOther

    def processEOF(self):
        """Nothing is left to do at end-of-file."""

    def processComment(self, token):
        """Attach the comment to the document itself."""
        documentNode = self.tree.document
        self.tree.insertComment(token, documentNode)

    def processSpaceCharacters(self, token):
        """Delegate whitespace to the "inBody" phase."""
        inBody = self.parser.phases["inBody"]
        return inBody.processSpaceCharacters(token)

    def processCharacters(self, token):
        """Report stray text, fall back to "inBody" and hand the token
        back to the caller."""
        self.parser.parseError("expected-eof-but-got-char")
        self.parser.phase = self.parser.phases["inBody"]
        return token

    def startTagHtml(self, token):
        """Delegate ``html`` start tags to the "inBody" phase."""
        inBody = self.parser.phases["inBody"]
        return inBody.processStartTag(token)

    def startTagOther(self, token):
        """Report a stray start tag, fall back to "inBody" and hand the
        token back to the caller."""
        errorData = {"name": token["name"]}
        self.parser.parseError("expected-eof-but-got-start-tag", errorData)
        self.parser.phase = self.parser.phases["inBody"]
        return token

    def processEndTag(self, token):
        """Report a stray end tag, fall back to "inBody" and hand the
        token back to the caller."""
        errorData = {"name": token["name"]}
        self.parser.parseError("expected-eof-but-got-end-tag", errorData)
        self.parser.phase = self.parser.phases["inBody"]
        return token
2708 | |||
class AfterAfterFramesetPhase(Phase):
    """Token handlers for the "afterAfterFrameset" phase.

    Apart from comments, whitespace and ``html``/``noframes`` start tags,
    every token is reported as a parse error and dropped.
    """

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        startTagTable = [
            ("html", self.startTagHtml),
            ("noframes", self.startTagNoFrames),
        ]
        self.startTagHandler = _utils.MethodDispatcher(startTagTable)
        self.startTagHandler.default = self.startTagOther

    def processEOF(self):
        """Nothing is left to do at end-of-file."""

    def processComment(self, token):
        """Attach the comment to the document itself."""
        documentNode = self.tree.document
        self.tree.insertComment(token, documentNode)

    def processSpaceCharacters(self, token):
        """Delegate whitespace to the "inBody" phase."""
        inBody = self.parser.phases["inBody"]
        return inBody.processSpaceCharacters(token)

    def processCharacters(self, token):
        """Report character data as a parse error and drop it."""
        self.parser.parseError("expected-eof-but-got-char")

    def startTagHtml(self, token):
        """Delegate ``html`` start tags to the "inBody" phase."""
        inBody = self.parser.phases["inBody"]
        return inBody.processStartTag(token)

    def startTagNoFrames(self, token):
        """Delegate ``noframes`` start tags to the "inHead" phase."""
        inHead = self.parser.phases["inHead"]
        return inHead.processStartTag(token)

    def startTagOther(self, token):
        """Report any other start tag as a parse error and drop it."""
        errorData = {"name": token["name"]}
        self.parser.parseError("expected-eof-but-got-start-tag", errorData)

    def processEndTag(self, token):
        """Report any end tag as a parse error and drop it."""
        errorData = {"name": token["name"]}
        self.parser.parseError("expected-eof-but-got-end-tag", errorData)
2744 | # pylint:enable=unused-argument | ||
2745 | |||
2746 | return { | ||
2747 | "initial": InitialPhase, | ||
2748 | "beforeHtml": BeforeHtmlPhase, | ||
2749 | "beforeHead": BeforeHeadPhase, | ||
2750 | "inHead": InHeadPhase, | ||
2751 | "inHeadNoscript": InHeadNoscriptPhase, | ||
2752 | "afterHead": AfterHeadPhase, | ||
2753 | "inBody": InBodyPhase, | ||
2754 | "text": TextPhase, | ||
2755 | "inTable": InTablePhase, | ||
2756 | "inTableText": InTableTextPhase, | ||
2757 | "inCaption": InCaptionPhase, | ||
2758 | "inColumnGroup": InColumnGroupPhase, | ||
2759 | "inTableBody": InTableBodyPhase, | ||
2760 | "inRow": InRowPhase, | ||
2761 | "inCell": InCellPhase, | ||
2762 | "inSelect": InSelectPhase, | ||
2763 | "inSelectInTable": InSelectInTablePhase, | ||
2764 | "inForeignContent": InForeignContentPhase, | ||
2765 | "afterBody": AfterBodyPhase, | ||
2766 | "inFrameset": InFramesetPhase, | ||
2767 | "afterFrameset": AfterFramesetPhase, | ||
2768 | "afterAfterBody": AfterAfterBodyPhase, | ||
2769 | "afterAfterFrameset": AfterAfterFramesetPhase, | ||
2770 | # XXX after after frameset | ||
2771 | } | ||
2772 | |||
2773 | |||
def adjust_attributes(token, replacements):
    """Rename the token's attributes according to *replacements*.

    ``token['data']`` is rebuilt as an ``OrderedDict`` (attribute order
    kept, values untouched) only when at least one attribute name is a
    key of *replacements*; otherwise the token is left exactly as is.

    :arg token: token dict whose ``'data'`` entry maps attribute names
        to values

    :arg replacements: mapping of attribute name to its replacement
    """
    attributes = token['data']
    if not any(name in replacements for name in attributes):
        return
    renamed = OrderedDict()
    for name, value in attributes.items():
        renamed[replacements.get(name, name)] = value
    token['data'] = renamed
2779 | |||
2780 | |||
def impliedTagToken(name, type="EndTag", attributes=None,
                    selfClosing=False):
    """Build a tag token dict for a tag that was implied, not parsed.

    :arg name: tag name

    :arg type: key into ``tokenTypes`` selecting the token type

    :arg attributes: attribute mapping stored under ``"data"``; a fresh
        empty dict is used when omitted

    :arg selfClosing: value stored under ``"selfClosing"``

    :returns: the new token dict
    """
    token = {
        "type": tokenTypes[type],
        "name": name,
        "data": {} if attributes is None else attributes,
        "selfClosing": selfClosing,
    }
    return token
2787 | |||
2788 | |||
class ParseError(Exception):
    """Exception type signalling an error in the parsed document."""