diff options
author | bptato <nincsnevem662@gmail.com> | 2022-12-15 18:13:10 +0100 |
---|---|---|
committer | bptato <nincsnevem662@gmail.com> | 2022-12-15 18:18:24 +0100 |
commit | 4cd5832ec1bfd02e8afdf5777546cadbe5eaaafb (patch) | |
tree | 841fd35d6d7d4984deb88ef45d3510fffd3056b0 | |
parent | 683cf2ee3ce2fbf6304839d3b68236228fc8dff2 (diff) | |
download | chawan-4cd5832ec1bfd02e8afdf5777546cadbe5eaaafb.tar.gz |
htmltokenizer: implement a small todo
-rw-r--r-- | src/html/htmlparser.nim | 33 | ||||
-rw-r--r-- | src/html/htmltokenizer.nim | 4 |
2 files changed, 21 insertions, 16 deletions
```diff
diff --git a/src/html/htmlparser.nim b/src/html/htmlparser.nim
index ebdc6645..f04a94f7 100644
--- a/src/html/htmlparser.nim
+++ b/src/html/htmlparser.nim
@@ -99,10 +99,7 @@ proc resetInsertionMode(parser: var HTML5Parser) =
     switch_insertion_mode_and_return IN_BODY
 
 func currentNode(parser: HTML5Parser): Element =
-  if parser.openElements.len == 0:
-    assert false
-  else:
-    return parser.openElements[^1]
+  return parser.openElements[^1]
 
 func adjustedCurrentNode(parser: HTML5Parser): Element =
   if parser.fragment: parser.ctx
@@ -222,6 +219,19 @@ func createElement(parser: HTML5Parser, token: Token, namespace: Namespace, inte
   element.parserInserted = true
   return element
 
+proc pushElement(parser: var HTML5Parser, node: Element) =
+  parser.openElements.add(node)
+  parser.tokenizer.hasnonhtml = not parser.adjustedCurrentNode().inHTMLNamespace()
+
+proc popElement(parser: var HTML5Parser): Element =
+  result = parser.openElements.pop()
+  if result.tagType == TAG_TEXTAREA:
+    result.resetElement()
+  if parser.openElements.len == 0:
+    parser.tokenizer.hasnonhtml = false
+  else:
+    parser.tokenizer.hasnonhtml = not parser.adjustedCurrentNode().inHTMLNamespace()
+
 proc insert(location: AdjustedInsertionLocation, node: Node) =
   location.inside.insert(node, location.before)
 
@@ -231,7 +241,7 @@ proc insertForeignElement(parser: var HTML5Parser, token: Token, namespace: Name
   if location.inside.preInsertionValidity(element, location.before):
     #TODO custom elements
     location.insert(element)
-  parser.openElements.add(element)
+  parser.pushElement(element)
   return element
 
 proc insertHTMLElement(parser: var HTML5Parser, token: Token): Element =
@@ -462,11 +472,6 @@ proc genericRCDATAElementParsingAlgorithm(parser: var HTML5Parser, token: Token)
   parser.oldInsertionMode = parser.insertionMode
   parser.insertionMode = TEXT
 
-proc popElement(parser: var HTML5Parser): Element =
-  result = parser.openElements.pop()
-  if result.tagType == TAG_TEXTAREA:
-    result.resetElement()
-
 # 13.2.6.3
 proc generateImpliedEndTags(parser: var HTML5Parser) =
   const tags = {TAG_DD, TAG_DT, TAG_LI, TAG_OPTGROUP, TAG_OPTION, TAG_P,
@@ -826,7 +831,7 @@ proc processInHTMLContent(parser: var HTML5Parser, token: Token, insertionMode =
       "<html>" => (block:
         let element = parser.createElement(token, Namespace.HTML, parser.document)
         parser.document.append(element)
-        parser.openElements.add(element)
+        parser.pushElement(element)
         parser.insertionMode = BEFORE_HEAD
       )
       ("</head>", "</body>", "</html>", "</br>") => (block: anything_else)
@@ -834,7 +839,7 @@ proc processInHTMLContent(parser: var HTML5Parser, token: Token, insertionMode =
       _ => (block:
         let element = parser.document.newHTMLElement(TAG_HTML, Namespace.HTML)
         parser.document.append(element)
-        parser.openElements.add(element)
+        parser.pushElement(element)
         parser.insertionMode = BEFORE_HEAD
         reprocess token
       )
@@ -897,7 +902,7 @@ proc processInHTMLContent(parser: var HTML5Parser, token: Token, insertionMode =
           element.alreadyStarted = true
           #TODO document.write (?)
         location.insert(element)
-        parser.openElements.add(element)
+        parser.pushElement(element)
         parser.tokenizer.state = SCRIPT_DATA
         parser.oldInsertionMode = parser.insertionMode
         parser.insertionMode = TEXT
@@ -971,7 +976,7 @@ proc processInHTMLContent(parser: var HTML5Parser, token: Token, insertionMode =
       )
       ("<base>", "<basefont>", "<bgsound>", "<link>", "<meta>", "<noframes>", "<script>", "<style>", "<template>", "<title>") => (block:
         parse_error
-        parser.openElements.add(parser.head)
+        parser.pushElement(parser.head)
         parser.processInHTMLContent(token, IN_HEAD)
         for i in countdown(parser.openElements.high, 0):
           if parser.openElements[i] == parser.head:
diff --git a/src/html/htmltokenizer.nim b/src/html/htmltokenizer.nim
index d09f54dd..5053af97 100644
--- a/src/html/htmltokenizer.nim
+++ b/src/html/htmltokenizer.nim
@@ -23,6 +23,7 @@ type
     attrn: string
     attrv: string
     attr: bool
+    hasnonhtml*: bool
 
     decoder: DecoderStream
     sbuf: seq[Rune]
@@ -237,7 +238,6 @@ iterator tokenize*(tokenizer: var Tokenizer): Token =
       cast[char](r)
     else:
       char(128)
-  template has_adjusted_current_node(): bool = false #TODO implement this
   template consume_and_discard(n: int) = #TODO optimize
     var i = 0
     while i < n:
@@ -939,7 +939,7 @@ iterator tokenize*(tokenizer: var Tokenizer): Token =
         of '[':
           if peek_str("CDATA["):
             consume_and_discard "CDATA[".len
-            if has_adjusted_current_node: #TODO and it is not an element in the HTML namespace
+            if tokenizer.hasnonhtml:
               switch_state CDATA_SECTION
             else:
               parse_error cdata_in_html_content
```