diff options
author | bptato <nincsnevem662@gmail.com> | 2024-01-15 18:03:00 +0100 |
---|---|---|
committer | bptato <nincsnevem662@gmail.com> | 2024-01-15 19:58:38 +0100 |
commit | 161db572d34ccd193e0639c8968c11371a892b10 (patch) | |
tree | db65c6341f916b5a71baef3a72b9a6d98519f4f5 | |
parent | c04a25cd345b57eba4e66b3f49a6f90281b3a351 (diff) | |
download | chawan-161db572d34ccd193e0639c8968c11371a892b10.tar.gz |
tags: remove NodeType and various sets
* NodeType is somewhat convenient, but adds a 1-word overhead to each node and makes object construction more error-prone. If needed, library users can still add it without us defining the enum.
* Now that we have atoms, the tagType function is useless.
* SpecialElements is only used in the specification's parser section, and is not even complete because it should also contain non-HTML tags. Moved to htmlparser.
* AllTagTypes can be expressed in a simpler way.
-rw-r--r-- | chame/htmlparser.nim | 23 | ||||
-rw-r--r-- | chame/minidom.nim | 88 | ||||
-rw-r--r-- | chame/minidom_cs.nim | 6 | ||||
-rw-r--r-- | chame/tags.nim | 58 | ||||
-rw-r--r-- | tests/shared/tree_common.nim | 26 | ||||
-rw-r--r-- | tests/test1.nim | 15 | ||||
-rw-r--r-- | tests/tree.nim | 2 |
7 files changed, 95 insertions, 123 deletions
diff --git a/chame/htmlparser.nim b/chame/htmlparser.nim index 279b9a1d..7d6591ba 100644 --- a/chame/htmlparser.nim +++ b/chame/htmlparser.nim @@ -996,6 +996,23 @@ proc findLastActiveFormattingAfterMarker(parser: var HTML5Parser, return i return -1 +#https://html.spec.whatwg.org/multipage/parsing.html#the-stack-of-open-elements +const SpecialElements = { + TAG_ADDRESS, TAG_APPLET, TAG_AREA, TAG_ARTICLE, TAG_ASIDE, TAG_BASE, + TAG_BASEFONT, TAG_BGSOUND, TAG_BLOCKQUOTE, TAG_BODY, TAG_BR, TAG_BUTTON, + TAG_CAPTION, TAG_CENTER, TAG_COL, TAG_COLGROUP, TAG_DD, TAG_DETAILS, TAG_DIR, + TAG_DIV, TAG_DL, TAG_DT, TAG_EMBED, TAG_FIELDSET, TAG_FIGCAPTION, TAG_FIGURE, + TAG_FOOTER, TAG_FORM, TAG_FRAME, TAG_FRAMESET, TAG_H1, TAG_H2, TAG_H3, TAG_H4, + TAG_H5, TAG_H6, TAG_HEAD, TAG_HEADER, TAG_HGROUP, TAG_HR, TAG_HTML, + TAG_IFRAME, TAG_IMG, TAG_INPUT, TAG_KEYGEN, TAG_LI, TAG_LINK, TAG_LISTING, + TAG_MAIN, TAG_MARQUEE, TAG_MENU, TAG_META, TAG_NAV, TAG_NOEMBED, TAG_NOFRAMES, + TAG_NOSCRIPT, TAG_OBJECT, TAG_OL, TAG_P, TAG_PARAM, TAG_PLAINTEXT, TAG_PRE, + TAG_SCRIPT, TAG_SEARCH, TAG_SECTION, TAG_SELECT, TAG_SOURCE, TAG_STYLE, + TAG_SUMMARY, TAG_TABLE, TAG_TBODY, TAG_TD, TAG_TEMPLATE, TAG_TEXTAREA, + TAG_TFOOT, TAG_TH, TAG_THEAD, TAG_TITLE, TAG_TR, TAG_TRACK, TAG_UL, TAG_WBR, + TAG_XMP +} + proc isSpecialElement[Handle, Atom](parser: HTML5Parser[Handle, Atom], element: Handle): bool = let localName = parser.getLocalName(element) @@ -1233,7 +1250,7 @@ macro match(token: Token, body: typed): untyped = assert s[i] in AsciiAlphaNumeric tagName &= s[i] inc i - let tt = int(tagType(tagName)) + let tt = int(parseEnum[TagType](tagName)) let tokt = if s[1] != '/': START_TAG else: END_TAG var found = false for i in 0..ofBranches[tokt].ofBranches.high: @@ -1567,12 +1584,12 @@ proc processInHTMLContent[Handle, Atom](parser: var HTML5Parser[Handle, Atom], template parse_error_if_body_has_disallowed_open_elements = if parser.hasParseError(): - const Disallowed = AllTagTypes - { + const Allowed = { 
TAG_DD, TAG_DT, TAG_LI, TAG_OPTGROUP, TAG_OPTION, TAG_P, TAG_RB, TAG_RP, TAG_RT, TAG_RTC, TAG_TBODY, TAG_TD, TAG_TFOOT, TAG_TH, TAG_THEAD, TAG_TR, TAG_BODY, TAG_HTML } - if parser.hasElement(Disallowed): + if parser.hasElement(AllTagTypes - Allowed): parse_error MISMATCHED_TAGS match token: diff --git a/chame/minidom.nim b/chame/minidom.nim index 1914a757..9a16fae5 100644 --- a/chame/minidom.nim +++ b/chame/minidom.nim @@ -75,7 +75,6 @@ type Attribute* = ParsedAttr[MAtom] Node* = ref object of RootObj - nodeType*: NodeType childList*: seq[Node] parentNode* {.cursor.}: Node @@ -211,7 +210,6 @@ proc createElement(document: Document, localName: MAtom, namespace: Namespace): ) else: Element() - element.nodeType = ELEMENT_NODE element.localName = localName element.namespace = namespace element.document = document @@ -241,26 +239,38 @@ proc getTemplateContentImpl(builder: MiniDOMBuilder, handle: Node): Node = return HTMLTemplateElement(handle).content proc createCommentImpl(builder: MiniDOMBuilder, text: string): Node = - return Comment(nodeType: COMMENT_NODE, data: text.toValidUTF8()) + return Comment(data: text.toValidUTF8()) proc createDocumentTypeImpl(builder: MiniDOMBuilder, name, publicId, systemId: string): Node = return DocumentType( - nodeType: DOCUMENT_TYPE_NODE, name: name.toValidUTF8(), publicId: publicId.toValidUTF8(), systemId: systemId.toValidUTF8() ) -func countChildren(node: Node, nodeType: NodeType): int = +func countElementChildren(node: Node): int = for child in node.childList: - if child.nodeType == nodeType: + if child of Element: inc result -func hasChild(node: Node, nodeType: NodeType): bool = +func hasTextChild(node: Node): bool = for child in node.childList: - if child.nodeType == nodeType: + if child of Text: return true + return false + +func hasElementChild(node: Node): bool = + for child in node.childList: + if child of Element: + return true + return false + +func hasDocumentTypeChild(node: Node): bool = + for child in node.childList: + if 
child of DocumentType: + return true + return false func isHostIncludingInclusiveAncestor(a, b: Node): bool = var b = b @@ -269,61 +279,64 @@ func isHostIncludingInclusiveAncestor(a, b: Node): bool = return true b = b.parentNode -func hasPreviousSibling(node: Node, nodeType: NodeType): bool = +func hasPreviousElementSibling(node: Node): bool = for n in node.parentNode.childList: if n == node: break - if n.nodeType == nodeType: + if n of Element: return true return false -func hasNextSibling(node: Node, nodeType: NodeType): bool = +func hasNextDocumentTypeSibling(node: Node): bool = for i in countdown(node.parentNode.childList.len, 0): let n = node.parentNode.childList[i] if n == node: break - if n.nodeType == nodeType: + if n of DocumentType: return true return false +func isValidParent(node: Node): bool = + return node of Element or node of Document or node of DocumentFragment + +func isValidChild(node: Node): bool = + return node.isValidParent or node of DocumentType or node of CharacterData + # WARNING the ordering of the arguments in the standard is whack so this # doesn't match that func preInsertionValidity*(parent, node: Node, before: Node): bool = - if parent.nodeType notin {DOCUMENT_NODE, DOCUMENT_FRAGMENT_NODE, ELEMENT_NODE}: + if not parent.isValidParent: return false if node.isHostIncludingInclusiveAncestor(parent): return false if before != nil and before.parentNode != parent: return false - if node.nodeType notin {DOCUMENT_FRAGMENT_NODE, DOCUMENT_TYPE_NODE, - ELEMENT_NODE} + CharacterDataNodes: + if not node.isValidChild: return false - if node.nodeType == TEXT_NODE and parent.nodeType == DOCUMENT_NODE: + if node of Text and parent of Document: return false - if node.nodeType == DOCUMENT_TYPE_NODE and parent.nodeType != DOCUMENT_NODE: + if node of DocumentType and not (parent of Document): return false - if parent.nodeType == DOCUMENT_NODE: - case node.nodeType - of DOCUMENT_FRAGMENT_NODE: - let elems = node.countChildren(ELEMENT_NODE) - if elems > 1 
or node.hasChild(TEXT_NODE): + if parent of Document: + if node of DocumentFragment: + let elems = node.countElementChildren() + if elems > 1 or node.hasTextChild(): return false - elif elems == 1 and (parent.hasChild(ELEMENT_NODE) or - before != nil and (before.nodeType == DOCUMENT_TYPE_NODE or - before.hasNextSibling(DOCUMENT_TYPE_NODE))): + elif elems == 1 and (parent.hasElementChild() or + before != nil and + (before of DocumentType or before.hasNextDocumentTypeSibling())): return false - of ELEMENT_NODE: - if parent.hasChild(ELEMENT_NODE): + elif node of Element: + if parent.hasElementChild(): return false - elif before != nil and (before.nodeType == DOCUMENT_TYPE_NODE or - before.hasNextSibling(DOCUMENT_TYPE_NODE)): + elif before != nil and (before of DocumentType or + before.hasNextDocumentTypeSibling()): return false - of DOCUMENT_TYPE_NODE: - if parent.hasChild(DOCUMENT_TYPE_NODE) or - before != nil and before.hasPreviousSibling(ELEMENT_NODE) or - before == nil and parent.hasChild(ELEMENT_NODE): + elif node of DocumentType: + if parent.hasDocumentTypeChild() or + before != nil and before.hasPreviousElementSibling() or + before == nil and parent.hasElementChild(): return false - else: discard return true # no exception reached proc insertBefore(parent, child: Node, before: Option[Node]) = @@ -355,10 +368,10 @@ proc insertTextImpl(builder: MiniDOMBuilder, parent: Node, text: string, parent.childList[^1] else: nil - if prevSibling != nil and prevSibling.nodeType == TEXT_NODE: + if prevSibling != nil and prevSibling of Text: Text(prevSibling).data &= text else: - let text = Text(nodeType: TEXT_NODE, data: text) + let text = Text(data: text) parent.insertBefore(text, option(before)) proc removeImpl(builder: MiniDOMBuilder, child: Node) = @@ -391,7 +404,7 @@ method setEncodingImpl(builder: MiniDOMBuilder, encoding: string): return SET_ENCODING_CONTINUE proc newMiniDOMBuilder*(stream: Stream, factory: MAtomFactory): MiniDOMBuilder = - let document = 
Document(nodeType: DOCUMENT_NODE, factory: factory) + let document = Document(factory: factory) let builder = MiniDOMBuilder( document: document, factory: factory, @@ -440,7 +453,6 @@ proc parseHTMLFragment*(inputStream: Stream, element: Element, else: DATA let htmlAtom = builder.factory.tagTypeToAtom(TAG_HTML) let root = Element( - nodeType: ELEMENT_NODE, localName: htmlAtom, namespace: HTML, document: document diff --git a/chame/minidom_cs.nim b/chame/minidom_cs.nim index c1e1b6b3..9594d3be 100644 --- a/chame/minidom_cs.nim +++ b/chame/minidom_cs.nim @@ -50,7 +50,7 @@ method setEncodingImpl(builder: CharsetMiniDOMBuilder, encoding: string): return SET_ENCODING_STOP proc newCharsetMiniDOMBuilder(factory: MAtomFactory): CharsetMiniDOMBuilder = - let document = Document(nodeType: DOCUMENT_NODE, factory: factory) + let document = Document(factory: factory) let builder = CharsetMiniDOMBuilder(document: document, factory: factory) return builder @@ -140,14 +140,14 @@ proc parseHTML*(inputStream: Stream, opts: HTML5ParserOpts[Node, MAtom], # A meta tag describing the charset has been found; force use of this # charset. inputStream.setPosition(0) - builder.document = Document(nodeType: DOCUMENT_NODE, factory: factory) + builder.document = Document(factory: factory) charsetStack.add(builder.charset) seekable = false continue if decoder.failed and seekable: # Retry with another charset. 
inputStream.setPosition(0) - builder.document = Document(nodeType: DOCUMENT_NODE, factory: factory) + builder.document = Document(factory: factory) continue break return builder.document diff --git a/chame/tags.nim b/chame/tags.nim index a9479e68..b8e45d09 100644 --- a/chame/tags.nim +++ b/chame/tags.nim @@ -1,20 +1,4 @@ -import std/tables - type - NodeType* = enum - ELEMENT_NODE = 1, - ATTRIBUTE_NODE = 2, - TEXT_NODE = 3, - CDATA_SECTION_NODE = 4, - ENTITY_REFERENCE_NODE = 5, - ENTITY_NODE = 6 - PROCESSING_INSTRUCTION_NODE = 7, - COMMENT_NODE = 8, - DOCUMENT_NODE = 9, - DOCUMENT_TYPE_NODE = 10, - DOCUMENT_FRAGMENT_NODE = 11, - NOTATION_NODE = 12 - TagType* = enum TAG_UNKNOWN = "" TAG_APPLET = "applet" @@ -178,21 +162,7 @@ type PREFIX_XMLNS = "xmlns" PREFIX_UNKNOWN = "" -func getTagTypeMap(): Table[string, TagType] = - for i in TagType: - result[$TagType(i)] = TagType(i) - -const tagTypeMap = getTagTypeMap() - -func tagType*(s: string): TagType = - if s in tagTypeMap: - return tagTypeMap[s] - return TAG_UNKNOWN - -const AllTagTypes* = (func(): set[TagType] = - for tag in TagType: - result.incl(tag) -)() +const AllTagTypes* = {TagType.low..TagType.high} const HTagTypes* = { TAG_H1, TAG_H2, TAG_H3, TAG_H4, TAG_H5, TAG_H6 @@ -200,32 +170,10 @@ const HTagTypes* = { # 4.10.2 Categories const FormAssociatedElements* = { - TAG_BUTTON, TAG_FIELDSET, TAG_INPUT, TAG_OBJECT, TAG_OUTPUT, TAG_SELECT, TAG_TEXTAREA, TAG_IMG + TAG_BUTTON, TAG_FIELDSET, TAG_INPUT, TAG_OBJECT, TAG_OUTPUT, TAG_SELECT, + TAG_TEXTAREA, TAG_IMG } const ListedElements* = { TAG_FIELDSET, TAG_INPUT, TAG_OBJECT, TAG_OUTPUT, TAG_SELECT, TAG_TEXTAREA } - -const CharacterDataNodes* = { - TEXT_NODE, CDATA_SECTION_NODE, PROCESSING_INSTRUCTION_NODE, COMMENT_NODE -} - -#https://html.spec.whatwg.org/multipage/parsing.html#the-stack-of-open-elements -#NOTE MathML not implemented -#TODO SVG foreignObject, SVG desc, SVG title -const SpecialElements* = { - TAG_ADDRESS, TAG_APPLET, TAG_AREA, TAG_ARTICLE, TAG_ASIDE, 
TAG_BASE, - TAG_BASEFONT, TAG_BGSOUND, TAG_BLOCKQUOTE, TAG_BODY, TAG_BR, TAG_BUTTON, - TAG_CAPTION, TAG_CENTER, TAG_COL, TAG_COLGROUP, TAG_DD, TAG_DETAILS, TAG_DIR, - TAG_DIV, TAG_DL, TAG_DT, TAG_EMBED, TAG_FIELDSET, TAG_FIGCAPTION, TAG_FIGURE, - TAG_FOOTER, TAG_FORM, TAG_FRAME, TAG_FRAMESET, TAG_H1, TAG_H2, TAG_H3, TAG_H4, - TAG_H5, TAG_H6, TAG_HEAD, TAG_HEADER, TAG_HGROUP, TAG_HR, TAG_HTML, - TAG_IFRAME, TAG_IMG, TAG_INPUT, TAG_KEYGEN, TAG_LI, TAG_LINK, TAG_LISTING, - TAG_MAIN, TAG_MARQUEE, TAG_MENU, TAG_META, TAG_NAV, TAG_NOEMBED, TAG_NOFRAMES, - TAG_NOSCRIPT, TAG_OBJECT, TAG_OL, TAG_P, TAG_PARAM, TAG_PLAINTEXT, TAG_PRE, - TAG_SCRIPT, TAG_SEARCH, TAG_SECTION, TAG_SELECT, TAG_SOURCE, TAG_STYLE, - TAG_SUMMARY, TAG_TABLE, TAG_TBODY, TAG_TD, TAG_TEMPLATE, TAG_TEXTAREA, - TAG_TFOOT, TAG_TH, TAG_THEAD, TAG_TITLE, TAG_TR, TAG_TRACK, TAG_UL, TAG_WBR, - TAG_XMP -} diff --git a/tests/shared/tree_common.nim b/tests/shared/tree_common.nim index c1b88295..6bbd10a2 100644 --- a/tests/shared/tree_common.nim +++ b/tests/shared/tree_common.nim @@ -101,7 +101,6 @@ proc parseTestFragment(ctx: var TCTestParser): TCFragment = of FT_MATHML: Namespace.MATHML of FT_HTML: Namespace.HTML let element = Element( - nodeType: ELEMENT_NODE, namespace: namespace, localName: ctx.factory.strToAtom(line) ) @@ -111,7 +110,7 @@ proc parseTestFragment(ctx: var TCTestParser): TCFragment = ) proc parseDoctype(ctx: TCTestParser, s: string): DocumentType = - let doctype = DocumentType(nodeType: DOCUMENT_TYPE_NODE) + let doctype = DocumentType() var i = "<!DOCTYPE ".len while i < s.len and s[i] != ' ' and s[i] != '>': doctype.name &= s[i] @@ -143,7 +142,7 @@ proc parseDoctype(ctx: TCTestParser, s: string): DocumentType = return doctype proc parseTestDocument(ctx: var TCTestParser): Document = - result = Document(nodeType: DOCUMENT_NODE, factory: ctx.factory) + result = Document(factory: ctx.factory) var stack: seq[Node] stack.add(result) template top: auto = stack[^1] @@ -184,7 +183,7 @@ proc 
parseTestDocument(ctx: var TCTestParser): Document = let doctype = ctx.parseDoctype(str) top.childList.add(doctype) elif str.startsWith("<!-- "): - let comment = Comment(nodeType: COMMENT_NODE) + let comment = minidom.Comment() top.childList.add(comment) if not str.endsWith(" -->"): comment.data = str.substr("<!-- ".len) & "\n" @@ -206,7 +205,6 @@ proc parseTestDocument(ctx: var TCTestParser): Document = HTMLTemplateElement() else: Element() - element.nodeType = ELEMENT_NODE element.localName = ctx.factory.strToAtom(nameStr) element.namespace = namespace element.document = result @@ -219,7 +217,7 @@ proc parseTestDocument(ctx: var TCTestParser): Document = stack.add(fragment) indent += 2 elif str[0] == '"': - let text = Text(nodeType: TEXT_NODE) + let text = Text() top.childList.add(text) if str[^1] != '"' or str.len == 1: text.data = str.substr(1) & "\n" @@ -282,13 +280,12 @@ proc parseTests*(s: string, factory: MAtomFactory): seq[TCTest] = s &= $x & '\n' proc checkTest(nodein, nodep: Node) = - check nodein.nodeType == nodep.nodeType check nodein.childList.len == nodep.childList.len if nodein.childList.len != nodep.childList.len: echo nodein echo nodep - case nodein.nodeType - of ELEMENT_NODE: + if nodein of Element: + check nodep of Element let nodein = Element(nodein) let nodep = Element(nodep) check nodein.localName == nodep.localName @@ -297,19 +294,18 @@ proc checkTest(nodein, nodep: Node) = echo "NODEIN", $nodein echo "NODEP", $nodep check nodein.attrs == nodep.attrs - of ATTRIBUTE_NODE, ENTITY_REFERENCE_NODE, ENTITY_NODE, - DOCUMENT_FRAGMENT_NODE, NOTATION_NODE: + elif nodein of DocumentFragment: assert false - of TEXT_NODE, CDATA_SECTION_NODE, COMMENT_NODE: + elif nodein of CharacterData: + check nodep of CharacterData check CharacterData(nodein).data == CharacterData(nodep).data - of PROCESSING_INSTRUCTION_NODE: assert false, "todo" - of DOCUMENT_TYPE_NODE: + elif nodein of DocumentType: + check nodep of DocumentType let nodein = DocumentType(nodein) let 
nodep = DocumentType(nodep) check nodein.name == nodep.name check nodein.publicId == nodep.publicId check nodein.systemId == nodep.systemId - of DOCUMENT_NODE: discard for i in 0 ..< nodein.childList.len: checkTest(nodein.childList[i], nodep.childList[i]) diff --git a/tests/test1.nim b/tests/test1.nim index 0fde7216..d5d31837 100644 --- a/tests/test1.nim +++ b/tests/test1.nim @@ -32,8 +32,7 @@ func escapeText(s: string, attribute_mode = false): string = result &= c func `$`*(node: Node): string = - case node.nodeType - of ELEMENT_NODE: + if node of Element: let element = Element(node) var x = "" if element.namespace == Namespace.SVG: @@ -47,17 +46,17 @@ func `$`*(node: Node): string = for node in element.childList: result &= $node result &= "</" & x & element.localNameStr & ">" - of TEXT_NODE: + elif node of Text: let text = Text(node) result = text.data.escapeText() - of COMMENT_NODE: + elif node of Comment: result = "<!-- " & Comment(node).data & "-->" - of PROCESSING_INSTRUCTION_NODE: - result = "" #TODO - of DOCUMENT_TYPE_NODE: + elif node of DocumentType: result = "<!DOCTYPE" & ' ' & DocumentType(node).name & ">" + elif node of Document: + result = "Node of Document" else: - result = "Node of " & $node.nodeType + assert false # This is, in fact, standards-compliant behavior. # Don't ask. diff --git a/tests/tree.nim b/tests/tree.nim index eee1f05f..527c7648 100644 --- a/tests/tree.nim +++ b/tests/tree.nim @@ -16,7 +16,7 @@ proc runTest(test: TCTest, factory: MAtomFactory, scripting, print: bool) = for child in childList: if ctx.preInsertionValidity(child, nil): ctx.childList.add(child) - Document(nodeType: DOCUMENT_NODE, childList: ctx.childList) + Document(childList: ctx.childList) if print: var ins = "" for x in test.document.childList: |