import macros
import options
import sequtils
import streams
import strutils
import tables
import unicode

import data/charset
import encoding/decoderstream
import html/htmltokenizer
import html/parseerror
import html/tags
import utils/twtstr

# Generics break without exporting macros. Maybe a compiler bug?
export macros

# Heavily inspired by html5ever's TreeSink design.
type
  DOMBuilder*[Handle] = ref object of RootObj
    ## Set of callbacks through which the parser builds a DOM tree.
    ## `Handle` is the consumer's node reference type. Callbacks documented
    ## as "May be nil" are optional; the rest are mandatory.
    document*: Handle
    ## Must never be nil.
    finish*: DOMBuilderFinish[Handle]
    ## May be nil.
    parseError*: DOMBuilderParseError[Handle]
    ## May be nil.
    setQuirksMode*: DOMBuilderSetQuirksMode[Handle]
    ## May be nil.
    setCharacterSet*: DOMBuilderSetCharacterSet[Handle]
    ## May be nil.
    elementPopped*: DOMBuilderElementPopped[Handle]
    ## May be nil.
    getTemplateContent*: DOMBuilderGetTemplateContent[Handle]
    ## May be nil. (If nil, templates are treated as regular elements.)
    getParentNode*: DOMBuilderGetParentNode[Handle]
    ## Must never be nil.
    getLocalName*: DOMBuilderGetLocalName[Handle]
    ## Must never be nil.
    getTagType*: DOMBuilderGetTagType[Handle]
    ## May be nil. (If nil, the parser falls back to getLocalName.)
    getNamespace*: DOMBuilderGetNamespace[Handle]
    ## May be nil. (If nil, the parser always uses the HTML namespace.)
    createElement*: DOMBuilderCreateElement[Handle]
    ## Must never be nil.
    createComment*: DOMBuilderCreateComment[Handle]
    ## Must never be nil.
    createDocumentType*: DOMBuilderCreateDocumentType[Handle]
    ## Must never be nil.
    insertBefore*: DOMBuilderInsertBefore[Handle]
    ## Must never be nil.
    insertText*: DOMBuilderInsertText[Handle]
    ## Must never be nil.
    remove*: DOMBuilderRemove[Handle]
    ## Must never be nil.
    addAttrsIfMissing*: DOMBuilderAddAttrsIfMissing[Handle]
    ## May be nil. (If nil, some attributes may not be added to the HTML or
    ## BODY element if more than one of their respective opening tags exist.)
    setScriptAlreadyStarted*: DOMBuilderSetScriptAlreadyStarted[Handle]
    ## May be nil.
    associateWithForm*: DOMBuilderAssociateWithForm[Handle]
    ## May be nil.
    isSVGIntegrationPoint*: DOMBuilderIsSVGIntegrationPoint[Handle]
    ## May be nil. (If nil, the parser considers no Handle an SVG integration
    ## point.)

  HTML5ParserOpts*[Handle] = object
    ## Options controlling a single parser run.
    isIframeSrcdoc*: bool
    ## Is the document an iframe srcdoc?
    scripting*: bool
    ## Is scripting enabled for this document?
    canReinterpret*: bool
    ## Can we try to parse the document again with a different character set?
    ##
    ## Note: this only works if inputStream is seekable, i.e.
    ## inputStream.setPosition(0) must work correctly.
    ##
    ## Note 2: when canReinterpret is false, confidence is set to
    ## certain, no BOM sniffing is performed and meta charset tags are
    ## disregarded. Expect this to change in the future.
    charsets*: seq[Charset]
    ## Fallback charsets. If empty, UTF-8 is used. In most cases, an empty
    ## sequence or a single-element sequence consisting of a character set
    ## chosen based on the user's locale will suffice.
    ##
    ## The parser goes through fallback charsets in the following order:
    ## * A charset stack is initialized to `charsets`, reversed. This
    ##   means that the first charset specified in `charsets` is on top of
    ##   the stack. (e.g. say `charsets = @[CHARSET_UTF_16_LE, CHARSET_UTF_8]`,
    ##   then utf-16-le is tried before utf-8.)
    ## * BOM sniffing is attempted. If successful, confidence is set to
    ##   certain and the resulting charset is used (i.e. other character
    ##   sets will not be tried for decoding this document.)
    ## * If the charset stack is empty, UTF-8 is pushed on top.
    ## * Attempt to parse the document with the first charset on top of
    ##   the stack.
    ## * If BOM sniffing was unsuccessful, and a `<meta>` tag declaring a
    ##   charset is encountered, parsing is restarted with the specified
    ##   charset. No further attempts are made to detect the encoding, and
    ##   decoder errors are signaled by U+FFFD replacement characters.
    ## * Otherwise, each charset on the charset stack is tried until either no
    ##   decoding errors are encountered, or only one charset is left. For
    ##   the last charset, decoder errors are signaled by U+FFFD replacement
    ##   characters.
    ctx*: Option[Handle]
    ## Context element for fragment parsing. When set to some Handle,
    ## the fragment case is used while parsing.

  DOMBuilderFinish*[Handle] =
    proc(builder: DOMBuilder[Handle]) {.nimcall.}
      ## Parsing has finished.

  DOMBuilderParseError*[Handle] =
    proc(builder: DOMBuilder[Handle], message: ParseError) {.nimcall.}
      ## Parse error. `message` is an error code either specified by the
      ## standard (in this case, message < LAST_SPECIFIED_ERROR) or named
      ## arbitrarily. (At the time of writing, only tokenizer errors have
      ## specified error codes.)

  DOMBuilderSetQuirksMode*[Handle] =
    proc(builder: DOMBuilder[Handle], quirksMode: QuirksMode) {.nimcall.}
      ## Set quirks mode to either QUIRKS or LIMITED_QUIRKS. NO_QUIRKS
      ## is the default and is therefore never used here.

  DOMBuilderSetCharacterSet*[Handle] =
    proc(builder: DOMBuilder[Handle], charset: Charset) {.nimcall.}
      ## Set the recognized charset, if it differs from the initial input.

  DOMBuilderElementPopped*[Handle] =
    proc(builder: DOMBuilder[Handle], element: Handle) {.nimcall.}
      ## Called when an element is popped from the stack of open elements
      ## (i.e. when it has been closed.)

  DOMBuilderGetTemplateContent*[Handle] =
    proc(builder: DOMBuilder[Handle], handle: Handle): Handle {.nimcall.}
      ## Retrieve a handle to the template element's contents.
      ## Note: this function must never return nil.

  DOMBuilderGetParentNode*[Handle] =
    proc(builder: DOMBuilder[Handle], handle: Handle): Handle {.nimcall.}
      ## Retrieve a handle to the parent node.

  DOMBuilderGetTagType*[Handle] =
    proc(builder: DOMBuilder[Handle], handle: Handle): TagType {.nimcall.}
      ## Retrieve the tag type of element.

  DOMBuilderGetLocalName*[Handle] =
    proc(builder: DOMBuilder[Handle], handle: Handle): string {.nimcall.}
      ## Retrieve the local name of element. (This is tagName(getTagType),
      ## unless the tag is unknown.)

  DOMBuilderGetNamespace*[Handle] =
    proc(builder: DOMBuilder[Handle], handle: Handle): Namespace {.nimcall.}
      ## Retrieve the namespace of element.

  DOMBuilderCreateElement*[Handle] =
    proc(builder: DOMBuilder[Handle], localName: string, namespace: Namespace,
        tagType: TagType, attrs: Table[string, string]): Handle {.nimcall.}
      ## Create a new element node.
      ##
      ## localName is the tag name of the token.
      ##
      ## namespace is the namespace passed to the function. (For HTML elements,
      ## it's HTML.)
      ##
      ## tagType is set based on localName. (This saves the consumer from
      ## having to interpret localName again.)
      ##
      ## attrs is a table of the token's attributes.

  DOMBuilderCreateComment*[Handle] =
    proc(builder: DOMBuilder[Handle], text: string): Handle {.nimcall.}
      ## Create a new comment node.

  DOMBuilderInsertText*[Handle] =
    proc(builder: DOMBuilder[Handle], parent: Handle, text: string,
        before: Handle) {.nimcall.}
      ## Insert a text node at the specified location with contents
      ## `text`. If the specified location has a previous sibling that is
      ## a text node, no new text node should be created, but instead `text`
      ## should be appended to the previous sibling's character data.

  DOMBuilderCreateDocumentType*[Handle] =
    proc(builder: DOMBuilder[Handle], name, publicId,
        systemId: string): Handle {.nimcall.}
      ## Create a new document type node.

  DOMBuilderInsertBefore*[Handle] =
    proc(builder: DOMBuilder[Handle], parent, child,
        before: Handle) {.nimcall.}
      ## Insert node `child` before the node called `before`.
      ##
      ## If `before` is nil, `child` is expected to be appended to `parent`'s
      ## node list.
      ##
      ## If `child` is a text, and its previous sibling after insertion is a
      ## text as well, then they should be merged. `before` is never a
      ## text node (and thus never has to be merged).
      ##
      ## Note: parent may either be an Element or a Document node.

  DOMBuilderRemove*[Handle] =
    proc(builder: DOMBuilder[Handle], child: Handle) {.nimcall.}
      ## Remove `child` from its parent node, and do nothing if `child`
      ## has no parent node.

  DOMBuilderReparent*[Handle] =
    proc(builder: DOMBuilder[Handle], child, newParent: Handle) {.nimcall.}
      ## Remove `child` from its parent node, and append it to `newParent`.
      ## In terms of DOM operations, this should be equivalent to calling
      ## `child.remove()`, followed by `newParent.append(child)`.

  DOMBuilderAddAttrsIfMissing*[Handle] =
    proc(builder: DOMBuilder[Handle], element: Handle,
        attrs: Table[string, string]) {.nimcall.}
      ## Add the attributes in `attrs` to the element node `element`.
      ## At the time of writing, called for HTML and BODY only. (This may
      ## change in the future.)
      ## An example implementation:
      ## ```nim
      ## for k, v in attrs:
      ##   if k notin element.attrs:
      ##     element.attrs[k] = v
      ## ```

  DOMBuilderSetScriptAlreadyStarted*[Handle] =
    proc(builder: DOMBuilder[Handle], script: Handle) {.nimcall.}
      ## Set the "already started" flag for the script element.
      ##
      ## Note: this flag is not togglable, so this callback should just set it
      ## to true.

  DOMBuilderAssociateWithForm*[Handle] =
    proc(builder: DOMBuilder[Handle], element, form,
        intendedParent: Handle) {.nimcall.}
      ## Called after createElement. Attempts to set form for form-associated
      ## elements.
      ##
      ## Note: the DOM builder is responsible for checking whether the
      ## intended parent and the form element are in the same tree.

  DOMBuilderIsSVGIntegrationPoint*[Handle] =
    proc(builder: DOMBuilder[Handle], element: Handle): bool {.nimcall.}
      ## Check if element is an SVG integration point.
type
  CharsetConfidence = enum
    CONFIDENCE_TENTATIVE, CONFIDENCE_CERTAIN, CONFIDENCE_IRRELEVANT

  HTML5Parser[Handle] = object
    ## Tree-construction state for one parser run.
    quirksMode: QuirksMode
    dombuilder: DOMBuilder[Handle]
    opts: HTML5ParserOpts[Handle]
    ctx: Option[Handle]
    needsreinterpret: bool
    charset: Charset
    confidence: CharsetConfidence
    openElements: seq[Handle]
    insertionMode: InsertionMode
    oldInsertionMode: InsertionMode
    templateModes: seq[InsertionMode]
    head: Option[Handle]
    tokenizer: Tokenizer
    form: Option[Handle]
    fosterParenting: bool
    # Handle is an element. nil => marker
    activeFormatting: seq[(Handle, Token)]
    framesetok: bool
    ignoreLF: bool
    pendingTableChars: string
    pendingTableCharsWhitespace: bool

  AdjustedInsertionLocation[Handle] = tuple[inside, before: Handle]

  # 13.2.4.1
  InsertionMode = enum
    INITIAL, BEFORE_HTML, BEFORE_HEAD, IN_HEAD, IN_HEAD_NOSCRIPT, AFTER_HEAD,
    IN_BODY, TEXT, IN_TABLE, IN_TABLE_TEXT, IN_CAPTION, IN_COLUMN_GROUP,
    IN_TABLE_BODY, IN_ROW, IN_CELL, IN_SELECT, IN_SELECT_IN_TABLE, IN_TEMPLATE,
    AFTER_BODY, IN_FRAMESET, AFTER_FRAMESET, AFTER_AFTER_BODY,
    AFTER_AFTER_FRAMESET

# DOMBuilder interface functions
proc finish[Handle](parser: HTML5Parser[Handle]) =
  ## Invoke the optional `finish` callback.
  if parser.dombuilder.finish != nil:
    parser.dombuilder.finish(parser.dombuilder)

proc parseError(parser: HTML5Parser, e: ParseError) =
  ## Report `e` through the optional `parseError` callback.
  if parser.dombuilder.parseError != nil:
    parser.dombuilder.parseError(parser.dombuilder, e)

proc setQuirksMode[Handle](parser: var HTML5Parser[Handle], mode: QuirksMode) =
  ## Record `mode` on the parser and forward it to the optional callback.
  parser.quirksMode = mode
  if parser.dombuilder.setQuirksMode != nil:
    parser.dombuilder.setQuirksMode(parser.dombuilder, mode)

func document[Handle](parser: HTML5Parser[Handle]): Handle {.inline.} =
  ## The document handle held by the DOM builder.
  return parser.dombuilder.document

func getTemplateContent[Handle](parser: HTML5Parser[Handle],
    handle: Handle): Handle =
  ## Calls the `getTemplateContent` callback without a nil check; callers
  ## must test `parser.dombuilder.getTemplateContent != nil` first.
  let dombuilder = parser.dombuilder
  return dombuilder.getTemplateContent(dombuilder, handle)

func getParentNode[Handle](parser: HTML5Parser[Handle],
    handle: Handle): Handle =
  let dombuilder = parser.dombuilder
  return dombuilder.getParentNode(dombuilder, handle)

func getLocalName[Handle](parser: HTML5Parser[Handle],
    handle: Handle): string =
  return parser.dombuilder.getLocalName(parser.dombuilder, handle)

func getTagType[Handle](parser: HTML5Parser[Handle],
    handle: Handle): TagType =
  ## Uses the `getTagType` callback if set; otherwise derives the tag type
  ## from the local name.
  if parser.dombuilder.getTagType != nil:
    return parser.dombuilder.getTagType(parser.dombuilder, handle)
  return tagType(parser.getLocalName(handle))

func getNamespace[Handle](parser: HTML5Parser[Handle],
    handle: Handle): Namespace =
  ## Uses the `getNamespace` callback if set; otherwise everything is HTML.
  if parser.dombuilder.getNamespace != nil:
    return parser.dombuilder.getNamespace(parser.dombuilder, handle)
  return Namespace.HTML

func createElement[Handle](parser: HTML5Parser[Handle], localName: string,
    namespace: Namespace, tagType: TagType,
    attrs: Table[string, string]): Handle =
  return parser.dombuilder.createElement(parser.dombuilder, localName,
    namespace, tagType, attrs)

func createElement[Handle](parser: HTML5Parser[Handle], tagType: TagType,
    namespace: Namespace): Handle =
  ## Create an attribute-less element named after `tagType`.
  return parser.createElement(tagName(tagType), namespace, tagType,
    Table[string, string]())

func createComment[Handle](parser: HTML5Parser[Handle], text: string): Handle =
  let dombuilder = parser.dombuilder
  return dombuilder.createComment(dombuilder, text)

proc createDocumentType[Handle](parser: HTML5Parser[Handle], name, publicId,
    systemId: string): Handle =
  let dombuilder = parser.dombuilder
  return dombuilder.createDocumentType(dombuilder, name, publicId, systemId)

proc insertBefore[Handle](parser: HTML5Parser[Handle], parent, node,
    before: Handle) =
  let dombuilder = parser.dombuilder
  dombuilder.insertBefore(dombuilder, parent, node, before)

proc insertText[Handle](parser: HTML5Parser[Handle], parent: Handle,
    text: string, before: Handle) =
  let dombuilder = parser.dombuilder
  dombuilder.insertText(dombuilder, parent, text, before)

proc remove[Handle](parser: HTML5Parser[Handle], child: Handle) =
  let dombuilder = parser.dombuilder
  dombuilder.remove(dombuilder, child)

proc addAttrsIfMissing[Handle](parser: HTML5Parser, element: Handle,
    attrs: Table[string, string]) =
  ## No-op when the optional callback is nil.
  let dombuilder = parser.dombuilder
  if dombuilder.addAttrsIfMissing != nil:
    dombuilder.addAttrsIfMissing(dombuilder, element, attrs)

proc setScriptAlreadyStarted[Handle](parser: HTML5Parser, script: Handle) =
  ## No-op when the optional callback is nil.
  let dombuilder = parser.dombuilder
  if dombuilder.setScriptAlreadyStarted != nil:
    dombuilder.setScriptAlreadyStarted(dombuilder, script)

proc associateWithForm[Handle](parser: HTML5Parser, element, form,
    intendedParent: Handle) =
  ## No-op when the optional callback is nil.
  let dombuilder = parser.dombuilder
  if dombuilder.associateWithForm != nil:
    dombuilder.associateWithForm(dombuilder, element, form, intendedParent)

func isSVGIntegrationPoint[Handle](parser: HTML5Parser,
    element: Handle): bool =
  ## False when the optional callback is nil.
  let dombuilder = parser.dombuilder
  if dombuilder.isSVGIntegrationPoint != nil:
    return dombuilder.isSVGIntegrationPoint(dombuilder, element)
  return false

# Parser
func hasParseError(parser: HTML5Parser): bool =
  ## Whether a parse error callback is installed.
  return parser.dombuilder.parseError != nil

func tagNameEquals[Handle](parser: HTML5Parser, handle: Handle,
    token: Token): bool =
  ## Compare by tag type when the element's tag is known, by local name
  ## otherwise.
  let tagType = parser.getTagType(handle)
  if tagType != TAG_UNKNOWN:
    return tagType == token.tagtype
  let localName = parser.getLocalName(handle)
  return localName == token.tagname

func tagNameEquals[Handle](parser: HTML5Parser, a, b: Handle): bool =
  ## Compare by tag type when `a`'s tag is known, by local name otherwise.
  let tagType = parser.getTagType(a)
  if tagType != TAG_UNKNOWN:
    return tagType == parser.getTagType(b)
  return parser.getLocalName(a) == parser.getLocalName(b)

func fragment(parser: HTML5Parser): bool =
  ## True when a context element is set (fragment case).
  return parser.ctx.isSome

# https://html.spec.whatwg.org/multipage/parsing.html#reset-the-insertion-mode-appropriately
proc resetInsertionMode(parser: var HTML5Parser) =
  ## Walk the stack of open elements from the top and pick the insertion
  ## mode matching the first element that determines one.
  template switch_insertion_mode_and_return(mode: InsertionMode) =
    parser.insertionMode = mode
    return
  for i in countdown(parser.openElements.high, 0):
    var node = parser.openElements[i]
    let last = i == 0
    # The context element only stands in for the bottommost stack entry;
    # every other entry is examined as itself.
    if last and parser.fragment:
      node = parser.ctx.get
    let tagType = parser.getTagType(node)
    if tagType == TAG_SELECT:
      if not last:
        # Only ancestors of the select (entries below it) are relevant.
        # Index 0 is skipped, as in the original loop.
        for j in countdown(i - 1, 1):
          let ancestor = parser.openElements[j]
          case parser.getTagType(ancestor)
          of TAG_TEMPLATE: break
          of TAG_TABLE: switch_insertion_mode_and_return IN_SELECT_IN_TABLE
          else: discard
      switch_insertion_mode_and_return IN_SELECT
    case tagType
    of TAG_TD, TAG_TH:
      if not last:
        switch_insertion_mode_and_return IN_CELL
    of TAG_TR: switch_insertion_mode_and_return IN_ROW
    of TAG_TBODY, TAG_THEAD, TAG_TFOOT:
      switch_insertion_mode_and_return IN_TABLE_BODY
    of TAG_CAPTION: switch_insertion_mode_and_return IN_CAPTION
    of TAG_COLGROUP: switch_insertion_mode_and_return IN_COLUMN_GROUP
    of TAG_TABLE: switch_insertion_mode_and_return IN_TABLE
    of TAG_TEMPLATE: switch_insertion_mode_and_return parser.templateModes[^1]
    of TAG_HEAD:
      if not last:
        switch_insertion_mode_and_return IN_HEAD
    of TAG_BODY: switch_insertion_mode_and_return IN_BODY
    of TAG_FRAMESET: switch_insertion_mode_and_return IN_FRAMESET
    of TAG_HTML:
      if parser.head.isNone:
        switch_insertion_mode_and_return BEFORE_HEAD
      else:
        switch_insertion_mode_and_return AFTER_HEAD
    else: discard
    if last:
      switch_insertion_mode_and_return IN_BODY

func currentNode[Handle](parser: HTML5Parser[Handle]): Handle =
  ## Topmost entry of the stack of open elements (the stack must not be
  ## empty).
  return parser.openElements[^1]

func adjustedCurrentNode[Handle](parser: HTML5Parser[Handle]): Handle =
  ## The context element in the fragment case while the stack holds at most
  ## one element; the current node otherwise.
  if parser.fragment and parser.openElements.len <= 1:
    parser.ctx.get
  else:
    parser.currentNode

func lastElementOfTag[Handle](parser: HTML5Parser[Handle],
    tagType: TagType): tuple[element: Handle, pos: int] =
  ## Topmost open element with the given tag type and its stack index, or
  ## `(nil, -1)` if there is none.
  for i in countdown(parser.openElements.high, 0):
    if parser.getTagType(parser.openElements[i]) == tagType:
      return (parser.openElements[i], i)
  return (nil, -1)

template last_child_of[Handle](n: Handle): AdjustedInsertionLocation[Handle] =
  (n, nil)

# https://html.spec.whatwg.org/multipage/#appropriate-place-for-inserting-a-node
func appropriatePlaceForInsert[Handle](parser: HTML5Parser[Handle],
    target: Handle): AdjustedInsertionLocation[Handle] =
  ## Compute where a node inserted "into" `target` actually goes, applying
  ## foster parenting and template content redirection.
  assert parser.getTagType(parser.openElements[0]) == TAG_HTML
  let targetTagType = parser.getTagType(target)
  const FosterTagTypes = {TAG_TABLE, TAG_TBODY, TAG_TFOOT, TAG_THEAD, TAG_TR}
  if parser.fosterParenting and targetTagType in FosterTagTypes:
    let lastTemplate = parser.lastElementOfTag(TAG_TEMPLATE)
    let lastTable = parser.lastElementOfTag(TAG_TABLE)
    if lastTemplate.element != nil and
        parser.dombuilder.getTemplateContent != nil and
        (lastTable.element == nil or lastTable.pos < lastTemplate.pos):
      let content = parser.getTemplateContent(lastTemplate.element)
      return last_child_of(content)
    if lastTable.element == nil:
      return last_child_of(parser.openElements[0])
    let parentNode = parser.getParentNode(lastTable.element)
    if parentNode != nil:
      return (parentNode, lastTable.element)
    let previousElement = parser.openElements[lastTable.pos - 1]
    result = last_child_of(previousElement)
  else:
    result = last_child_of(target)
  if parser.getTagType(result.inside) == TAG_TEMPLATE and
      parser.dombuilder.getTemplateContent != nil:
    result = (parser.getTemplateContent(result.inside), nil)

func appropriatePlaceForInsert[Handle](parser: HTML5Parser[Handle]):
    AdjustedInsertionLocation[Handle] =
  parser.appropriatePlaceForInsert(parser.currentNode)

func hasElement[Handle](parser: HTML5Parser[Handle], tag: TagType): bool =
  ## Whether any open element has tag type `tag`.
  for element in parser.openElements:
    if parser.getTagType(element) == tag:
      return true
  return false

func hasElement[Handle](parser: HTML5Parser[Handle],
    tags: set[TagType]): bool =
  ## Whether any open element has a tag type in `tags`.
  for element in parser.openElements:
    if parser.getTagType(element) in tags:
      return true
  return false

func hasElementInSpecificScope[Handle](parser: HTML5Parser[Handle],
    target: Handle, list: set[TagType]): bool =
  ## Scan the stack from the top; true on reaching `target`, false on
  ## reaching an element whose tag type is in `list` first.
  for i in countdown(parser.openElements.high, 0):
    if parser.openElements[i] == target:
      return true
    if parser.getTagType(parser.openElements[i]) in list:
      return false
  assert false

func hasElementInSpecificScope[Handle](parser: HTML5Parser[Handle],
    target: TagType, list: set[TagType]): bool =
  for i in countdown(parser.openElements.high, 0):
    let tagType = parser.getTagType(parser.openElements[i])
    if tagType == target:
      return true
    if tagType in list:
      return false
  assert false

func hasElementInSpecificScope[Handle](parser: HTML5Parser[Handle],
    target: set[TagType], list: set[TagType]): bool =
  for i in countdown(parser.openElements.high, 0):
    let tagType = parser.getTagType(parser.openElements[i])
    if tagType in target:
      return true
    if tagType in list:
      return false
  assert false

const Scope = {
  TAG_APPLET, TAG_CAPTION, TAG_HTML, TAG_TABLE, TAG_TD, TAG_TH, TAG_MARQUEE,
  TAG_OBJECT, TAG_TEMPLATE #TODO SVG
  # Note: MathML is not implemented
}

func hasElementInScope[Handle](parser: HTML5Parser[Handle],
    target: TagType): bool =
  return parser.hasElementInSpecificScope(target, Scope)

func hasElementInScope[Handle](parser: HTML5Parser[Handle],
    target: set[TagType]): bool =
  return parser.hasElementInSpecificScope(target, Scope)

func hasElementInScope[Handle](parser: HTML5Parser[Handle],
    target: Handle): bool =
  return parser.hasElementInSpecificScope(target, Scope)

func hasElementInListItemScope[Handle](parser: HTML5Parser[Handle],
    target: TagType): bool =
  const ListItemScope = Scope + {TAG_OL, TAG_UL}
  return parser.hasElementInSpecificScope(target, ListItemScope)

func hasElementInButtonScope[Handle](parser: HTML5Parser[Handle],
    target: TagType): bool =
  const ButtonScope = Scope + {TAG_BUTTON}
  return parser.hasElementInSpecificScope(target, ButtonScope)

const TableScope = {TAG_HTML, TAG_TABLE, TAG_TEMPLATE}

func hasElementInTableScope[Handle](parser: HTML5Parser[Handle],
    target: TagType): bool =
  return parser.hasElementInSpecificScope(target, TableScope)

func hasElementInTableScope[Handle](parser: HTML5Parser[Handle],
    target: set[TagType]): bool =
  return parser.hasElementInSpecificScope(target, TableScope)

func hasElementInSelectScope[Handle](parser: HTML5Parser[Handle],
    target: TagType): bool =
  ## Like the other scope checks, but every tag type except OPTION and
  ## OPTGROUP terminates the scan.
  for i in countdown(parser.openElements.high, 0):
    let tagType = parser.getTagType(parser.openElements[i])
    if tagType == target:
      return true
    if tagType notin {TAG_OPTION, TAG_OPTGROUP}:
      return false
  assert false

func createElement[Handle](parser: HTML5Parser[Handle], token: Token,
    namespace: Namespace, intendedParent: Handle): Handle =
  ## Create an element for `token` and, where applicable, offer it to the
  ## DOM builder for association with the current form element.
  #TODO custom elements
  let localName = token.tagname
  let element = parser.createElement(localName, namespace, token.tagtype,
    token.attrs)
  if token.tagtype in FormAssociatedElements and parser.form.isSome and
      not parser.hasElement(TAG_TEMPLATE) and
      (token.tagtype notin ListedElements or "form" notin token.attrs):
    parser.associateWithForm(element, parser.form.get, intendedParent)
  return element

proc pushElement[Handle](parser: var HTML5Parser[Handle], node: Handle) =
  ## Push `node` and refresh the tokenizer's `hasnonhtml` flag from the
  ## adjusted current node's namespace.
  parser.openElements.add(node)
  let node = parser.adjustedCurrentNode()
  parser.tokenizer.hasnonhtml = parser.getNamespace(node) != Namespace.HTML

proc popElement[Handle](parser: var HTML5Parser[Handle]): Handle =
  ## Pop the current node, notify the optional `elementPopped` callback and
  ## refresh the tokenizer's `hasnonhtml` flag.
  result = parser.openElements.pop()
  if parser.dombuilder.elementPopped != nil:
    parser.dombuilder.elementPopped(parser.dombuilder, result)
  if parser.openElements.len == 0:
    parser.tokenizer.hasnonhtml = false
  else:
    let node = parser.adjustedCurrentNode()
    parser.tokenizer.hasnonhtml = parser.getNamespace(node) != Namespace.HTML

template pop_current_node = discard parser.popElement()

proc insert[Handle](parser: HTML5Parser[Handle],
    location: AdjustedInsertionLocation[Handle], node: Handle) =
  parser.insertBefore(location.inside, node, location.before)

proc append[Handle](parser: HTML5Parser[Handle], parent, node: Handle) =
  parser.insertBefore(parent, node, nil)

proc insertForeignElement[Handle](parser: var HTML5Parser[Handle],
    token: Token, namespace: Namespace): Handle =
  ## Create an element for `token`, insert it at the appropriate place and
  ## push it onto the stack of open elements.
  let location = parser.appropriatePlaceForInsert()
  let element = parser.createElement(token, namespace, location.inside)
  #TODO custom elements
  parser.insert(location, element)
  parser.pushElement(element)
  return element

proc insertHTMLElement[Handle](parser: var HTML5Parser[Handle],
    token: Token): Handle =
  return parser.insertForeignElement(token, Namespace.HTML)

proc adjustSVGAttributes(token: Token) =
  ## Rename lowercased SVG attribute names on `token` to their camelCase
  ## spelling. The lowercase key is removed so that the attribute is renamed
  ## rather than duplicated.
  const adjusted = {
    "attributename": "attributeName",
    "attributetype": "attributeType",
    "basefrequency": "baseFrequency",
    "baseprofile": "baseProfile",
    "calcmode": "calcMode",
    "clippathunits": "clipPathUnits",
    "diffuseconstant": "diffuseConstant",
    "edgemode": "edgeMode",
    "filterunits": "filterUnits",
    "glyphref": "glyphRef",
    "gradienttransform": "gradientTransform",
    "gradientunits": "gradientUnits",
    "kernelmatrix": "kernelMatrix",
    "kernelunitlength": "kernelUnitLength",
    "keypoints": "keyPoints",
    "keysplines": "keySplines",
    "keytimes": "keyTimes",
    "lengthadjust": "lengthAdjust",
    "limitingconeangle": "limitingConeAngle",
    "markerheight": "markerHeight",
    "markerunits": "markerUnits",
    "markerwidth": "markerWidth",
    "maskcontentunits": "maskContentUnits",
    "maskunits": "maskUnits",
    "numoctaves": "numOctaves",
    "pathlength": "pathLength",
    "patterncontentunits": "patternContentUnits",
    "patterntransform": "patternTransform",
    "patternunits": "patternUnits",
    "pointsatx": "pointsAtX",
    "pointsaty": "pointsAtY",
    "pointsatz": "pointsAtZ",
    "preservealpha": "preserveAlpha",
    "preserveaspectratio": "preserveAspectRatio",
    "primitiveunits": "primitiveUnits",
    "refx": "refX",
    "refy": "refY",
    "repeatcount": "repeatCount",
    "repeatdur": "repeatDur",
    "requiredextensions": "requiredExtensions",
    "requiredfeatures": "requiredFeatures",
    "specularconstant": "specularConstant",
    "specularexponent": "specularExponent",
    "spreadmethod": "spreadMethod",
    "startoffset": "startOffset",
    "stddeviation": "stdDeviation",
    "stitchtiles": "stitchTiles",
    "surfacescale": "surfaceScale",
    "systemlanguage": "systemLanguage",
    "tablevalues": "tableValues",
    "targetx": "targetX",
    "targety": "targetY",
    "textlength": "textLength",
    "viewbox": "viewBox",
    "viewtarget": "viewTarget",
    "xchannelselector": "xChannelSelector",
    "ychannelselector": "yChannelSelector",
    "zoomandpan": "zoomAndPan",
  }.toTable()
  # Collect first: the table must not be modified while iterating its keys.
  var todo: seq[string]
  for k in token.attrs.keys:
    if k in adjusted:
      todo.add(k)
  for s in todo:
    token.attrs[adjusted[s]] = token.attrs[s]
    token.attrs.del(s)

template insert_character_impl(parser: var HTML5Parser, data: typed) =
  # Text is never inserted directly into a Document node.
  let location = parser.appropriatePlaceForInsert()
  if location.inside.nodeType == DOCUMENT_NODE:
    return
  insertText(parser, location.inside, $data, location.before)

proc insertCharacter(parser: var HTML5Parser, data: string) =
  insert_character_impl(parser, data)

proc insertCharacter(parser: var HTML5Parser, data: char) =
  insert_character_impl(parser, data)

proc insertCharacter(parser: var HTML5Parser, data: Rune) =
  insert_character_impl(parser, data)

proc insertComment[Handle](parser: var HTML5Parser[Handle], token: Token,
    position: AdjustedInsertionLocation[Handle]) =
  ## Create a comment from `token.data` and insert it at `position`.
  let comment = parser.createComment(token.data)
  parser.insert(position, comment)

proc insertComment(parser: var HTML5Parser, token: Token) =
  let position = parser.appropriatePlaceForInsert()
  parser.insertComment(token, position)

const PublicIdentifierEquals = [
  "-//W3O//DTD W3 HTML Strict 3.0//EN//",
  "-/W3C/DTD HTML 4.0 Transitional/EN",
  "HTML"
]

const PublicIdentifierStartsWith = [
  "+//Silmaril//dtd html Pro v0r11 19970101//",
  "-//AS//DTD HTML 3.0 asWedit + extensions//",
  "-//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions//",
  "-//IETF//DTD HTML 2.0 Level 1//",
  "-//IETF//DTD HTML 2.0 Level 2//",
  "-//IETF//DTD HTML 2.0 Strict Level 1//",
  "-//IETF//DTD HTML 2.0 Strict Level 2//",
  "-//IETF//DTD HTML 2.0 Strict//",
  "-//IETF//DTD HTML 2.0//",
  "-//IETF//DTD HTML 2.1E//",
  "-//IETF//DTD HTML 3.0//",
  "-//IETF//DTD HTML 3.2 Final//",
  "-//IETF//DTD HTML 3.2//",
  "-//IETF//DTD HTML 3//",
  "-//IETF//DTD HTML Level 0//",
  "-//IETF//DTD HTML Level 1//",
  "-//IETF//DTD HTML Level 2//",
  "-//IETF//DTD HTML Level 3//",
  "-//IETF//DTD HTML Strict Level 0//",
  "-//IETF//DTD HTML Strict Level 1//",
  "-//IETF//DTD HTML Strict Level 2//",
  "-//IETF//DTD HTML Strict Level 3//",
  "-//IETF//DTD HTML Strict//",
  "-//IETF//DTD HTML//",
  "-//Metrius//DTD Metrius Presentational//",
  "-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//",
  "-//Microsoft//DTD Internet Explorer 2.0 HTML//",
  "-//Microsoft//DTD Internet Explorer 2.0 Tables//",
  "-//Microsoft//DTD Internet Explorer 3.0 HTML Strict//",
  "-//Microsoft//DTD Internet Explorer 3.0 HTML//",
  "-//Microsoft//DTD Internet Explorer 3.0 Tables//",
  "-//Netscape Comm. Corp.//DTD HTML//",
  "-//Netscape Comm. Corp.//DTD Strict HTML//",
  "-//O'Reilly and Associates//DTD HTML 2.0//",
  "-//O'Reilly and Associates//DTD HTML Extended 1.0//",
  "-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//",
  "-//SQ//DTD HTML 2.0 HoTMetaL + extensions//",
  "-//SoftQuad Software//DTD HoTMetaL PRO 6.0::19990601::extensions to HTML 4.0//",
  "-//SoftQuad//DTD HoTMetaL PRO 4.0::19971010::extensions to HTML 4.0//",
  "-//Spyglass//DTD HTML 2.0 Extended//",
  "-//Sun Microsystems Corp.//DTD HotJava HTML//",
  "-//Sun Microsystems Corp.//DTD HotJava Strict HTML//",
  "-//W3C//DTD HTML 3 1995-03-24//",
  "-//W3C//DTD HTML 3.2 Draft//",
  "-//W3C//DTD HTML 3.2 Final//",
  "-//W3C//DTD HTML 3.2//",
  "-//W3C//DTD HTML 3.2S Draft//",
  "-//W3C//DTD HTML 4.0 Frameset//",
  "-//W3C//DTD HTML 4.0 Transitional//",
  "-//W3C//DTD HTML Experimental 19960712//",
  "-//W3C//DTD HTML Experimental 970421//",
  "-//W3C//DTD W3 HTML//",
  "-//W3O//DTD W3 HTML 3.0//",
  "-//WebTechs//DTD Mozilla HTML 2.0//",
  "-//WebTechs//DTD Mozilla HTML//",
]

const SystemIdentifierMissingAndPublicIdentifierStartsWith = [
  "-//W3C//DTD HTML 4.01 Frameset//",
  "-//W3C//DTD HTML 4.01 Transitional//"
]

const PublicIdentifierStartsWithLimited = [
  "-//W3C//DTD XHTML 1.0 Frameset//",
  "-//W3C//DTD XHTML 1.0 Transitional//"
]

const SystemIdentifierNotMissingAndPublicIdentifierStartsWith = [
  "-//W3C//DTD HTML 4.01 Frameset//",
  "-//W3C//DTD HTML 4.01 Transitional//"
]

func quirksConditions(token: Token): bool =
  ## Whether the DOCTYPE `token` puts the document into quirks mode.
  ## Public and system identifiers are compared ASCII case-insensitively.
  if token.quirks:
    return true
  if token.name.isnone or token.name.get != "html":
    return true
  if token.sysid.issome:
    if strutils.cmpIgnoreCase(token.sysid.get,
        "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") == 0:
      return true
  if token.pubid.issome:
    for id in PublicIdentifierEquals:
      if strutils.cmpIgnoreCase(token.pubid.get, id) == 0:
        return true
    for id in PublicIdentifierStartsWith:
      if token.pubid.get.startsWithNoCase(id):
        return true
    if token.sysid.isnone:
      for id in SystemIdentifierMissingAndPublicIdentifierStartsWith:
        if token.pubid.get.startsWithNoCase(id):
          return true
  return false

func limitedQuirksConditions(token: Token): bool =
  ## Whether the DOCTYPE `token` puts the document into limited-quirks mode.
  if token.pubid.isnone:
    return false
  for id in PublicIdentifierStartsWithLimited:
    if token.pubid.get.startsWithNoCase(id):
      return true
  if token.sysid.isnone:
    return false
  for id in SystemIdentifierNotMissingAndPublicIdentifierStartsWith:
    if token.pubid.get.startsWithNoCase(id):
      return true
  return false

# 13.2.6.2
proc genericRawtextElementParsingAlgorithm(parser: var HTML5Parser,
    token: Token) =
  ## Insert the element, switch the tokenizer to RAWTEXT and the parser to
  ## TEXT mode, remembering the mode to return to.
  discard parser.insertHTMLElement(token)
  parser.tokenizer.state = RAWTEXT
  parser.oldInsertionMode = parser.insertionMode
  parser.insertionMode = TEXT

proc genericRCDATAElementParsingAlgorithm(parser: var HTML5Parser,
    token: Token) =
  ## Insert the element, switch the tokenizer to RCDATA and the parser to
  ## TEXT mode, remembering the mode to return to.
  discard parser.insertHTMLElement(token)
  parser.tokenizer.state = RCDATA
  parser.oldInsertionMode = parser.insertionMode
  parser.insertionMode = TEXT

# Pop all elements, including the specified tag.
proc popElementsIncl(parser: var HTML5Parser, tag: TagType) =
  ## Pop open elements until one with tag type `tag` has been popped.
  while true:
    let popped = parser.popElement()
    if parser.getTagType(popped) == tag:
      break

proc popElementsIncl(parser: var HTML5Parser, tags: set[TagType]) =
  ## Pop open elements until one whose tag type is in `tags` has been popped.
  while true:
    let popped = parser.popElement()
    if parser.getTagType(popped) in tags:
      break

# https://html.spec.whatwg.org/multipage/parsing.html#closing-elements-that-have-implied-end-tags
proc generateImpliedEndTags(parser: var HTML5Parser) =
  ## Pop the current node for as long as it has an implied end tag.
  const implied = {TAG_DD, TAG_DT, TAG_LI, TAG_OPTGROUP, TAG_OPTION, TAG_P,
    TAG_RB, TAG_RP, TAG_RT, TAG_RTC}
  while true:
    if parser.getTagType(parser.currentNode) notin implied:
      break
    discard parser.popElement()

proc generateImpliedEndTags(parser: var HTML5Parser, exclude: TagType) =
  ## Same as above, except that an element of tag type `exclude` stops the
  ## popping.
  const implied = {
    TAG_DD, TAG_DT, TAG_LI, TAG_OPTGROUP, TAG_OPTION, TAG_P, TAG_RB, TAG_RP,
    TAG_RT, TAG_RTC
  }
  while true:
    let current = parser.getTagType(parser.currentNode)
    if current == exclude or current notin implied:
      break
    discard parser.popElement()

proc generateImpliedEndTagsThoroughly(parser: var HTML5Parser) =
  ## Variant with the extended tag set that also covers table-related
  ## elements.
  const implied = {TAG_CAPTION, TAG_COLGROUP, TAG_DD, TAG_DT, TAG_LI,
    TAG_OPTGROUP, TAG_OPTION, TAG_P, TAG_RB, TAG_RP, TAG_RT, TAG_RTC,
    TAG_TBODY, TAG_TD, TAG_TFOOT, TAG_TH, TAG_THEAD, TAG_TR}
  while true:
    if parser.getTagType(parser.currentNode) notin implied:
      break
    discard parser.popElement()

# https://html.spec.whatwg.org/multipage/parsing.html#push-onto-the-list-of-active-formatting-elements
proc pushOntoActiveFormatting[Handle](parser: var HTML5Parser[Handle],
    element: Handle, token: Token) =
  ## Append `(element, token)` to the list of active formatting elements.
  ## Scanning back to the last marker, once three entries with the same tag
  ## name, namespace and attributes have been seen, that third (earliest)
  ## entry is removed first.
  var duplicates = 0
  for idx in countdown(parser.activeFormatting.high, 0):
    let (candidate, candidateToken) = parser.activeFormatting[idx]
    if candidate == nil:
      break # marker: stop looking further back
    if not parser.tagNameEquals(candidate, element):
      continue
    if parser.getNamespace(candidate) != parser.getNamespace(element):
      continue
    # Attributes must match in both directions: every attribute of the
    # existing entry is present with the same value, and the new token
    # carries no attribute the entry lacks.
    var sameAttrs = true
    for name, value in candidateToken.attrs:
      if name notin token.attrs or value != token.attrs[name]:
        sameAttrs = false
        break
    if sameAttrs:
      for name in token.attrs.keys:
        if name notin candidateToken.attrs:
          sameAttrs = false
          break
    if not sameAttrs:
      continue
    inc duplicates
    if duplicates == 3:
      parser.activeFormatting.delete(idx)
      break
  parser.activeFormatting.add((element, token))
proc reconstructActiveFormatting[Handle](parser: var HTML5Parser[Handle]) =
  ## Reconstructs the active formatting elements: every entry after the last
  ## marker (nil element) / last entry that is still on the stack of open
  ## elements gets a freshly inserted element created from its token.
  ##
  ## Entries are matched against the stack of open elements by identity, not
  ## by tag type: an open element that merely shares the tag of a closed
  ## formatting element must not prevent reconstruction.
  if parser.activeFormatting.len == 0:
    return
  let last = parser.activeFormatting[^1][0]
  if last == nil or parser.openElements.find(last) >= 0:
    return
  # Rewind: step back while the previous entry also needs to be re-created.
  var i = parser.activeFormatting.high
  while i > 0:
    let prev = parser.activeFormatting[i - 1][0]
    if prev == nil or parser.openElements.find(prev) >= 0:
      break
    dec i
  # Advance/create: re-create every entry from `i` up to the end of the list.
  while i <= parser.activeFormatting.high:
    let token = parser.activeFormatting[i][1]
    parser.activeFormatting[i] = (parser.insertHTMLElement(token), token)
    inc i

proc clearActiveFormattingTillMarker(parser: var HTML5Parser) =
  ## Pops entries off the list of active formatting elements until a marker
  ## (nil element) has been popped or the list is empty.
  while parser.activeFormatting.len > 0 and
      parser.activeFormatting.pop()[0] != nil:
    discard

func isHTMLIntegrationPoint[Handle](parser: HTML5Parser[Handle],
    element: Handle): bool =
  ## Delegates to `isSVGIntegrationPoint`.
  return parser.isSVGIntegrationPoint(element) # (NOTE MathML not implemented)

func extractEncFromMeta(s: string): Charset =
  ## Extracts a character encoding from the value of a meta element's
  ## `content` attribute: finds "charset" (ASCII case-insensitively), skips
  ## whitespace, expects '=', skips whitespace again, then reads either a
  ## quoted or an unquoted encoding label and passes it to `getCharset`.
  ##
  ## Returns CHARSET_UNKNOWN when no "charset=" is found, when nothing
  ## follows it, or when an opening quote has no matching closing quote.
  var i = 0
  while true: # Loop:
    var j = 0 # number of characters of "charset" matched so far
    while i < s.len:
      template check(c: static char) =
        if s[i] in {c, c.toUpperAscii()}: inc j
        # A mismatching 'c' may itself start a new match.
        elif s[i] in {'c', 'C'}: j = 1
        else: j = 0
      case j
      of 0: check 'c'
      of 1: check 'h'
      of 2: check 'a'
      of 3: check 'r'
      of 4: check 's'
      of 5: check 'e'
      of 6: check 't'
      of 7:
        inc j
        break
      else: discard
      inc i
    if j < 7: return CHARSET_UNKNOWN
    while i < s.len and s[i] in AsciiWhitespace: inc i
    if i >= s.len or s[i] != '=': continue
    inc i # skip '='
    # Whitespace is allowed between '=' and the value.
    while i < s.len and s[i] in AsciiWhitespace: inc i
    break
  if i >= s.len: return CHARSET_UNKNOWN
  if s[i] in {'"', '\''}:
    # Quoted label: take everything up to the next identical quote.
    let closing = s.find(s[i], i + 1)
    if closing <= i + 1: # unmatched quote, or empty label
      return CHARSET_UNKNOWN
    return getCharset(s.substr(i + 1, closing - 1))
  return getCharset(s.substr(i).until({';', ' '}))

proc changeEncoding(parser: var HTML5Parser, cs: Charset) =
  ## Switches the parser to charset `cs` and marks the confidence as certain.
  ##
  ## * If the document is currently decoded as UTF-16, nothing else changes.
  ## * A requested UTF-16 charset is replaced by UTF-8, and x-user-defined by
  ##   windows-1252.
  ## * If the resulting charset differs from the current one, it is stored
  ##   and `needsreinterpret` is set.
  parser.confidence = CONFIDENCE_CERTAIN
  if parser.charset in {CHARSET_UTF_16_LE, CHARSET_UTF_16_BE}:
    return
  let target =
    if cs in {CHARSET_UTF_16_LE, CHARSET_UTF_16_BE}: CHARSET_UTF_8
    elif cs == CHARSET_X_USER_DEFINED: CHARSET_WINDOWS_1252
    else: cs
  if target == parser.charset:
    return
  parser.charset = target
  parser.needsreinterpret = true

proc parseErrorByTokenType(parser: var HTML5Parser, tokenType: TokenType) =
  ## Reports the "unexpected ..." parse error matching `tokenType`. Only
  ## START_TAG, END_TAG and EOF are valid; anything else fails an assertion.
  case tokenType
  of START_TAG:
    parser.parseError UNEXPECTED_START_TAG
  of END_TAG:
    parser.parseError UNEXPECTED_END_TAG
  of EOF:
    parser.parseError UNEXPECTED_EOF
  else:
    doAssert false

proc adoptionAgencyAlgorithm[Handle](parser: var HTML5Parser[Handle],
    token: Token): bool =
  ## Runs the adoption agency algorithm for the end tag `token`.
  ##
  ## Returns true when no matching formatting element exists after the last
  ## marker (presumably so that the caller can handle the token like any
  ## other end tag - confirm against the call sites), false otherwise.
  template parse_error(e: ParseError) =
    parser.parseError(e)
  if parser.tagNameEquals(parser.currentNode, token):
    var fail = true
    for it in parser.activeFormatting:
      if it[0] == parser.currentNode:
        fail = false
    if fail:
      pop_current_node
      return false
  var i = 0 # outer loop counter; at most 8 iterations
  while true:
    if i >= 8: return false
    inc i
    if parser.activeFormatting.len == 0: return true
    # Find the last entry after the last marker whose tag name is the
    # token's tag name.
    var formatting: Handle
    var formattingIndex: int
    for j in countdown(parser.activeFormatting.high, 0):
      let element = parser.activeFormatting[j][0]
      if element == nil:
        return true
      if parser.tagNameEquals(element, token):
        formatting = element
        formattingIndex = j
        break
      if j == 0:
        return true
    let stackIndex = parser.openElements.find(formatting)
    if stackIndex < 0:
      parse_error ELEMENT_NOT_IN_OPEN_ELEMENTS
      parser.activeFormatting.delete(formattingIndex)
      return false
    if not parser.hasElementInScope(formatting):
      parse_error ELEMENT_NOT_IN_SCOPE
      return false
    if formatting != parser.currentNode:
      parse_error ELEMENT_NOT_CURRENT_NODE
    # The furthest block is the topmost special element below the formatting
    # element, i.e. the first one found when walking down from it.
    var furthestBlock: Handle = nil
    var furthestBlockIndex: int
    for j in stackIndex + 1 .. parser.openElements.high:
      if parser.getTagType(parser.openElements[j]) in SpecialElements:
        furthestBlock = parser.openElements[j]
        furthestBlockIndex = j
        break
    if furthestBlock == nil:
      while parser.popElement() != formatting: discard
      parser.activeFormatting.delete(formattingIndex)
      return false
    let commonAncestor = parser.openElements[stackIndex - 1]
    var bookmark = formattingIndex
    var node = furthestBlock
    var aboveNode = parser.openElements[furthestBlockIndex - 1]
    var lastNode = furthestBlock
    var j = 0 # inner loop counter
    while true:
      inc j
      node = aboveNode
      let nodeStackIndex = parser.openElements.find(node)
      if node == formatting: break
      var nodeFormattingIndex = -1
      for k in countdown(parser.activeFormatting.high, 0):
        if parser.activeFormatting[k][0] == node:
          nodeFormattingIndex = k
          break
      if j > 3 and nodeFormattingIndex >= 0:
        parser.activeFormatting.delete(nodeFormattingIndex)
        # An earlier entry got deleted, so later positions shift down by one.
        if nodeFormattingIndex < bookmark:
          dec bookmark
        if nodeFormattingIndex < formattingIndex:
          dec formattingIndex
        # From here on, node is no longer in the active formatting list.
        nodeFormattingIndex = -1
      if nodeFormattingIndex < 0:
        aboveNode = parser.openElements[nodeStackIndex - 1]
        parser.openElements.delete(nodeStackIndex)
        if nodeStackIndex < furthestBlockIndex:
          dec furthestBlockIndex
          furthestBlock = parser.openElements[furthestBlockIndex]
        continue
      let element = parser.createElement(
        parser.activeFormatting[nodeFormattingIndex][1], Namespace.HTML,
        commonAncestor)
      parser.activeFormatting[nodeFormattingIndex] = (element,
        parser.activeFormatting[nodeFormattingIndex][1])
      parser.openElements[nodeStackIndex] = element
      aboveNode = parser.openElements[nodeStackIndex - 1]
      node = element
      if lastNode == furthestBlock:
        bookmark = nodeFormattingIndex + 1
      parser.append(node, lastNode)
      lastNode = node
    let location = parser.appropriatePlaceForInsert(commonAncestor)
    location.inside.insert(lastNode, location.before)
    let formattingToken = parser.activeFormatting[formattingIndex][1]
    let element = parser.createElement(formattingToken, Namespace.HTML,
      furthestBlock)
    # Detach the children back to front (so indices stay valid while
    # removing), then re-append them in their original order.
    var tomove: seq[Handle]
    j = furthestBlock.childList.high
    while j >= 0:
      let child = furthestBlock.childList[j]
      tomove.add(child)
      parser.remove(child)
      dec j
    for k in countdown(tomove.high, 0):
      parser.append(element, tomove[k])
    parser.append(furthestBlock, element)
    # Remove the old entry first, then insert at the (adjusted) bookmark;
    # inserting first would make the delete hit the new entry whenever
    # bookmark == formattingIndex.
    parser.activeFormatting.delete(formattingIndex)
    if formattingIndex < bookmark:
      dec bookmark
    parser.activeFormatting.insert((element, formattingToken), bookmark)
    # After removing the formatting element (which is above the furthest
    # block), the furthest block sits at furthestBlockIndex - 1, so this
    # index places the new element immediately below it.
    parser.openElements.delete(stackIndex)
    parser.openElements.insert(element, furthestBlockIndex)

proc closeP(parser: var HTML5Parser) =
  ## Closes a p element: generates implied end tags (except for p), reports
  ## MISMATCHED_TAGS if the current node is not a p element, then pops
  ## elements up to and including a p element.
  parser.generateImpliedEndTags(TAG_P)
  if parser.getTagType(parser.currentNode) != TAG_P:
    parser.parseError(MISMATCHED_TAGS)
  parser.popElementsIncl(TAG_P)

# Following is an implementation of the state (?) machine defined in
# https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inhtml
# It uses the ad-hoc pattern matching macro `match' to apply the following
# transformations:
# * First, pairs of patterns and actions are stored in tuples (and `discard'
#   statements...)
# * These pairs are then assigned to token types, later mapped to legs of the
#   first case statement.
# * Another case statement is constructed where needed, e.g. for switching on
#   characters/tags/etc.
# * Finally, the whole thing is wrapped in a named block, to implement a
#   pseudo-goto by breaking out only when the else statement needn't be
#   executed.
#
# For example, the following code:
#
#   match token:
#     TokenType.COMMENT => (block: echo "comment")
#     ("