diff options
author | bptato <nincsnevem662@gmail.com> | 2021-08-05 17:05:20 +0200 |
---|---|---|
committer | bptato <nincsnevem662@gmail.com> | 2021-08-05 17:05:20 +0200 |
commit | 69a0f081e6eefdd6a52b0da6586100349b1a6ea8 (patch) | |
tree | 23a61bc20918809fb99927071feca01a55e37c92 /src/html | |
parent | caad7b577162a73524277a943050493c489bfb59 (diff) | |
download | chawan-69a0f081e6eefdd6a52b0da6586100349b1a6ea8.tar.gz |
more stuff
Diffstat (limited to 'src/html')
-rw-r--r-- | src/html/dom.nim | 15 | ||||
-rw-r--r-- | src/html/htmlparser.nim | 92 |
2 files changed, 54 insertions, 53 deletions
diff --git a/src/html/dom.nim b/src/html/dom.nim index 1a393134..2227aece 100644 --- a/src/html/dom.nim +++ b/src/html/dom.nim @@ -66,6 +66,7 @@ type all_elements*: seq[Element] head*: HTMLElement body*: HTMLElement + root*: Element CharacterData* = ref CharacterDataObj CharacterDataObj = object of NodeObj @@ -259,14 +260,8 @@ proc getRawText*(htmlNode: Node): string = else: return "" elif htmlNode.isTextNode(): let chardata = CharacterData(htmlNode) - #eprint "char data", chardata.data if htmlNode.parentElement != nil and htmlNode.parentElement.tagType != TAG_PRE: result = chardata.data.remove("\n") - #if unicode.strip(result).runeLen() > 0: - # if htmlNode.getStyle().display != DISPLAY_INLINE: - # result = unicode.strip(result) - #else: - # result = "" else: result = unicode.strip(chardata.data) if htmlNode.parentElement != nil and htmlNode.parentElement.tagType == TAG_OPTION: @@ -334,6 +329,7 @@ func newHtmlElement*(tagType: TagType): HTMLElement = func newDocument*(): Document = new(result) + result.root = newHtmlElement(TAG_HTML) result.head = newHtmlElement(TAG_HEAD) result.body = newHtmlElement(TAG_BODY) result.nodeType = DOCUMENT_NODE @@ -467,7 +463,8 @@ func calcRules(elem: Element, rules: CSSStylesheet): seq[CSSSimpleBlock] = proc applyRules*(document: Document, rules: CSSStylesheet): seq[tuple[e:Element,d:CSSDeclaration]] = var stack: seq[Element] - stack.add(document.firstElementChild) + stack.add(document.root) + while stack.len > 0: let elem = stack.pop() for oblock in calcRules(elem, rules): @@ -475,9 +472,9 @@ proc applyRules*(document: Document, rules: CSSStylesheet): seq[tuple[e:Element, for item in decls: if item of CSSDeclaration: if ((CSSDeclaration)item).important: - result.add((elem, (CSSDeclaration)item)) + result.add((elem, CSSDeclaration(item))) else: - elem.style.applyProperty((CSSDeclaration)item) + elem.style.applyProperty(CSSDeclaration(item)) for child in elem.children: stack.add(child) diff --git a/src/html/htmlparser.nim b/src/html/htmlparser.nim index 3cfc1d8d..67aec2e4 100644 --- a/src/html/htmlparser.nim +++ b/src/html/htmlparser.nim @@ -28,8 +28,9 @@ type in_style: bool in_noscript: bool in_body: bool - parentNode: Node + elementNode: Element textNode: Text + commentNode: Comment func inputSize*(str: string): int = if str.len == 0: @@ -88,38 +89,38 @@ proc getescapecmd(buf: string, at: var int): string = elif not isAlphaAscii(buf[i]): return "" - when defined(small): - var n = entityMap + when defined(full): + var n = 0 var s = "" while true: s &= buf[i] if not entityMap.hasPrefix(s, n): break let pn = n - n = n{s} + n = entityMap{s, n} if n != pn: s = "" inc i - if n.leaf: + if entityMap.nodes[n].leaf: at = i - return n.value + return entityMap.nodes[n].value else: - var n = 0 + var n = entityMap var s = "" while true: s &= buf[i] if not entityMap.hasPrefix(s, n): break let pn = n - n = entityMap{s, n} + n = n{s} if n != pn: s = "" inc i - if entityMap.nodes[n].leaf: + if n.leaf: at = i - return entityMap.nodes[n].value + return n.value return "" @@ -163,12 +164,13 @@ proc parse_tag(buf: string, at: var int): DOMParsedTag = let startc = buf[at] inc at while at < buf.len and buf[at] != startc: - var r: Rune - fastRuneAt(buf, at, r) - if r == Rune('&'): + if buf[at + 1] == '&': + inc at value &= getescapecmd(buf, at) else: - value &= $r + var r: Rune + fastRuneAt(buf, at, r) + value &= r if at < buf.len: inc at elif at < buf.len: @@ -223,23 +225,22 @@ proc insertNode(parent: Node, node: Node) = proc processDocumentBody(state: var HTMLParseState) = if not state.in_body: state.in_body = true - if state.parentNode.ownerDocument != nil: - state.parentNode = state.parentNode.ownerDocument.body + if state.elementNode.ownerDocument != nil: + state.elementNode = state.elementNode.ownerDocument.body -proc processDocumentStartNode(state: var HTMLParseState, newNode: Node) = - if state.parentNode.nodeType == ELEMENT_NODE and ((Element)state.parentNode).tagType == TAG_HTML: +proc processDocumentAddNode(state: var HTMLParseState, newNode: Node) = + if state.elementNode.nodeType == ELEMENT_NODE and ((Element)state.elementNode).tagType == TAG_HTML: if state.in_body: - state.parentNode = state.parentNode.ownerDocument.body + state.elementNode = state.elementNode.ownerDocument.body else: - state.parentNode = state.parentNode.ownerDocument.head + state.elementNode = state.elementNode.ownerDocument.head - insertNode(state.parentNode, newNode) - state.parentNode = newNode + insertNode(state.elementNode, newNode) proc processDocumentEndNode(state: var HTMLParseState) = - if state.parentNode == nil or state.parentNode.parentNode == nil: + if state.elementNode == nil or state.elementNode.nodeType == DOCUMENT_NODE: return - state.parentNode = state.parentNode.parentNode + state.elementNode = state.elementNode.parentElement proc processDocumentText(state: var HTMLParseState) = if state.textNode != nil and state.textNode.data.len > 0: @@ -247,8 +248,7 @@ proc processDocumentText(state: var HTMLParseState) = if state.textNode == nil: state.textNode = newText() - processDocumentStartNode(state, state.textNode) - processDocumentEndNode(state) + processDocumentAddNode(state, state.textNode) proc processDocumentStartElement(state: var HTMLParseState, element: Element, tag: DOMParsedTag) = var add = true @@ -288,10 +288,10 @@ proc processDocumentStartElement(state: var HTMLParseState, element: Element, ta processDocumentBody(state) else: discard - if state.parentNode.nodeType == ELEMENT_NODE: + if state.elementNode.nodeType == ELEMENT_NODE: case element.tagType - of TAG_LI, TAG_P: - if Element(state.parentNode).tagType == element.tagType: + of SelfClosingTagTypes: + if Element(state.elementNode).tagType == element.tagType: processDocumentEndNode(state) of TAG_H1: HTMLHeadingElement(element).rank = 1 @@ -307,8 +307,12 @@ proc processDocumentStartElement(state: var HTMLParseState, element: Element, ta HTMLHeadingElement(element).rank = 6 else: discard + if Element(state.elementNode).tagType == TAG_P and element.tagType in PClosingTagTypes: + processDocumentEndNode(state) + if add: - processDocumentStartNode(state, element) + processDocumentAddNode(state, element) + state.elementNode = element if element.tagType in VoidTagTypes: processDocumentEndNode(state) @@ -321,8 +325,8 @@ proc processDocumentEndElement(state: var HTMLParseState, tag: DOMParsedTag) = return if tag.tagid == TAG_BODY: return - if state.parentNode.nodeType == ELEMENT_NODE: - if Element(state.parentNode).tagType in {TAG_LI, TAG_P}: + if state.elementNode.nodeType == ELEMENT_NODE and tag.tagid != Element(state.elementNode).tagType: + if Element(state.elementNode).tagType in SelfClosingTagTypes: processDocumentEndNode(state) processDocumentEndNode(state) @@ -364,13 +368,13 @@ proc processDocumentPart(state: var HTMLParseState, buf: string) = inc at let p = getescapecmd(buf, at) if state.in_comment: - CharacterData(state.parentNode).data &= p + state.commentNode.data &= p else: processDocumentText(state) state.textNode.data &= p of '<': if state.in_comment: - CharacterData(state.parentNode).data &= buf[at] + state.commentNode.data &= buf[at] inc at else: var p = at @@ -383,7 +387,9 @@ proc processDocumentPart(state: var HTMLParseState, buf: string) = inc p at = p state.in_comment = true - processDocumentStartNode(state, newComment()) + let comment = newComment() + state.commentNode = comment + processDocumentAddNode(state, comment) if state.textNode != nil: state.textNode.rawtext = state.textNode.getRawText() state.textNode = nil @@ -420,31 +426,29 @@ proc processDocumentPart(state: var HTMLParseState, buf: string) = if p < max and buf[p] == '>': inc p at = p + state.commentNode = nil state.in_comment = false - processDocumentEndNode(state) if state.in_comment: - CharacterData(state.parentNode).data &= buf[at] + state.commentNode.data &= buf[at] inc at else: var r: Rune fastRuneAt(buf, at, r) if state.in_comment: - CharacterData(state.parentNode).data &= $r + state.commentNode.data &= $r else: processDocumentText(state) state.textNode.data &= $r proc parseHtml*(inputStream: Stream): Document = let document = newDocument() - let html = newHtmlElement(TAG_HTML) - insertNode(document, html) - insertNode(html, document.head) - insertNode(html, document.body) - #eprint document.body.firstElementChild != nil + insertNode(document, document.root) + insertNode(document.root, document.head) + insertNode(document.root, document.body) var state = HTMLParseState() - state.parentNode = html + state.elementNode = document.root var till_when = false |