diff options
author | bptato <nincsnevem662@gmail.com> | 2021-01-31 22:23:59 +0100 |
---|---|---|
committer | bptato <nincsnevem662@gmail.com> | 2021-01-31 22:23:59 +0100 |
commit | e3a2ad473e2239bef3726e3ad6657f5f41c9462c (patch) | |
tree | cf2c16ee833c74826eda397780ff8b51d0689e97 | |
parent | 5d79801b655aae7801fb27fc3a9422bdd5764882 (diff) | |
download | chawan-e3a2ad473e2239bef3726e3ad6657f5f41c9462c.tar.gz |
New html parser now does things, however pages are rendered upside down
-rw-r--r-- | buffer.nim | 51 | ||||
-rw-r--r-- | display.nim | 9 | ||||
-rw-r--r-- | htmlelement.nim | 1 | ||||
-rw-r--r-- | main.nim | 42 | ||||
-rw-r--r-- | parser.nim | 194 | ||||
-rw-r--r-- | twtstr.nim | 2 |
6 files changed, 180 insertions, 119 deletions
diff --git a/buffer.nim b/buffer.nim index 06459d11..1c244baa 100644 --- a/buffer.nim +++ b/buffer.nim @@ -59,7 +59,7 @@ func currentLineLength*(buffer: Buffer): int = return buffer.rawtext[buffer.cursory].runeLen() func atPercentOf*(buffer: Buffer): int = - return (100 * buffer.cursory) div buffer.lastLine() + return (100 * (buffer.cursory + 1)) div (buffer.lastLine() + 1) func fmtBetween*(buffer: Buffer, sx: int, sy: int, ex: int, ey: int): string = if sy < ey: @@ -257,19 +257,23 @@ proc cursorNextWord*(buffer: Buffer): bool = var r: Rune var x = buffer.cursorx var y = buffer.cursory - fastRuneAt(buffer.rawtext[y], x, r, false) - - while r != Rune(' '): - if x >= llen: - break - inc x + eprint 1 + if llen >= 0: fastRuneAt(buffer.rawtext[y], x, r, false) - while r == Rune(' '): - if x >= llen: - break - inc x - fastRuneAt(buffer.rawtext[y], x, r, false) + while r != Rune(' '): + if x >= llen: + break + inc x + eprint 2 + fastRuneAt(buffer.rawtext[y], x, r, false) + + while r == Rune(' '): + if x >= llen: + break + inc x + eprint 3 + fastRuneAt(buffer.rawtext[y], x, r, false) if x >= llen: if y < buffer.lastLine(): @@ -281,19 +285,20 @@ proc cursorPrevWord*(buffer: Buffer): bool = var r: Rune var x = buffer.cursorx var y = buffer.cursory - fastRuneAt(buffer.rawtext[y], x, r, false) - - while r != Rune(' '): - if x == 0: - break - dec x + if buffer.currentLineLength() > 0: fastRuneAt(buffer.rawtext[y], x, r, false) - while r == Rune(' '): - if x == 0: - break - dec x - fastRuneAt(buffer.rawtext[y], x, r, false) + while r != Rune(' '): + if x == 0: + break + dec x + fastRuneAt(buffer.rawtext[y], x, r, false) + + while r == Rune(' '): + if x == 0: + break + dec x + fastRuneAt(buffer.rawtext[y], x, r, false) if x == 0: if y < buffer.lastLine(): diff --git a/display.nim b/display.nim index 78401598..da34f6c5 100644 --- a/display.nim +++ b/display.nim @@ -172,7 +172,7 @@ proc postAlignNode(buffer: Buffer, node: HtmlNode, state: var RenderState) = buffer.flushLine(state) proc renderNode(buffer: Buffer, node: HtmlNode, state: var RenderState) = - if node.isDocument() or node.parentNode == nil: + if node.isDocument(): return let elem = node.nodeAttr() if elem.tagType == TAG_TITLE: @@ -275,9 +275,8 @@ proc nrenderHtml*(buffer: Buffer) = let currElem = stack.pop() buffer.addNode(currElem) buffer.renderNode(currElem, state) - if currElem.childNodes.len > 0: - for item in currElem.childNodes: - stack.add(item) + for item in currElem.childNodes: + stack.add(item) buffer.setLastHtmlLine(state) @@ -288,7 +287,7 @@ proc drawHtml(buffer: Buffer) = buffer.setLastHtmlLine(state) proc statusMsgForBuffer(buffer: Buffer) = - var msg = $buffer.cursory & "/" & $buffer.lastLine() & " (" & + var msg = $(buffer.cursory + 1) & "/" & $(buffer.lastLine() + 1) & " (" & $buffer.atPercentOf() & "%) " & "<" & buffer.title & ">" if buffer.hovertext.len > 0: diff --git a/htmlelement.nim b/htmlelement.nim index 7c0d9169..1d4106a5 100644 --- a/htmlelement.nim +++ b/htmlelement.nim @@ -76,6 +76,7 @@ type HtmlSelectElementObj = object of HtmlElementObj name*: string value*: string + valueSet*: bool HtmlOptionElement* = ref HtmlOptionElementObj HtmlOptionElementObj = object of HtmlElementObj diff --git a/main.nim b/main.nim index b6ee8cb2..bb709562 100644 --- a/main.nim +++ b/main.nim @@ -45,27 +45,27 @@ proc main*() = eprint "Failed to read keymap, falling back to default" let attrs = getTermAttributes() let buffer = newBuffer(attrs) - let uri = parseUri(paramStr(1)) - buffers.add(buffer) - buffer.setLocation(uri) - buffer.htmlSource = loadPageUri(uri, buffer.htmlSource) - buffer.renderHtml() - var lastUri = uri - while displayPage(attrs, buffer): - statusMsg("Loading...", buffer.height) - var newUri = buffer.document.location - lastUri.anchor = "" - newUri.anchor = "" - if $lastUri != $newUri: - buffer.clearBuffer() - buffer.htmlSource = loadPageUri(buffer.document.location, buffer.htmlSource) - buffer.renderHtml() - lastUri = newUri + buffer.document = nparseHtml(getRemotePage("http://lite.duckduckgo.com")) + buffer.nrenderHtml() + discard displayPage(getTermAttributes(), buffer) + return + #let uri = parseUri(paramStr(1)) + #buffers.add(buffer) + #buffer.setLocation(uri) + #buffer.htmlSource = loadPageUri(uri, buffer.htmlSource) + #buffer.renderHtml() + #var lastUri = uri + #while displayPage(attrs, buffer): + # statusMsg("Loading...", buffer.height) + # var newUri = buffer.document.location + # lastUri.anchor = "" + # newUri.anchor = "" + # if $lastUri != $newUri: + # buffer.clearBuffer() + # buffer.htmlSource = loadPageUri(buffer.document.location, buffer.htmlSource) + # buffer.renderHtml() + # lastUri = newUri #waitFor loadPage("https://lite.duckduckgo.com/lite/?q=hello%20world") #eprint mk_wcswidth_cjk("abc•de") -var buf = newBuffer(getTermAttributes()) -buf.document = nparseHtml(getRemotePage("http://lite.duckduckgo.com")) -buf.nrenderHtml() -discard displayPage(getTermAttributes(), buf) -#main() +main() diff --git a/parser.nim b/parser.nim index d69accf3..bb4ab6e1 100644 --- a/parser.nim +++ b/parser.nim @@ -7,6 +7,12 @@ import twtio import enums import strutils +type + ParseState = object + closed: bool + parents: seq[HtmlNode] + parsedNode: HtmlNode + #> no I won't manually write all this down #> maybe todo to accept stuff other than tagtype (idk how useful that'd be) #still todo, it'd be very useful @@ -31,15 +37,65 @@ macro genEnumCase(s: string): untyped = func tagType(s: string): TagType = genEnumCase(s) -func newHtmlElement(tagType: TagType): HtmlElement = +func newHtmlElement(tagType: TagType, parentNode: HtmlNode): HtmlElement = case tagType of TAG_INPUT: result = new(HtmlInputElement) of TAG_A: result = new(HtmlAnchorElement) of TAG_SELECT: result = new(HtmlSelectElement) of TAG_OPTION: result = new(HtmlOptionElement) else: result = new(HtmlElement) - result.tagType = tagType + result.nodeType = NODE_ELEMENT + result.tagType = tagType + result.parentNode = parentNode + if parentNode.isElemNode(): + result.parentElement = HtmlElement(parentNode) + + if tagType in DisplayInlineTags: + result.display = DISPLAY_INLINE + elif tagType in DisplayBlockTags: + result.display = DISPLAY_BLOCK + elif tagType in DisplayInlineBlockTags: + result.display = DISPLAY_INLINE_BLOCK + elif tagType == TAG_LI: + result.display = DISPLAY_LIST_ITEM + else: + result.display = DISPLAY_NONE + + case tagType + of TAG_CENTER: + result.centered = true + of TAG_B: + result.bold = true + of TAG_I: + result.italic = true + of TAG_U: + result.underscore = true + of TAG_HEAD: + result.hidden = true + of TAG_STYLE: + result.hidden = true + of TAG_SCRIPT: + result.hidden = true + of TAG_OPTION: + result.hidden = true #TODO + of TAG_PRE, TAG_TD, TAG_TH: + result.margin = 1 + of TAG_UL, TAG_OL: + result.indent = 1 + of TAG_H1, TAG_H2, TAG_H3, TAG_H4, TAG_H5, TAG_H6: + result.bold = true + result.marginbottom = 1 + else: discard + + if parentNode.isElemNode(): + let parent = HtmlElement(parentNode) + result.centered = result.centered or parent.centered + result.bold = result.bold or parent.bold + result.italic = result.italic or parent.italic + result.underscore = result.underscore or parent.underscore + result.hidden = result.hidden or parent.hidden + result.islink = result.islink or parent.islink func toInputType*(str: string): InputType = case str @@ -103,77 +159,77 @@ proc applyAttribute(htmlElement: HtmlElement, key: string, value: string) = else: discard else: return -var s = "" +proc closeNode(state: var ParseState) = + state.parents.setLen(state.parents.len - 1) + state.closed = true + +proc closeSingleNodes(state: var ParseState) = + if not state.closed and state.parents[^1].isElemNode() and HtmlElement(state.parents[^1]).tagType in SingleTagTypes: + state.closeNode() + +proc processHtmlElement(state: var ParseState, htmlElement: HtmlElement) = + state.closed = false + if state.parents[^1].childNodes.len > 0: + htmlElement.previousSibling = state.parents[^1].childNodes[^1] + htmlElement.previousSibling.nextSibling = htmlElement + state.parents[^1].childNodes.add(htmlElement) + state.parents.add(htmlElement) + +proc applyNodeText(htmlNode: HtmlNode) = + htmlNode.rawtext = htmlNode.getRawText() + htmlNode.fmttext = htmlNode.getFmtText() + proc nparseHtml*(inputStream: Stream): Document = var x: XmlParser x.open(inputStream, "") - var parents: seq[HtmlNode] + var state: ParseState let document = newDocument() - parents.add(document) - var closed = true - while parents.len > 0 and x.kind != xmlEof: - var currParent = parents[^1] - while true: - var parsedNode: HtmlNode + state.parents.add(document) + while state.parents.len > 0 and x.kind != xmlEof: + x.next() + case x.kind + of xmlComment: discard #TODO + of xmlElementStart: + eprint "<" & x.rawdata & ">" + state.closeSingleNodes() + let parsedNode = newHtmlElement(tagType(x.rawData), state.parents[^1]) + parsedNode.applyNodeText() + state.processHtmlElement(parsedNode) + of xmlElementEnd: + eprint "</" & x.rawdata & ">" + state.closeNode() + of xmlElementOpen: + var s = "<" & x.rawdata + state.closeSingleNodes() + let parsedNode = newHtmlElement(tagType(x.rawData), state.parents[^1]) x.next() - case x.kind - of xmlComment: discard #TODO - of xmlElementStart: - if not closed and currParent.isElemNode() and HtmlElement(currParent).tagType in SingleTagTypes: - parents.setLen(parents.len - 1) - currParent = parents[^1] - closed = true - eprint "<" & x.rawData & ">" - parsedNode = newHtmlElement(tagType(x.rawData)) - currParent.childNodes.add(parsedNode) - if currParent.isElemNode(): - parsedNode.parentElement = HtmlElement(currParent) - parsedNode.parentNode = currParent - parents.add(parsedNode) - closed = false - break - of xmlElementEnd: - eprint "</" & x.rawData & ">" - parents.setLen(parents.len - 1) - closed = true - of xmlElementOpen: - if not closed and currParent.isElemNode() and HtmlElement(currParent).tagType in SingleTagTypes: - parents.setLen(parents.len - 1) - currParent = parents[^1] - closed = true - parsedNode = newHtmlElement(tagType(x.rawData)) - s = "<" & x.rawData + while x.kind != xmlElementClose and x.kind != xmlEof: + if x.kind == xmlAttribute: + HtmlElement(parsedNode).applyAttribute(x.rawData.tolower(), x.rawData2) + s &= " " & x.rawdata & "=\"" & x.rawdata2 & "\"" + elif x.kind == xmlError: + HtmlElement(parsedNode).applyAttribute(x.rawData.tolower(), "") + elif x.kind == xmlCharData: + if x.rawData.strip() == "/>": + break + else: + assert(false, "wtf") #TODO x.next() - while x.kind != xmlElementClose and x.kind != xmlEof: - if x.kind == xmlAttribute: - HtmlElement(parsedNode).applyAttribute(x.rawData.tolower(), x.rawData2) - s &= " " - s &= x.rawData - s &= "=\"" - s &= x.rawData2 - s &= "\"" - x.next() - s &= ">" - eprint s - - currParent.childNodes.add(parsedNode) - if currParent.isElemNode(): - parsedNode.parentElement = HtmlElement(currParent) - parsedNode.parentNode = currParent - parents.add(parsedNode) - closed = false - break - of xmlCharData: - let textNode = new(HtmlNode) - textNode.nodeType = NODE_TEXT - textNode.rawtext = x.rawData - currParent.childNodes.add(textNode) - textNode.parentNode = currParent - if currParent.isElemNode(): - textNode.parentElement = HtmlElement(currParent) - eprint x.rawData, currParent.nodeType - of xmlEntity: - eprint "entity", x.rawData - of xmlEof: break - else: discard + s &= ">" + eprint s + parsedNode.applyNodeText() + state.processHtmlElement(parsedNode) + of xmlCharData: + eprint x.rawdata + let textNode = new(HtmlNode) + textNode.nodeType = NODE_TEXT + state.parents[^1].childNodes.add(textNode) + textNode.parentNode = state.parents[^1] + if state.parents[^1].isElemNode(): + textNode.parentElement = HtmlElement(state.parents[^1]) + textNode.rawtext = x.rawData + textNode.applyNodeText() + of xmlEntity: discard #TODO + of xmlEof: break + else: discard return document diff --git a/twtstr.nim b/twtstr.nim index 1f6b49d4..09b19cc0 100644 --- a/twtstr.nim +++ b/twtstr.nim @@ -88,7 +88,7 @@ func findChar*(str: string, c: Rune, start: int = 0): int = #Measure length of rune. Transpiled from https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c #auxiliary function for binary search in interval table -#TODO: use binary search in stdlib? +#TODO: use binary search from stdlib? func bisearch(ucs: Rune, table: openarray[(int, int)]): bool = var max = table.high var min = 0 |