New html parser now does things, however pages are rendered upside down

author: bptato <nincsnevem662@gmail.com> 2021-01-31 22:23:59 +0100
committer: bptato <nincsnevem662@gmail.com> 2021-01-31 22:23:59 +0100
commit: e3a2ad473e2239bef3726e3ad6657f5f41c9462c (patch)
tree: cf2c16ee833c74826eda397780ff8b51d0689e97
parent: 5d79801b655aae7801fb27fc3a9422bdd5764882 (diff)
download: chawan-e3a2ad473e2239bef3726e3ad6657f5f41c9462c.tar.gz
6 files changed, 180 insertions, 119 deletions
diff --git a/buffer.nim b/buffer.nim
index 06459d11..1c244baa 100644
--- a/buffer.nim
+++ b/buffer.nim
@@ -59,7 +59,7 @@ func currentLineLength*(buffer: Buffer): int =
   return buffer.rawtext[buffer.cursory].runeLen()
 
 func atPercentOf*(buffer: Buffer): int =
-  return (100 * buffer.cursory) div buffer.lastLine()
+  return (100 * (buffer.cursory + 1)) div (buffer.lastLine() + 1)
 
 func fmtBetween*(buffer: Buffer, sx: int, sy: int, ex: int, ey: int): string =
   if sy < ey:
@@ -257,19 +257,23 @@ proc cursorNextWord*(buffer: Buffer): bool =
   var r: Rune
   var x = buffer.cursorx
   var y = buffer.cursory
-  fastRuneAt(buffer.rawtext[y], x, r, false)
-
-  while r != Rune(' '):
-    if x >= llen:
-      break
-    inc x
+  eprint 1
+  if llen >= 0:
     fastRuneAt(buffer.rawtext[y], x, r, false)
 
-  while r == Rune(' '):
-    if x >= llen:
-      break
-    inc x
-    fastRuneAt(buffer.rawtext[y], x, r, false)
+    while r != Rune(' '):
+      if x >= llen:
+        break
+      inc x
+      eprint 2
+      fastRuneAt(buffer.rawtext[y], x, r, false)
+
+    while r == Rune(' '):
+      if x >= llen:
+        break
+      inc x
+      eprint 3
+      fastRuneAt(buffer.rawtext[y], x, r, false)
 
   if x >= llen:
     if y < buffer.lastLine():
@@ -281,19 +285,20 @@ proc cursorPrevWord*(buffer: Buffer): bool =
   var r: Rune
   var x = buffer.cursorx
   var y = buffer.cursory
-  fastRuneAt(buffer.rawtext[y], x, r, false)
-
-  while r != Rune(' '):
-    if x == 0:
-      break
-    dec x
+  if buffer.currentLineLength() > 0:
     fastRuneAt(buffer.rawtext[y], x, r, false)
 
-  while r == Rune(' '):
-    if x == 0:
-      break
-    dec x
-    fastRuneAt(buffer.rawtext[y], x, r, false)
+    while r != Rune(' '):
+      if x == 0:
+        break
+      dec x
+      fastRuneAt(buffer.rawtext[y], x, r, false)
+
+    while r == Rune(' '):
+      if x == 0:
+        break
+      dec x
+      fastRuneAt(buffer.rawtext[y], x, r, false)
 
   if x == 0:
     if y < buffer.lastLine():
diff --git a/display.nim b/display.nim
index 78401598..da34f6c5 100644
--- a/display.nim
+++ b/display.nim
@@ -172,7 +172,7 @@ proc postAlignNode(buffer: Buffer, node: HtmlNode, state: var RenderState) =
     buffer.flushLine(state)
 
 proc renderNode(buffer: Buffer, node: HtmlNode, state: var RenderState) =
-  if node.isDocument() or node.parentNode == nil:
+  if node.isDocument():
     return
   let elem = node.nodeAttr()
   if elem.tagType == TAG_TITLE:
@@ -275,9 +275,8 @@ proc nrenderHtml*(buffer: Buffer) =
     let currElem = stack.pop()
     buffer.addNode(currElem)
     buffer.renderNode(currElem, state)
-    if currElem.childNodes.len > 0:
-      for item in currElem.childNodes:
-        stack.add(item)
+    for item in currElem.childNodes:
+      stack.add(item)
 
   buffer.setLastHtmlLine(state)
 
@@ -288,7 +287,7 @@ proc drawHtml(buffer: Buffer) =
   buffer.setLastHtmlLine(state)
 
 proc statusMsgForBuffer(buffer: Buffer) =
-  var msg = $buffer.cursory & "/" & $buffer.lastLine() & " (" &
+  var msg = $(buffer.cursory + 1) & "/" & $(buffer.lastLine() + 1) & " (" &
             $buffer.atPercentOf() & "%) " &
             "<" & buffer.title & ">"
   if buffer.hovertext.len > 0:
diff --git a/htmlelement.nim b/htmlelement.nim
index 7c0d9169..1d4106a5 100644
--- a/htmlelement.nim
+++ b/htmlelement.nim
@@ -76,6 +76,7 @@ type
   HtmlSelectElementObj = object of HtmlElementObj
     name*: string
     value*: string
+    valueSet*: bool
 
   HtmlOptionElement* = ref HtmlOptionElementObj
   HtmlOptionElementObj = object of HtmlElementObj
diff --git a/main.nim b/main.nim
index b6ee8cb2..bb709562 100644
--- a/main.nim
+++ b/main.nim
@@ -45,27 +45,27 @@ proc main*() =
     eprint "Failed to read keymap, falling back to default"
   let attrs = getTermAttributes()
   let buffer = newBuffer(attrs)
-  let uri = parseUri(paramStr(1))
-  buffers.add(buffer)
-  buffer.setLocation(uri)
-  buffer.htmlSource = loadPageUri(uri, buffer.htmlSource)
-  buffer.renderHtml()
-  var lastUri = uri
-  while displayPage(attrs, buffer):
-    statusMsg("Loading...", buffer.height)
-    var newUri = buffer.document.location
-    lastUri.anchor = ""
-    newUri.anchor = ""
-    if $lastUri != $newUri:
-      buffer.clearBuffer()
-      buffer.htmlSource = loadPageUri(buffer.document.location, buffer.htmlSource)
-      buffer.renderHtml()
-    lastUri = newUri
+  buffer.document = nparseHtml(getRemotePage("http://lite.duckduckgo.com"))
+  buffer.nrenderHtml()
+  discard displayPage(getTermAttributes(), buffer)
+  return
+  #let uri = parseUri(paramStr(1))
+  #buffers.add(buffer)
+  #buffer.setLocation(uri)
+  #buffer.htmlSource = loadPageUri(uri, buffer.htmlSource)
+  #buffer.renderHtml()
+  #var lastUri = uri
+  #while displayPage(attrs, buffer):
+  #  statusMsg("Loading...", buffer.height)
+  #  var newUri = buffer.document.location
+  #  lastUri.anchor = ""
+  #  newUri.anchor = ""
+  #  if $lastUri != $newUri:
+  #    buffer.clearBuffer()
+  #    buffer.htmlSource = loadPageUri(buffer.document.location, buffer.htmlSource)
+  #    buffer.renderHtml()
+  #  lastUri = newUri
 
 #waitFor loadPage("https://lite.duckduckgo.com/lite/?q=hello%20world")
 #eprint mk_wcswidth_cjk("abc•de")
-var buf = newBuffer(getTermAttributes())
-buf.document = nparseHtml(getRemotePage("http://lite.duckduckgo.com"))
-buf.nrenderHtml()
-discard displayPage(getTermAttributes(), buf)
-#main()
+main()
diff --git a/parser.nim b/parser.nim
index d69accf3..bb4ab6e1 100644
--- a/parser.nim
+++ b/parser.nim
@@ -7,6 +7,12 @@ import twtio
 import enums
 import strutils
 
+type
+  ParseState = object
+    closed: bool
+    parents: seq[HtmlNode]
+    parsedNode: HtmlNode
+
 #> no I won't manually write all this down
 #> maybe todo to accept stuff other than tagtype (idk how useful that'd be)
 #still todo, it'd be very useful
@@ -31,15 +37,65 @@ macro genEnumCase(s: string): untyped =
 func tagType(s: string): TagType =
   genEnumCase(s)
 
-func newHtmlElement(tagType: TagType): HtmlElement =
+func newHtmlElement(tagType: TagType, parentNode: HtmlNode): HtmlElement =
   case tagType
   of TAG_INPUT: result = new(HtmlInputElement)
   of TAG_A: result = new(HtmlAnchorElement)
   of TAG_SELECT: result = new(HtmlSelectElement)
   of TAG_OPTION: result = new(HtmlOptionElement)
   else: result = new(HtmlElement)
-  result.tagType = tagType
+
   result.nodeType = NODE_ELEMENT
+  result.tagType = tagType
+  result.parentNode = parentNode
+  if parentNode.isElemNode():
+    result.parentElement = HtmlElement(parentNode)
+
+  if tagType in DisplayInlineTags:
+    result.display = DISPLAY_INLINE
+  elif tagType in DisplayBlockTags:
+    result.display = DISPLAY_BLOCK
+  elif tagType in DisplayInlineBlockTags:
+    result.display = DISPLAY_INLINE_BLOCK
+  elif tagType == TAG_LI:
+    result.display = DISPLAY_LIST_ITEM
+  else:
+    result.display = DISPLAY_NONE
+
+  case tagType
+  of TAG_CENTER:
+    result.centered = true
+  of TAG_B:
+    result.bold = true
+  of TAG_I:
+    result.italic = true
+  of TAG_U:
+    result.underscore = true
+  of TAG_HEAD:
+    result.hidden = true
+  of TAG_STYLE:
+    result.hidden = true
+  of TAG_SCRIPT:
+    result.hidden = true
+  of TAG_OPTION:
+    result.hidden = true #TODO
+  of TAG_PRE, TAG_TD, TAG_TH:
+    result.margin = 1
+  of TAG_UL, TAG_OL:
+    result.indent = 1
+  of TAG_H1, TAG_H2, TAG_H3, TAG_H4, TAG_H5, TAG_H6:
+    result.bold = true
+    result.marginbottom = 1
+  else: discard
+
+  if parentNode.isElemNode():
+    let parent = HtmlElement(parentNode)
+    result.centered = result.centered or parent.centered
+    result.bold = result.bold or parent.bold
+    result.italic = result.italic or parent.italic
+    result.underscore = result.underscore or parent.underscore
+    result.hidden = result.hidden or parent.hidden
+    result.islink = result.islink or parent.islink
 
 func toInputType*(str: string): InputType =
   case str
@@ -103,77 +159,77 @@ proc applyAttribute(htmlElement: HtmlElement, key: string, value: string) =
     else: discard
   else: return
 
-var s = ""
+proc closeNode(state: var ParseState) =
+  state.parents.setLen(state.parents.len - 1)
+  state.closed = true
+
+proc closeSingleNodes(state: var ParseState) =
+  if not state.closed and state.parents[^1].isElemNode() and HtmlElement(state.parents[^1]).tagType in SingleTagTypes:
+    state.closeNode()
+
+proc processHtmlElement(state: var ParseState, htmlElement: HtmlElement) =
+  state.closed = false
+  if state.parents[^1].childNodes.len > 0:
+    htmlElement.previousSibling = state.parents[^1].childNodes[^1]
+    htmlElement.previousSibling.nextSibling = htmlElement
+  state.parents[^1].childNodes.add(htmlElement)
+  state.parents.add(htmlElement)
+
+proc applyNodeText(htmlNode: HtmlNode) =
+  htmlNode.rawtext = htmlNode.getRawText()
+  htmlNode.fmttext = htmlNode.getFmtText()
+
 proc nparseHtml*(inputStream: Stream): Document =
   var x: XmlParser
   x.open(inputStream, "")
-  var parents: seq[HtmlNode]
+  var state: ParseState
   let document = newDocument()
-  parents.add(document)
-  var closed = true
-  while parents.len > 0 and x.kind != xmlEof:
-    var currParent = parents[^1]
-    while true:
-      var parsedNode: HtmlNode
+  state.parents.add(document)
+  while state.parents.len > 0 and x.kind != xmlEof:
+    x.next()
+    case x.kind
+    of xmlComment: discard #TODO
+    of xmlElementStart:
+      eprint "<" & x.rawdata & ">"
+      state.closeSingleNodes()
+      let parsedNode = newHtmlElement(tagType(x.rawData), state.parents[^1])
+      parsedNode.applyNodeText()
+      state.processHtmlElement(parsedNode)
+    of xmlElementEnd:
+      eprint "</" & x.rawdata & ">"
+      state.closeNode()
+    of xmlElementOpen:
+      var s = "<" & x.rawdata
+      state.closeSingleNodes()
+      let parsedNode = newHtmlElement(tagType(x.rawData), state.parents[^1])
       x.next()
-      case x.kind
-      of xmlComment: discard #TODO
-      of xmlElementStart:
-        if not closed and currParent.isElemNode() and HtmlElement(currParent).tagType in SingleTagTypes:
-          parents.setLen(parents.len - 1)
-          currParent = parents[^1]
-          closed = true
-        eprint "<" & x.rawData & ">"
-        parsedNode = newHtmlElement(tagType(x.rawData))
-        currParent.childNodes.add(parsedNode)
-        if currParent.isElemNode():
-          parsedNode.parentElement = HtmlElement(currParent)
-        parsedNode.parentNode = currParent
-        parents.add(parsedNode)
-        closed = false
-        break
-      of xmlElementEnd:
-        eprint "</" & x.rawData & ">"
-        parents.setLen(parents.len - 1)
-        closed = true
-      of xmlElementOpen:
-        if not closed and currParent.isElemNode() and HtmlElement(currParent).tagType in SingleTagTypes:
-          parents.setLen(parents.len - 1)
-          currParent = parents[^1]
-          closed = true
-        parsedNode = newHtmlElement(tagType(x.rawData))
-        s = "<" & x.rawData
+      while x.kind != xmlElementClose and x.kind != xmlEof:
+        if x.kind == xmlAttribute:
+          HtmlElement(parsedNode).applyAttribute(x.rawData.tolower(), x.rawData2)
+          s &= " " & x.rawdata & "=\"" & x.rawdata2 & "\""
+        elif x.kind == xmlError:
+          HtmlElement(parsedNode).applyAttribute(x.rawData.tolower(), "")
+        elif x.kind == xmlCharData:
+          if x.rawData.strip() == "/>":
+            break
+        else:
+          assert(false, "wtf") #TODO
         x.next()
-        while x.kind != xmlElementClose and x.kind != xmlEof:
-          if x.kind == xmlAttribute:
-            HtmlElement(parsedNode).applyAttribute(x.rawData.tolower(), x.rawData2)
-            s &= " "
-            s &= x.rawData
-            s &= "=\""
-            s &= x.rawData2
-            s &= "\""
-          x.next()
-        s &= ">"
-        eprint s
-
-        currParent.childNodes.add(parsedNode)
-        if currParent.isElemNode():
-          parsedNode.parentElement = HtmlElement(currParent)
-        parsedNode.parentNode = currParent
-        parents.add(parsedNode)
-        closed = false
-        break
-      of xmlCharData:
-        let textNode = new(HtmlNode)
-        textNode.nodeType = NODE_TEXT
-        textNode.rawtext = x.rawData
-        currParent.childNodes.add(textNode)
-        textNode.parentNode = currParent
-        if currParent.isElemNode():
-          textNode.parentElement = HtmlElement(currParent)
-        eprint x.rawData, currParent.nodeType
-      of xmlEntity:
-        eprint "entity", x.rawData
-      of xmlEof: break
-      else: discard
+      s &= ">"
+      eprint s
+      parsedNode.applyNodeText()
+      state.processHtmlElement(parsedNode)
+    of xmlCharData:
+      eprint x.rawdata
+      let textNode = new(HtmlNode)
+      textNode.nodeType = NODE_TEXT
+      state.parents[^1].childNodes.add(textNode)
+      textNode.parentNode = state.parents[^1]
+      if state.parents[^1].isElemNode():
+        textNode.parentElement = HtmlElement(state.parents[^1])
+      textNode.rawtext = x.rawData
+      textNode.applyNodeText()
+    of xmlEntity: discard #TODO
+    of xmlEof: break
+    else: discard
   return document
diff --git a/twtstr.nim b/twtstr.nim
index 1f6b49d4..09b19cc0 100644
--- a/twtstr.nim
+++ b/twtstr.nim
@@ -88,7 +88,7 @@ func findChar*(str: string, c: Rune, start: int = 0): int =
 #Measure length of rune. Transpiled from https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
 
 #auxiliary function for binary search in interval table
-#TODO: use binary search in stdlib?
+#TODO: use binary search from stdlib?
 func bisearch(ucs: Rune, table: openarray[(int, int)]): bool =
   var max = table.high
   var min = 0
author	bptato <nincsnevem662@gmail.com>	2021-01-31 22:23:59 +0100
committer	bptato <nincsnevem662@gmail.com>	2021-01-31 22:23:59 +0100
commit	e3a2ad473e2239bef3726e3ad6657f5f41c9462c (patch)
tree	cf2c16ee833c74826eda397780ff8b51d0689e97
parent	5d79801b655aae7801fb27fc3a9422bdd5764882 (diff)
download	chawan-e3a2ad473e2239bef3726e3ad6657f5f41c9462c.tar.gz