diff options
author | bptato <nincsnevem662@gmail.com> | 2021-03-13 13:40:23 +0100 |
---|---|---|
committer | bptato <nincsnevem662@gmail.com> | 2021-03-13 13:40:23 +0100 |
commit | 97f19da347b27a4d12f54784fa8bcbf304aa4fea (patch) | |
tree | c4bd5c601f0f8774402e4182bde8af6f5bcd5c3b | |
parent | 6084e104d5c3868196c9e2d3748c6627cf983470 (diff) | |
download | chawan-97f19da347b27a4d12f54784fa8bcbf304aa4fea.tar.gz |
Moved stuff etc
-rw-r--r-- | Makefile | 8 | ||||
-rw-r--r-- | as | 5 | ||||
-rw-r--r-- | default.css | 0 | ||||
-rw-r--r-- | readme.md | 9 | ||||
-rw-r--r-- | src/a.nim | 1 | ||||
-rw-r--r-- | src/buffer.nim (renamed from buffer.nim) | 40 | ||||
-rw-r--r-- | src/config (renamed from config) | 0 | ||||
-rw-r--r-- | src/config.nim (renamed from config.nim) | 0 | ||||
-rw-r--r-- | src/default.css | 20 | ||||
-rw-r--r-- | src/display.nim (renamed from display.nim) | 69 | ||||
-rw-r--r-- | src/dom.nim (renamed from dom.nim) | 51 | ||||
-rwxr-xr-x | src/entity | bin | 0 -> 85288 bytes | |||
-rw-r--r-- | src/entity.json (renamed from entity.json) | 0 | ||||
-rw-r--r-- | src/entity.nim | 18 | ||||
-rw-r--r-- | src/enums.nim (renamed from enums.nim) | 10 | ||||
-rw-r--r-- | src/main.nim (renamed from main.nim) | 2 | ||||
-rw-r--r-- | src/parser.nim (renamed from parser.nim) | 167 | ||||
-rw-r--r-- | src/radixtree.nim (renamed from radixtree.nim) | 62 | ||||
-rw-r--r-- | src/style.nim | 42 | ||||
-rw-r--r-- | src/termattrs.nim (renamed from termattrs.nim) | 0 | ||||
-rw-r--r-- | src/twtio.nim (renamed from twtio.nim) | 4 | ||||
-rw-r--r-- | src/twtstr.nim (renamed from twtstr.nim) | 101 | ||||
-rw-r--r-- | style.nim | 0 |
23 files changed, 396 insertions, 213 deletions
diff --git a/Makefile b/Makefile index 74872b8a..569cb2b5 100644 --- a/Makefile +++ b/Makefile @@ -1,9 +1,9 @@ debug: - nim compile -d:ssl -o:twt main.nim + nim compile -d:ssl -o:twt src/main.nim release: - nim compile -d:release -d:ssl -o:twt main.nim + nim compile -d:release -d:ssl -o:twt src/main.nim release_opt: - nim compile -d:danger -d:ssl -o:twt main.nim + nim compile -d:danger -d:ssl -o:twt src/main.nim clean: - rm ./twt ./dtwt ./twt_opt + rm ./twt all: debug release release_opt diff --git a/as b/as deleted file mode 100644 index 1b43b788..00000000 --- a/as +++ /dev/null @@ -1,5 +0,0 @@ -<html> -<body> -Hello? -</body> -</html> diff --git a/default.css b/default.css deleted file mode 100644 index e69de29b..00000000 --- a/default.css +++ /dev/null diff --git a/readme.md b/readme.md index 32c6a7dc..7c95e958 100644 --- a/readme.md +++ b/readme.md @@ -16,7 +16,8 @@ Currently implemented features are: Planned features (roughly in order of importance): -* improved html rendering and parsing +* stylesheets +* improved html rendering (like, actually functioning) * form * table * cookie @@ -25,11 +26,11 @@ Planned features (roughly in order of importance): * image (sixel/kitty) * audio * JavaScript -* extension API (adblock support?) * video (sixel/kitty) -* custom charsets? +* extension API? +* non-unicode charsets? * async? -* markdown? (with pandoc or built-in parser?) +* markdown? (with built-in parser) * gopher? * gemini? diff --git a/src/a.nim b/src/a.nim new file mode 100644 index 00000000..da4e4903 --- /dev/null +++ b/src/a.nim @@ -0,0 +1 @@ +echo int(high(char)) diff --git a/buffer.nim b/src/buffer.nim index cb67c417..5eaa4717 100644 --- a/buffer.nim +++ b/src/buffer.nim @@ -1,5 +1,3 @@ -#beware, awful code ahead - import options import uri import tables @@ -29,8 +27,8 @@ type nodes*: seq[Node] links*: seq[Node] clickables*: seq[Node] - elements*: seq[HtmlElement] - idelements*: Table[string, HtmlElement] + elements*: seq[Element] + idelements*: Table[string, Element] selectedlink*: Node printwrite*: bool attrs*: TermAttributes @@ -92,7 +90,7 @@ func findSelectedElement*(buffer: Buffer): Option[HtmlElement] = func canScroll*(buffer: Buffer): bool = return buffer.lastLine() > buffer.height -func getElementById*(buffer: Buffer, id: string): HtmlElement = +func getElementById*(buffer: Buffer, id: string): Element = if buffer.idelements.hasKey(id): return buffer.idelements[id] return nil @@ -104,28 +102,26 @@ proc findSelectedNode*(buffer: Buffer): Option[Node] = return some(node) return none(Node) -proc addNode*(buffer: Buffer, htmlNode: Node) = - buffer.nodes.add(htmlNode) +proc addNode*(buffer: Buffer, node: Node) = + buffer.nodes.add(node) - #TODO - if htmlNode.isTextNode() and htmlNode.parentElement != nil and HtmlElement(htmlNode.parentElement).islink: - buffer.links.add(htmlNode) + if node.isTextNode() and node.parentElement != nil and node.parentElement.getStyle().islink: + buffer.links.add(node) - if htmlNode.isElemNode(): - case HtmlElement(htmlNode).tagType + if node.isElemNode(): + case Element(node).tagType of TAG_INPUT, TAG_OPTION: - if not HtmlElement(htmlNode).hidden: - buffer.clickables.add(htmlNode) + if not Element(node).hidden: + buffer.clickables.add(node) else: discard - elif htmlNode.isTextNode(): - #TODO - if htmlNode.parentElement != nil and HtmlElement(htmlNode.parentElement).islink: - let anchor = htmlNode.ancestor(TAG_A) + elif node.isTextNode(): + if node.parentElement != nil and node.parentElement.style.islink: + let anchor = node.ancestor(TAG_A) assert(anchor != nil) buffer.clickables.add(anchor) - if htmlNode.isElemNode(): - let elem = HtmlElement(htmlNode) + if node.isElemNode(): + let elem = Element(node) buffer.elements.add(elem) if elem.id != "" and not buffer.idelements.hasKey(elem.id): buffer.idelements[elem.id] = elem @@ -411,7 +407,7 @@ proc checkLinkSelection*(buffer: Buffer): bool = return false else: let anchor = buffer.selectedlink.ancestor(TAG_A) - anchor.selected = false + anchor.style.selected = false buffer.selectedlink.fmttext = buffer.selectedlink.getFmtText() buffer.selectedlink = nil buffer.hovertext = "" @@ -427,7 +423,7 @@ proc checkLinkSelection*(buffer: Buffer): bool = buffer.selectedlink = node let anchor = node.ancestor(TAG_A) assert(anchor != nil) - anchor.selected = true + anchor.style.selected = true buffer.hovertext = HtmlAnchorElement(anchor).href var stack: seq[Node] stack.add(anchor) diff --git a/config b/src/config index 99397288..99397288 100644 --- a/config +++ b/src/config diff --git a/config.nim b/src/config.nim index 6d185a1d..6d185a1d 100644 --- a/config.nim +++ b/src/config.nim diff --git a/src/default.css b/src/default.css new file mode 100644 index 00000000..db11f036 --- /dev/null +++ b/src/default.css @@ -0,0 +1,20 @@ +head { + display: none +} + +a, abbr, b, bdo, br, button, cite, code, +del, dfn, em, font, i, img, ins, input, +iframe, kbd, label, map, object, q, samp, +script, select, small, span, strong, sub, sup, +textarea, tt, var, font, iframe, u, s, strike, +frame, img, input { + display: block +} + +li { + display: list-item +} + +br::before { + content: "\\" +} diff --git a/display.nim b/src/display.nim index ff5e63f1..d7d69c4a 100644 --- a/display.nim +++ b/src/display.nim @@ -119,86 +119,75 @@ proc writeWrappedText(buffer: Buffer, state: var RenderState, node: Node) = state.lastwidth = max(state.lastwidth, state.x) proc preAlignNode(buffer: Buffer, node: Node, state: var RenderState) = - let elem = node.nodeAttr() + let style = node.getStyle() if state.rawline.len > 0 and node.firstNode() and state.blanklines == 0: buffer.flushLine(state) if node.firstNode(): - while state.blanklines < max(node.parentElement.margin, node.parentElement.margintop): + while state.blanklines < max(style.margin, style.margintop): buffer.flushLine(state) - if elem.parentNode.nodeType == ELEMENT_NODE: - state.indent += elem.parentElement.indent + state.indent += style.indent if state.rawline.len > 0 and state.blanklines == 0 and node.displayed(): buffer.addSpaces(state, state.nextspaces) state.nextspaces = 0 - if state.blankspaces < max(elem.margin, elem.marginleft): - buffer.addSpaces(state, max(elem.margin, elem.marginleft) - state.blankspaces) + if state.blankspaces < max(style.margin, style.marginleft): + buffer.addSpaces(state, max(style.margin, style.marginleft) - state.blankspaces) - if elem.centered and state.rawline.len == 0 and node.displayed(): + if style.centered and state.rawline.len == 0 and node.displayed(): buffer.addSpaces(state, max(buffer.width div 2 - state.centerlen div 2, 0)) state.centerlen = 0 - if node.isElemNode() and elem.display == DISPLAY_LIST_ITEM and state.indent > 0: + if node.isElemNode() and style.display == DISPLAY_LIST_ITEM and state.indent > 0: if state.blanklines == 0: buffer.flushLine(state) - var listchar = "" - case elem.parentElement.tagType - of TAG_UL: - listchar = "•" - of TAG_OL: - inc state.listval - listchar = $state.listval & ")" - else: - return + var listchar = "•" + #case elem.parentElement.tagType + #of TAG_UL: + # listchar = "•" + #of TAG_OL: + # inc state.listval + # listchar = $state.listval & ")" + #else: + # return buffer.addSpaces(state, state.indent) state.write(listchar) state.x += listchar.runeLen() buffer.addSpaces(state, 1) proc postAlignNode(buffer: Buffer, node: Node, state: var RenderState) = - let elem = node.nodeAttr() + let style = node.getStyle() if node.getRawLen() > 0: state.blanklines = 0 state.blankspaces = 0 if state.rawline.len > 0 and state.blanklines == 0: - state.nextspaces += max(elem.margin, elem.marginright) + state.nextspaces += max(style.margin, style.marginright) #if node.lastNode() and (node.isTextNode() or elem.childNodes.len == 0): # buffer.flushLine(state) if node.lastNode(): - while state.blanklines < max(node.parentElement.margin, node.parentElement.marginbottom): + while state.blanklines < max(style.margin, style.marginbottom): buffer.flushLine(state) - if elem.parentElement != nil: - state.indent -= elem.parentElement.indent - - if elem.tagType == TAG_BR and not node.firstNode(): - buffer.flushLine(state) + state.indent -= style.indent - if elem.display == DISPLAY_LIST_ITEM and node.lastNode(): + if style.display == DISPLAY_LIST_ITEM and node.lastNode(): buffer.flushLine(state) proc renderNode(buffer: Buffer, node: Node, state: var RenderState) = if not (node.nodeType in {ELEMENT_NODE, TEXT_NODE}): return - if node.parentNode.nodeType != ELEMENT_NODE: - return - let elem = node.nodeAttr() - if elem.tagType in {TAG_SCRIPT, TAG_STYLE, TAG_NOSCRIPT}: - return - if elem.tagType == TAG_TITLE: - if node.isTextNode(): - buffer.title = node.getRawText() - return - else: discard - if elem.hidden: return + let style = node.getStyle() + if node.nodeType == ELEMENT_NODE: + if Element(node).tagType in {TAG_SCRIPT, TAG_STYLE, TAG_NOSCRIPT, TAG_TITLE}: + return + if style.hidden: return if not state.docenter: - if elem.centered: + if style.centered: state.centerqueue.add(node) - if node.lastNode() or elem.tagType == TAG_BR: + if node.lastNode(): state.docenter = true state.centerlen = 0 for node in state.centerqueue: @@ -331,7 +320,7 @@ proc inputLoop(attrs: TermAttributes, buffer: Buffer): bool = reshape = true redraw = true else: discard - if selectedElem.get().islink: + if selectedElem.get().getStyle().islink: let anchor = HtmlAnchorElement(buffer.selectedlink.ancestor(TAG_A)).href buffer.gotoLocation(parseUri(anchor)) return true diff --git a/dom.nim b/src/dom.nim index b018e0f2..9585456a 100644 --- a/dom.nim +++ b/src/dom.nim @@ -7,6 +7,7 @@ import tables import twtstr import twtio import enums +import style type EventTarget* = ref EventTargetObj @@ -71,22 +72,7 @@ type id*: string classList*: seq[string] attributes*: Table[string, Attr] - - margintop*: int - marginbottom*: int - marginleft*: int - marginright*: int - margin*: int - centered*: bool - display*: DisplayType - innerText*: string - bold*: bool - italic*: bool - underscore*: bool - islink*: bool - selected*: bool - numChildNodes*: int - indent*: int + style*: CSS2Properties HTMLElement* = ref HTMLElementObj HTMLElementObj = object of ElementObj @@ -155,8 +141,14 @@ func nodeAttr*(node: Node): HtmlElement = of ELEMENT_NODE: return HtmlElement(node) else: assert(false) +func getStyle*(node: Node): CSS2Properties = + case node.nodeType + of TEXT_NODE: return node.parentElement.style + of ELEMENT_NODE: return Element(node).style + else: assert(false) + func displayed*(node: Node): bool = - return node.rawtext.len > 0 and node.nodeAttr().display != DISPLAY_NONE + return node.rawtext.len > 0 and node.getStyle().display != DISPLAY_NONE func isTextNode*(node: Node): bool = return node.nodeType == TEXT_NODE @@ -215,7 +207,7 @@ func toInputSize*(str: string): int = if str.len == 0: return 20 for c in str: - if not c.isDigit: + if not c.isDigit(): return 20 return str.parseInt() @@ -242,9 +234,6 @@ func ancestor*(htmlNode: Node, tagType: TagType): HtmlElement = while result != nil and result.tagType != tagType: result = HtmlElement(result.parentElement) -func displayWhitespace*(htmlElem: HtmlElement): bool = - return htmlElem.display == DISPLAY_INLINE or htmlElem.display == DISPLAY_INLINE_BLOCK - proc getRawText*(htmlNode: Node): string = if htmlNode.isElemNode(): case HtmlElement(htmlNode).tagType @@ -255,11 +244,8 @@ proc getRawText*(htmlNode: Node): string = if htmlNode.parentElement != nil and htmlNode.parentElement.tagType != TAG_PRE: result = chardata.data.remove("\n") if unicode.strip(result).runeLen() > 0: - if htmlNode.nodeAttr().display != DISPLAY_INLINE: - if htmlNode.previousSibling == nil or htmlNode.previousSibling.nodeAttr().displayWhitespace(): - result = unicode.strip(result, true, false) - if htmlNode.nextSibling == nil or htmlNode.nextSibling.nodeAttr().displayWhitespace(): - result = unicode.strip(result, false, true) + if htmlNode.getStyle().display != DISPLAY_INLINE: + result = unicode.strip(result) else: result = "" else: @@ -278,22 +264,20 @@ func getFmtText*(htmlNode: Node): seq[string] = let chardata = CharacterData(htmlNode) result &= chardata.data if htmlNode.parentElement != nil: - #TODO - if HtmlElement(htmlNode.parentElement).islink: + if htmlNode.parentElement.style.islink: result = result.ansiFgColor(fgBlue).ansiReset() let anchor = htmlNode.ancestor(TAG_A) - if anchor != nil and anchor.selected: + if anchor != nil and anchor.style.selected: result = result.ansiStyle(styleUnderscore).ansiReset() if htmlNode.parentElement.tagType == TAG_OPTION: result = result.ansiFgColor(fgRed).ansiReset() - #TODO - if HtmlElement(htmlNode.parentElement).bold: + if htmlNode.parentElement.style.bold: result = result.ansiStyle(styleBright).ansiReset() - if HtmlElement(htmlNode.parentElement).italic: + if htmlNode.parentElement.style.italic: result = result.ansiStyle(styleItalic).ansiReset() - if HtmlElement(htmlNode.parentElement).underscore: + if htmlNode.parentElement.style.underscore: result = result.ansiStyle(styleUnderscore).ansiReset() else: assert(false, "Uhhhh I'm pretty sure we should have parent elements for text nodes?" & htmlNode.rawtext) @@ -331,6 +315,7 @@ func newHtmlElement*(tagType: TagType): HTMLElement = result.nodeType = ELEMENT_NODE result.tagType = tagType + result.style = new(CSS2Properties) func newAttr*(parent: Element, key: string, value: string): Attr = new(result) diff --git a/src/entity b/src/entity new file mode 100755 index 00000000..05f6705f --- /dev/null +++ b/src/entity Binary files differdiff --git a/entity.json b/src/entity.json index 557170b4..557170b4 100644 --- a/entity.json +++ b/src/entity.json diff --git a/src/entity.nim b/src/entity.nim new file mode 100644 index 00000000..3ebe9df0 --- /dev/null +++ b/src/entity.nim @@ -0,0 +1,18 @@ +import radixtree +import json +import tables +import strutils +import unicode +import twtstr + +proc genEntityMap(): RadixTree[string] = + let entity = staticRead"entity.json" + let entityJson = parseJson(entity) + var entityMap = newRadixTree[string]() + + for k, v in entityJson: + entityMap[k.substr(1)] = v{"characters"}.getStr() + + return entityMap + +const entityMap* = genEntityMap() diff --git a/enums.nim b/src/enums.nim index 5b765d01..a0606332 100644 --- a/enums.nim +++ b/src/enums.nim @@ -55,6 +55,16 @@ type TAG_DIALOG, TAG_MENU, TAG_SUMMARY, TAG_BLINK, TAG_CENTER, TAG_CONTENT, TAG_DIR, TAG_FONT, TAG_FRAME, TAG_NOFRAMES, TAG_FRAMESET, TAG_STRIKE, TAG_TT + CSSTokenType* = + enum + CSS_NO_TOKEN, CSS_IDENT_TOKEN, CSS_FUNCTION_TOKEN, CSS_AT_KEYWORD_TOKEN, + CSS_HASH_TOKEN, CSS_STRING_TOKEN, CSS_BAD_STRING_TOKEN, CSS_URL_TOKEN, + CSS_BAD_URL_TOKEN, CSS_DELIM_TOKEN, CSS_NUMBER_TOKEN, CSS_PERCENTAGE_TOKEN, + CSS_DIMENSION_TOKEN, CSS_WHITESPACE_TOKEN, CSS_CDO_TOKEN, CSS_CDC_TOKEN, + CSS_COLON_TOKEN, CSS_SEMICOLON_TOKEN, CSS_COMMA_TOKEN, CSS_OBRACKET_TOKEN, + CSS_CBRACKET_TOKEN, CSS_OPAREN_TOKEN, CSS_CPAREN_TOKEN, CSS_OBRACE_TOKEN, + CSS_CBRACE_TOKEN + const DisplayInlineTags* = { TAG_A, TAG_ABBR, TAG_B, TAG_BDO, TAG_BR, TAG_BUTTON, TAG_CITE, TAG_CODE, TAG_DEL, TAG_DFN, TAG_EM, TAG_FONT, TAG_I, TAG_IMG, TAG_INS, TAG_INPUT, diff --git a/main.nim b/src/main.nim index 677b812f..bc212ef0 100644 --- a/main.nim +++ b/src/main.nim @@ -61,5 +61,5 @@ proc main*() = buffer.renderHtml() lastUri = newUri -#waitFor loadPage("https://lite.duckduckgo.com/lite/?q=hello%20world") main() +#parseCSS(newFileStream("default.css", fmRead)) diff --git a/parser.nim b/src/parser.nim index 11c39c76..724d2b30 100644 --- a/parser.nim +++ b/src/parser.nim @@ -9,10 +9,11 @@ import enums import twtstr import dom import radixtree +import style +import entity type - ParseState = object - stream: Stream + HTMLParseState = object closed: bool parents: seq[Node] parsedNode: Node @@ -26,6 +27,9 @@ type parentNode: Node textNode: Text + CSSParseState = object + in_comment: bool + #func newHtmlElement(tagType: TagType, parentNode: Node): HtmlElement = # case tagType # of TAG_INPUT: result = new(HtmlInputElement) @@ -99,35 +103,6 @@ func inputSize*(str: string): int = return 20 return str.parseInt() -proc genEntityMap(): RadixTree[string] = - let entity = staticRead"entity.json" - let entityJson = parseJson(entity) - var entityMap = newRadixTree[string]() - - for k, v in entityJson: - entityMap[k.substr(1)] = v{"characters"}.getStr() - - return entityMap - -const entityMap = genEntityMap() - -func genHexCharMap(): seq[int] = - for i in 0..255: - case chr(i) - of '0'..'9': result &= i - ord('0') - of 'a'..'f': result &= i - ord('a') + 10 - of 'A'..'F': result &= i - ord('A') + 10 - else: result &= -1 - -func genDecCharMap(): seq[int] = - for i in 0..255: - case chr(i) - of '0'..'9': result &= i - ord('0') - else: result &= -1 - -const hexCharMap = genHexCharMap() -const decCharMap = genDecCharMap() - #w3m's getescapecmd and parse_tag, transpiled to nim. #(C) Copyright 1994-2002 by Akinori Ito #(C) Copyright 2002-2011 by Akinori Ito, Hironori Sakamoto, Fumitoshi Ukai @@ -152,22 +127,22 @@ proc getescapecmd(buf: string, at: var int): string = at = i return "" - num = hexCharMap[int(buf[i])] + num = hexValue(buf[i]) inc i - while i < buf.len and hexCharMap[int(buf[i])] != -1: + while i < buf.len and hexValue(buf[i]) != -1: num *= 0x10 - num += hexCharMap[int(buf[i])] + num += hexValue(buf[i]) inc i else: #dec if not isDigit(buf[i]): at = i return "" - num = decCharMap[int(buf[i])] + num = decValue(buf[i]) inc i while i < buf.len and isDigit(buf[i]): num *= 10 - num += decCharMap[int(buf[i])] + num += decValue(buf[i]) inc i if buf[i] == ';': @@ -177,15 +152,14 @@ proc getescapecmd(buf: string, at: var int): string = elif not isAlphaAscii(buf[i]): return "" - var n: uint16 = 0 + var n = 0 var s = "" while true: - let c = buf[i] - s &= c + s &= buf[i] if not entityMap.hasPrefix(s, n): break let pn = n - n = entityMap.getPrefix(s, n) + n = entityMap[s, n] if n != pn: s = "" inc i @@ -212,14 +186,14 @@ proc parse_tag(buf: string, at: var int): DOMParsedTag = if buf[at] == '/': inc at tag.open = false - skipBlanks(buf, at) + at = skipBlanks(buf, at) while at < buf.len and not buf[at].isWhitespace() and not (tag.open and buf[at] == '/') and buf[at] != '>': tagname &= buf[at].tolower() at += buf.runeLenAt(at) tag.tagid = tagType(tagname) - skipBlanks(buf, at) + at = skipBlanks(buf, at) while at < buf.len and buf[at] != '>': var value = "" @@ -228,10 +202,10 @@ proc parse_tag(buf: string, at: var int): DOMParsedTag = attrname &= buf[at].tolower() at += buf.runeLenAt(at) - skipBlanks(buf, at) + at = skipBlanks(buf, at) if buf[at] == '=': inc at - skipBlanks(buf, at) + at = skipBlanks(buf, at) if at < buf.len and (buf[at] == '"' or buf[at] == '\''): let startc = buf[at] inc at @@ -281,23 +255,23 @@ proc insertNode(parent: Node, node: Node) = elif parent.nodeType == DOCUMENT_NODE: node.ownerDocument = Document(parent) -proc processDocumentStartNode(state: var ParseState, newNode: Node) = +proc processDocumentStartNode(state: var HTMLParseState, newNode: Node) = insertNode(state.parentNode, newNode) state.parentNode = newNode -proc processDocumentEndNode(state: var ParseState) = +proc processDocumentEndNode(state: var HTMLParseState) = if state.parentNode == nil or state.parentNode.parentNode == nil: return state.parentNode = state.parentNode.parentNode -proc processDocumentText(state: var ParseState) = +proc processDocumentText(state: var HTMLParseState) = if state.textNode == nil: state.textNode = newText() processDocumentStartNode(state, state.textNode) processDocumentEndNode(state) -proc processDocumentStartElement(state: var ParseState, element: Element, tag: DOMParsedTag) = +proc processDocumentStartElement(state: var HTMLParseState, element: Element, tag: DOMParsedTag) = for k, v in tag.attrs: element.attributes[k] = element.newAttr(k, v) @@ -350,7 +324,7 @@ proc processDocumentStartElement(state: var ParseState, element: Element, tag: D if element.tagType in VoidTagTypes: processDocumentEndNode(state) -proc processDocumentEndElement(state: var ParseState, tag: DOMParsedTag) = +proc processDocumentEndElement(state: var HTMLParseState, tag: DOMParsedTag) = if tag.tagid in VoidTagTypes: return if state.parentNode.nodeType == ELEMENT_NODE: @@ -359,7 +333,7 @@ proc processDocumentEndElement(state: var ParseState, tag: DOMParsedTag) = processDocumentEndNode(state) -proc processDocumentTag(state: var ParseState, tag: DOMParsedTag) = +proc processDocumentTag(state: var HTMLParseState, tag: DOMParsedTag) = if state.in_script: if tag.tagid == TAG_SCRIPT: state.in_script = false @@ -382,9 +356,8 @@ proc processDocumentTag(state: var ParseState, tag: DOMParsedTag) = processDocumentStartElement(state, newHtmlElement(tag.tagid), tag) else: processDocumentEndElement(state, tag) - #XXX PROCDOCCASE stuff... good lord I'll never finish this thing -proc processDocumentPart(state: var ParseState, buf: string) = +proc processDocumentPart(state: var HTMLParseState, buf: string) = var at = 0 var max = 0 var was_script = false @@ -465,7 +438,7 @@ proc processDocumentPart(state: var ParseState, buf: string) = proc parseHtml*(inputStream: Stream): Document = let document = newDocument() - var state = ParseState(stream: inputStream) + var state = HTMLParseState() state.parentNode = document var till_when = false @@ -474,8 +447,6 @@ proc parseHtml*(inputStream: Stream): Document = var lineBuf: string while not inputStream.atEnd(): lineBuf = inputStream.readLine() - if lineBuf.len == 0: - break buf &= lineBuf var at = 0 @@ -496,3 +467,89 @@ proc parseHtml*(inputStream: Stream): Document = inputStream.close() return document + +proc consumeCSSString(state: var CSSParseState, line: seq[Rune], at: var int): CSSToken = + var s: seq[Rune] + let ending = line[at] + inc at + if at >= line.len: + return CSSToken(tokenType: CSS_STRING_TOKEN, value: s) + + while line[at] != ending: + inc at + + if at >= line.len: + return CSSToken(tokenType: CSS_STRING_TOKEN, value: s) + + s &= line[at] + + case line[at] + of Rune('\n'): + return CSSToken(tokenType: CSS_BAD_STRING_TOKEN) + of Rune('\\'): + inc at + if at > line.len: + break + var num = hexValue(line[at]) + if num != -1: + let ca = at + inc at + while at < line.len and hexValue(line[at]) != -1 and ca - at <= 5: + num *= 0x10 + num += hexValue(line[at]) + inc at + if num == 0 or num > 0x10FFFF or num in {0xD800..0xDFFF}: + s &= Rune(0xFFFD) + else: + s &= Rune(num) + else: discard + +proc consumeCSSNumberSign(state: var CSSParseState, line: seq[Rune], at: var int): CSSToken = + inc at + if at < line.len: + if isNameCodePoint(line[at]): + discard + else: + discard + + +proc consumeCSSToken(state: var CSSParseState, line: seq[Rune], at: var int): CSSToken = + case line[at] + of Rune('\n'), Rune('\t'), Rune(' '): + while at < line.len and line[at].isWhitespace(): + inc at + return CSSToken(tokenType: CSS_WHITESPACE_TOKEN) + of Rune('"'): + return consumeCSSString(state, line, at) + of Rune('#'): + return consumeCSSNumberSign(state, line, at) + else: inc at + +proc tokenizeCSS*(inputStream: Stream): seq[CSSToken] = + var line: seq[Rune] + var state: CSSParseState + while not inputStream.atEnd(): + line = inputStream.readLine().toRunes() + var cline: seq[Rune] = @[] + var lfc = false + for r in line: + case r + of Rune('\r'), Rune('\f'), Rune('\n'): + lfc = true + of Rune(0), Rune(0xD800)..Rune(0xDFFF): + cline &= Rune(0xFFFD) + else: + if lfc: + cline &= Rune('\n') + cline &= r + + cline &= Rune('\n') + var lat = 0 + while lat < cline.len: + result.add(consumeCSSToken(state, cline, lat)) + + inputStream.close() + +proc parseCSS*(inputStream: Stream) = + for t in tokenizeCSS(inputStream): + eprint t.tokenType diff --git a/radixtree.nim b/src/radixtree.nim index 7933f7a3..b04e1c22 100644 --- a/radixtree.nim +++ b/src/radixtree.nim @@ -1,10 +1,14 @@ +# Radix tree implementation, with some caveats: +# * insertion takes forever, so try to insert only during compile-time +# * it isn't that much faster than a hash table, even when used for e.g. parsing + import tables import strutils import json type RadixNode[T] = object - children*: Table[string, uint16] + children*: Table[string, int] case leaf*: bool of true: value*: T of false: discard @@ -16,8 +20,8 @@ func newRadixTree*[T](): RadixTree[T] = result.nodes.add(RadixNode[T](leaf: false)) proc `[]=`*[T](tree: var RadixTree[T], key: string, value: T) = - var n: uint16 = 0 - var p: uint16 = 0 + var n = 0 + var p = 0 var i = 0 var j = 0 var s = "" @@ -26,14 +30,14 @@ proc `[]=`*[T](tree: var RadixTree[T], key: string, value: T) = # find last matching node while i < key.len: s &= key[i] + inc i if s in tree.nodes[n].children: p = n n = tree.nodes[n].children[s] t &= s - j += s.len + j = i nodeKey = s s = "" - inc i for k in tree.nodes[n].children.keys: if s.len > 0 and k.startsWith(s[0]): @@ -46,7 +50,7 @@ proc `[]=`*[T](tree: var RadixTree[T], key: string, value: T) = # if first node, just add normally if n == 0: tree.nodes.add(RadixNode[T](leaf: true, value: value)) - tree.nodes[n].children[key] = uint16(tree.nodes.len - 1) + tree.nodes[n].children[key] = int(tree.nodes.len - 1) else: i = 0 var conflict = false @@ -66,10 +70,10 @@ proc `[]=`*[T](tree: var RadixTree[T], key: string, value: T) = # * remove old from parent assert(i != 0) - tree.nodes[p].children[key.substr(j, i - 1)] = uint16(tree.nodes.len) + tree.nodes[p].children[key.substr(j, i - 1)] = int(tree.nodes.len) tree.nodes.add(RadixNode[T](leaf: false)) tree.nodes[^1].children[t.substr(i)] = n - tree.nodes[^1].children[key.substr(i)] = uint16(tree.nodes.len) + tree.nodes[^1].children[key.substr(i)] = int(tree.nodes.len) tree.nodes.add(RadixNode[T](leaf: true, value: value)) tree.nodes[p].children.del(nodeKey) else: # new is either substr of old or old is substr of new @@ -80,51 +84,45 @@ proc `[]=`*[T](tree: var RadixTree[T], key: string, value: T) = tree.nodes[n].children = children elif i == j: # new is longer than the old, so add child to old - tree.nodes[n].children[key.substr(i)] = uint16(tree.nodes.len) + tree.nodes[n].children[key.substr(i)] = int(tree.nodes.len) tree.nodes.add(RadixNode[T](leaf: true, value: value)) elif i > 0: # new is shorter than old, so: # * add new to parent # * add old to new # * remove old from parent - tree.nodes[p].children[key.substr(j, i - 1)] = uint16(tree.nodes.len) + tree.nodes[p].children[key.substr(j, i - 1)] = int(tree.nodes.len) tree.nodes.add(RadixNode[T](leaf: true, value: value)) tree.nodes[^1].children[t.substr(i)] = n tree.nodes[p].children.del(nodeKey) -func getPrefix*[T](tree: RadixTree[T], prefix: string, at: uint16 = 0): uint16 = - var s = "" - var t = "" - var n = at - var i = 0 - while t.len < prefix.len: - s &= prefix[i] - t &= prefix[i] - if s in tree.nodes[n].children: - n = tree.nodes[n].children[s] - s = "" - inc i - - return n +func `[]`*[T](tree: RadixTree[T], key: string, at: int = 0): int = + return tree.nodes[at].children.getOrDefault(key, at) -func hasPrefix*[T](tree: RadixTree[T], prefix: string, at: uint16 = 0): bool = - var s = "" - var t = "" +func hasPrefix*[T](tree: RadixTree[T], prefix: string, at: int = 0): bool = var n = at var i = 0 + var j = 0 + var s = "" while i < prefix.len: s &= prefix[i] + inc i if s in tree.nodes[n].children: n = tree.nodes[n].children[s] - t &= s - s = "" - inc i + j = i - if t.len == prefix.len: + if j == prefix.len: return true for k in tree.nodes[n].children.keys: - if k.startsWith(prefix.substr(t.len)): + if prefix.len - j < k.len and k[0] == prefix[j]: + i = 1 + inc j + while j < prefix.len: + inc i + inc j + if k[i] != k[j]: + return false return true return false diff --git a/src/style.nim b/src/style.nim new file mode 100644 index 00000000..7de1e767 --- /dev/null +++ b/src/style.nim @@ -0,0 +1,42 @@ +import enums +import unicode + +type + CSS2Properties* = ref object + rawtext*: string + fmttext*: seq[string] + x*: int + y*: int + ex*: int + ey*: int + width*: int + height*: int + hidden*: bool + before*: CSS2Properties + after*: CSS2Properties + margintop*: int + marginbottom*: int + marginleft*: int + marginright*: int + margin*: int + centered*: bool + display*: DisplayType + bold*: bool + italic*: bool + underscore*: bool + islink*: bool + selected*: bool + indent*: int + + CSSToken* = object + case tokenType*: CSSTokenType + of CSS_IDENT_TOKEN, CSS_FUNCTION_TOKEN, CSS_AT_KEYWORD_TOKEN, + CSS_HASH_TOKEN, CSS_STRING_TOKEN, CSS_URL_TOKEN: + value*: seq[Rune] + tflaga*: bool #id / unrestricted + of CSS_DELIM_TOKEN: + rvalue*: Rune + of CSS_NUMBER_TOKEN, CSS_PERCENTAGE_TOKEN, CSS_DIMENSION_TOKEN: + ivalue*: int + tflagb*: bool #integer / number + else: discard diff --git a/termattrs.nim b/src/termattrs.nim index d49800ae..d49800ae 100644 --- a/termattrs.nim +++ b/src/termattrs.nim diff --git a/twtio.nim b/src/twtio.nim index db811f40..0fdbed7c 100644 --- a/twtio.nim +++ b/src/twtio.nim @@ -48,7 +48,7 @@ proc readLine*(prompt: string, current: var string, termwidth: int): bool = var feedNext = false var escNext = false var comp = false - var compi: uint16 = 0 + var compi = 0 var compa = 0 var comps = "" var cursor = news.len @@ -141,7 +141,7 @@ proc readLine*(prompt: string, current: var string, termwidth: int): bool = feedNext = true elif comp: comps &= c - let n = composeRemap.getPrefix(comps, compi) + let n = composeRemap[comps, compi] if n != compi: compi = n compa += comps.len diff --git a/twtstr.nim b/src/twtstr.nim index 42a64333..52db36cf 100644 --- a/twtstr.nim +++ b/src/twtstr.nim @@ -48,25 +48,36 @@ func remove*(str: string, c: string): string = if rem != rune: result &= $rune -const ControlChars = {chr(0x00)..chr(0x1F), chr(0x7F)} - -const Whitespace = { ' ', '\n', '\r', '\t' } - func isWhitespace*(c: char): bool = - return c in Whitespace + case c + of ' ', '\n', '\r', '\t', '\f': return true + else: return false func isControlChar*(c: char): bool = - return c in ControlChars + case c + of chr(0x00)..chr(0x1F): return true + of chr(0x7F): return true + else: return false func isControlChar*(r: Rune): bool = - return int(r) <= int(high(char)) and char(r) in ControlChars + case r + of Rune(0x00)..Rune(0x1F): return true + of Rune(0x7F): return true + else: return false + +func genControlCharMap*(): string = + for c in low(char)..high(char): + if c >= 'a': + result &= char(int(c) - int('a') + 1) + elif c == '?': + result &= char(127) + else: + result &= char(0) + +const controlCharMap = genControlCharMap() func getControlChar*(c: char): char = - if c >= 'a': - return char(int(c) - int('a') + 1) - elif c == '?': - return char(127) - assert(false) + return controlCharMap[int(c)] func getControlLetter*(c: char): char = if int(c) <= 0x1F: @@ -112,9 +123,57 @@ const breakWord = [ Rune('?'), Rune('.'), Rune(';') ] +func genHexCharMap(): seq[int] = + for i in 0..255: + case chr(i) + of '0'..'9': result &= i - ord('0') + of 'a'..'f': result &= i - ord('a') + 10 + of 'A'..'F': result &= i - ord('A') + 10 + else: result &= -1 + +func genDecCharMap(): seq[int] = + for i in 0..255: + case chr(i) + of '0'..'9': result &= i - ord('0') + else: result &= -1 + +const hexCharMap = genHexCharMap() +const decCharMap = genDecCharMap() + +func hexValue*(c: char): int = + return hexCharMap[int(c)] + +func decValue*(c: char): int = + return decCharMap[int(c)] + +func isAscii*(r: Rune): bool = + return int(r) <= int(high(char)) + +func hexValue*(r: Rune): int = + if isAscii(r): + return hexValue(char(r)) + return -1 + +func decValue*(r: Rune): int = + if isAscii(r): + return decValue(char(r)) + return -1 + func breaksWord*(r: Rune): bool = return r in breakWord +func isAlphaAscii*(r: Rune): bool = + return isAscii(r) and isAlphaAscii(char(r)) + +func isDigitAscii*(r: Rune): bool = + return isAscii(r) and isDigit(char(r)) + +func isNameStartCodePoint*(r: Rune): bool = + return not isAscii(r) or r == Rune('_') or isAlphaAscii(r) + +func isNameCodePoint*(r: Rune): bool = + return isNameStartCodePoint(r) or isDigitAscii(r) or r == Rune('-') + func substr*(s: seq[Rune], i: int, j: int): seq[Rune] = if s.len == 0: return @[] @@ -346,7 +405,19 @@ func mk_wcswidth_cjk(s: string): int = result += mk_wcwidth_cjk(r) return result +func skipBlanks*(buf: string, at: int): int = + result = at + while result < buf.len and buf[result].isWhitespace(): + inc result + +iterator split*(s: seq[Rune], sep: Rune): seq[Rune] = + var i = 0 + var prev = 0 + while i < s.len: + if s[i] == sep: + yield s.substr(prev, i) + prev = i + inc i -proc skipBlanks*(buf: string, at: var int) = - while at < buf.len and buf[at].isWhitespace(): - inc at + if prev < i: + yield s.substr(prev, i) diff --git a/style.nim b/style.nim deleted file mode 100644 index e69de29b..00000000 --- a/style.nim +++ /dev/null |