From fa4560f63e38886b2b7541642b9aa1656dc40508 Mon Sep 17 00:00:00 2001 From: bptato Date: Sat, 7 Aug 2021 12:16:29 +0200 Subject: Reorganize imports --- makefile | 6 +- src/config.nim | 164 ----------------- src/config/config.nim | 165 +++++++++++++++++ src/css/box.nim | 5 +- src/css/cssparser.nim | 6 +- src/css/selector.nim | 7 +- src/css/style.nim | 10 +- src/html/dom.nim | 19 +- src/html/entity.nim | 4 +- src/html/htmlparser.nim | 459 ------------------------------------------------ src/html/parser.nim | 457 +++++++++++++++++++++++++++++++++++++++++++++++ src/io/buffer.nim | 23 +-- src/io/lineedit.nim | 10 +- src/main.nim | 8 +- src/utils/twtstr.nim | 2 +- 15 files changed, 660 insertions(+), 685 deletions(-) delete mode 100644 src/config.nim create mode 100644 src/config/config.nim delete mode 100644 src/html/htmlparser.nim create mode 100644 src/html/parser.nim diff --git a/makefile b/makefile index 1686ac64..92ab5048 100644 --- a/makefile +++ b/makefile @@ -1,12 +1,12 @@ NIMC = nim compile -FLAGS = -d:ssl -o:twt +FLAGS = -d:ssl -o:twt -p:src/ -p:. --import:utils/eprint FILES = src/main.nim debug: $(NIMC) $(FLAGS) $(FILES) release: - $(NIMC) $(FLAGS) -d:release $(FILES) + $(NIMC) $(FLAGS) -d:release -d:strip $(FILES) danger: - $(NIMC) $(FLAGS) -d:danger $(FILES) + $(NIMC) $(FLAGS) -d:danger -d:strip $(FILES) clean: rm ./twt diff --git a/src/config.nim b/src/config.nim deleted file mode 100644 index 1d2e2a46..00000000 --- a/src/config.nim +++ /dev/null @@ -1,164 +0,0 @@ -import tables -import os -import strutils - -import utils/twtstr -import utils/radixtree - -type - TwtAction* = - enum - NO_ACTION, - ACTION_FEED_NEXT, - ACTION_QUIT, - ACTION_CURSOR_UP, ACTION_CURSOR_DOWN, ACTION_CURSOR_LEFT, ACTION_CURSOR_RIGHT, - ACTION_CURSOR_LINEEND, ACTION_CURSOR_LINEBEGIN, - ACTION_CURSOR_NEXT_WORD, ACTION_CURSOR_PREV_WORD, - ACTION_CURSOR_NEXT_NODE, ACTION_CURSOR_PREV_NODE, - ACTION_CURSOR_NEXT_LINK, ACTION_CURSOR_PREV_LINK, - ACTION_PAGE_DOWN, ACTION_PAGE_UP, ACTION_PAGE_LEFT, ACTION_PAGE_RIGHT, - ACTION_HALF_PAGE_DOWN, ACTION_HALF_PAGE_UP, - ACTION_SCROLL_DOWN, ACTION_SCROLL_UP, ACTION_SCROLL_LEFT, ACTION_SCROLL_RIGHT, - ACTION_CLICK, - ACTION_CHANGE_LOCATION, - ACTION_RELOAD, ACTION_RESHAPE, ACTION_REDRAW, - ACTION_CURSOR_FIRST_LINE, ACTION_CURSOR_LAST_LINE, - ACTION_CURSOR_TOP, ACTION_CURSOR_MIDDLE, ACTION_CURSOR_BOTTOM, - ACTION_CENTER_LINE, ACTION_LINE_INFO, - ACTION_LINED_SUBMIT, ACTION_LINED_CANCEL, - ACTION_LINED_BACKSPACE, ACTION_LINED_DELETE, - ACTION_LINED_CLEAR, ACTION_LINED_KILL, ACTION_LINED_KILL_WORD, - ACTION_LINED_BACK, ACTION_LINED_FORWARD, - ACTION_LINED_PREV_WORD, ACTION_LINED_NEXT_WORD, - ACTION_LINED_BEGIN, ACTION_LINED_END, - ACTION_LINED_COMPOSE_TOGGLE, ACTION_LINED_ESC - - ActionMap = Table[string, TwtAction] - StaticConfig = object - nmap: ActionMap - lemap: ActionMap - cmap: Table[string, string] - - Config = object - nmap*: ActionMap - lemap*: ActionMap - cmap*: RadixNode[string] - -func getConfig(s: StaticConfig): Config = - return Config(nmap: s.nmap, lemap: s.lemap, cmap: s.cmap.toRadixTree()) - -func getRealKey(key: string): string = - var realk: string - var currchar: char - var control = 0 - var meta = 0 - var skip = false - for c in key: - if c == '\\': - skip = true - elif skip: - realk &= c - skip = false - elif c == 'M': - inc meta - currchar = c - elif c == 'C': - inc control - currchar = c - elif c == '-' and control == 1: - inc control - elif c == '-' and meta == 1: - inc meta - elif meta == 1: - realk &= 'M' & c - meta = 0 - elif meta == 2: - realk &= '\e' - realk &= c - elif control == 1: - realk &= 'C' & c - control = 0 - elif control == 2: - realk &= getControlChar(c) - control = 0 - else: - realk &= c - if control == 1: - realk &= 'C' - if meta == 1: - realk &= 'M' - return realk - -func constructActionTable*(origTable: ActionMap): ActionMap = - var newTable: ActionMap - var strs: seq[string] - for k in origTable.keys: - let realk = getRealKey(k) - var teststr = "" - for c in realk: - teststr &= c - strs.add(teststr) - - for k, v in origTable: - let realk = getRealKey(k) - var teststr = "" - for c in realk: - teststr &= c - if strs.contains(teststr): - newTable[teststr] = ACTION_FEED_NEXT - newTable[realk] = v - return newTable - -proc parseConfigLine[T](line: string, config: var T) = - if line.len == 0 or line[0] == '#': - return - let cmd = line.split(' ') - if cmd.len == 3: - if cmd[0] == "nmap": - config.nmap[getRealKey(cmd[1])] = parseEnum[TwtAction]("ACTION_" & cmd[2]) - elif cmd[0] == "lemap": - config.lemap[getRealKey(cmd[1])] = parseEnum[TwtAction]("ACTION_" & cmd[2]) - elif cmd[0] == "comp": - config.cmap[getRealKey(cmd[1])] = cmd[2] - -proc staticReadConfig(): StaticConfig = - let default = staticRead"../res/config" - for line in default.split('\n'): - parseConfigLine(line, result) - - result.nmap = constructActionTable(result.nmap) - result.lemap = constructActionTable(result.lemap) - -const defaultConfig = staticReadConfig() -var gconfig* = getConfig(defaultConfig) - -proc readConfig(filename: string) = - var f: File - let status = f.open(filename, fmRead) - var nmap: ActionMap - var lemap: ActionMap - var compose: Table[string, string] - if status: - var line: TaintedString - while f.readLine(line): - parseConfigLine(line, gconfig) - - gconfig.nmap = constructActionTable(nmap) - gconfig.lemap = constructActionTable(lemap) - gconfig.cmap = compose.toRadixTree() - -proc readConfig*() = - when defined(debug): - readConfig("res" / "config") - readConfig(getConfigDir() / "twt" / "config") - -proc getNormalAction*(s: string): TwtAction = - if gconfig.nmap.hasKey(s): - return gconfig.nmap[s] - return NO_ACTION - -proc getLinedAction*(s: string): TwtAction = - if gconfig.lemap.hasKey(s): - return gconfig.lemap[s] - return NO_ACTION - diff --git a/src/config/config.nim b/src/config/config.nim new file mode 100644 index 00000000..5a49bb34 --- /dev/null +++ b/src/config/config.nim @@ -0,0 +1,165 @@ +import tables +import os +import strutils + +import utils/twtstr +import utils/radixtree + +type + TwtAction* = + enum + NO_ACTION, + ACTION_FEED_NEXT, + ACTION_QUIT, + ACTION_CURSOR_UP, ACTION_CURSOR_DOWN, ACTION_CURSOR_LEFT, ACTION_CURSOR_RIGHT, + ACTION_CURSOR_LINEEND, ACTION_CURSOR_LINEBEGIN, + ACTION_CURSOR_NEXT_WORD, ACTION_CURSOR_PREV_WORD, + ACTION_CURSOR_NEXT_NODE, ACTION_CURSOR_PREV_NODE, + ACTION_CURSOR_NEXT_LINK, ACTION_CURSOR_PREV_LINK, + ACTION_PAGE_DOWN, ACTION_PAGE_UP, ACTION_PAGE_LEFT, ACTION_PAGE_RIGHT, + ACTION_HALF_PAGE_DOWN, ACTION_HALF_PAGE_UP, + ACTION_SCROLL_DOWN, ACTION_SCROLL_UP, ACTION_SCROLL_LEFT, ACTION_SCROLL_RIGHT, + ACTION_CLICK, + ACTION_CHANGE_LOCATION, + ACTION_RELOAD, ACTION_RESHAPE, ACTION_REDRAW, + ACTION_CURSOR_FIRST_LINE, ACTION_CURSOR_LAST_LINE, + ACTION_CURSOR_TOP, ACTION_CURSOR_MIDDLE, ACTION_CURSOR_BOTTOM, + ACTION_CENTER_LINE, ACTION_LINE_INFO, + ACTION_LINED_SUBMIT, ACTION_LINED_CANCEL, + ACTION_LINED_BACKSPACE, ACTION_LINED_DELETE, + ACTION_LINED_CLEAR, ACTION_LINED_KILL, ACTION_LINED_KILL_WORD, + ACTION_LINED_BACK, ACTION_LINED_FORWARD, + ACTION_LINED_PREV_WORD, ACTION_LINED_NEXT_WORD, + ACTION_LINED_BEGIN, ACTION_LINED_END, + ACTION_LINED_COMPOSE_TOGGLE, ACTION_LINED_ESC + + ActionMap = Table[string, TwtAction] + StaticConfig = object + nmap: ActionMap + lemap: ActionMap + cmap: Table[string, string] + + Config = object + nmap*: ActionMap + lemap*: ActionMap + cmap*: RadixNode[string] + +func getConfig(s: StaticConfig): Config = + return Config(nmap: s.nmap, lemap: s.lemap, cmap: s.cmap.toRadixTree()) + +func getRealKey(key: string): string = + var realk: string + var currchar: char + var control = 0 + var meta = 0 + var skip = false + for c in key: + if c == '\\': + skip = true + elif skip: + realk &= c + skip = false + elif c == 'M': + inc meta + currchar = c + elif c == 'C': + inc control + currchar = c + elif c == '-' and control == 1: + inc control + elif c == '-' and meta == 1: + inc meta + elif meta == 1: + realk &= 'M' & c + meta = 0 + elif meta == 2: + realk &= '\e' + realk &= c + meta = 0 + elif control == 1: + realk &= 'C' & c + control = 0 + elif control == 2: + realk &= getControlChar(c) + control = 0 + else: + realk &= c + if control == 1: + realk &= 'C' + if meta == 1: + realk &= 'M' + return realk + +func constructActionTable*(origTable: ActionMap): ActionMap = + var newTable: ActionMap + var strs: seq[string] + for k in origTable.keys: + let realk = getRealKey(k) + var teststr = "" + for c in realk: + teststr &= c + strs.add(teststr) + + for k, v in origTable: + let realk = getRealKey(k) + var teststr = "" + for c in realk: + teststr &= c + if strs.contains(teststr): + newTable[teststr] = ACTION_FEED_NEXT + newTable[realk] = v + return newTable + +proc parseConfigLine[T](line: string, config: var T) = + if line.len == 0 or line[0] == '#': + return + let cmd = line.split(' ') + if cmd.len == 3: + if cmd[0] == "nmap": + config.nmap[getRealKey(cmd[1])] = parseEnum[TwtAction]("ACTION_" & cmd[2]) + elif cmd[0] == "lemap": + config.lemap[getRealKey(cmd[1])] = parseEnum[TwtAction]("ACTION_" & cmd[2]) + elif cmd[0] == "comp": + config.cmap[getRealKey(cmd[1])] = cmd[2] + +proc staticReadConfig(): StaticConfig = + let default = staticRead"res/config" + for line in default.split('\n'): + parseConfigLine(line, result) + + result.nmap = constructActionTable(result.nmap) + result.lemap = constructActionTable(result.lemap) + +const defaultConfig = staticReadConfig() +var gconfig* = getConfig(defaultConfig) + +proc readConfig(filename: string) = + var f: File + let status = f.open(filename, fmRead) + var nmap: ActionMap + var lemap: ActionMap + var compose: Table[string, string] + if status: + var line: TaintedString + while f.readLine(line): + parseConfigLine(line, gconfig) + + gconfig.nmap = constructActionTable(nmap) + gconfig.lemap = constructActionTable(lemap) + gconfig.cmap = compose.toRadixTree() + +proc readConfig*() = + when defined(debug): + readConfig("res" / "config") + readConfig(getConfigDir() / "twt" / "config") + +proc getNormalAction*(s: string): TwtAction = + if gconfig.nmap.hasKey(s): + return gconfig.nmap[s] + return NO_ACTION + +proc getLinedAction*(s: string): TwtAction = + if gconfig.lemap.hasKey(s): + return gconfig.lemap[s] + return NO_ACTION + diff --git a/src/css/box.nim b/src/css/box.nim index c3f0280d..c2974215 100644 --- a/src/css/box.nim +++ b/src/css/box.nim @@ -1,8 +1,7 @@ import unicode -import ../types/enums - -import ../utils/twtstr +import types/enums +import utils/twtstr type CSSRect* = object diff --git a/src/css/cssparser.nim b/src/css/cssparser.nim index 90b8be46..afbdf413 100644 --- a/src/css/cssparser.nim +++ b/src/css/cssparser.nim @@ -8,10 +8,8 @@ import math import options import sugar -import ../utils/twtstr -import ../utils/eprint - -import ../types/enums +import utils/twtstr +import types/enums type CSSTokenizerState = object diff --git a/src/css/selector.nim b/src/css/selector.nim index a474bd8f..840086d9 100644 --- a/src/css/selector.nim +++ b/src/css/selector.nim @@ -1,9 +1,8 @@ import unicode -import ../types/enums -import ../types/tagtypes - -import ./cssparser +import types/enums +import types/tagtypes +import css/cssparser type SelectorType* = enum diff --git a/src/css/style.nim b/src/css/style.nim index 3b70263d..c1820b12 100644 --- a/src/css/style.nim +++ b/src/css/style.nim @@ -2,13 +2,9 @@ import unicode import terminal import tables - -import ../utils/twtstr -import ../utils/eprint - -import ../types/enums - -import ./cssparser +import utils/twtstr +import types/enums +import css/cssparser type CSSLength* = object diff --git a/src/html/dom.nim b/src/html/dom.nim index 96bbd8a1..2279725f 100644 --- a/src/html/dom.nim +++ b/src/html/dom.nim @@ -8,17 +8,14 @@ import sequtils import sugar import algorithm -import ../css/style -import ../css/cssparser -import ../css/selector -import ../css/box - -import ../types/enums - -import ../utils/twtstr -import ../utils/eprint - -const css = staticRead"../../res/default.css" +import css/style +import css/cssparser +import css/selector +import css/box +import types/enums +import utils/twtstr + +const css = staticRead"res/default.css" let stylesheet = parseCSS(newStringStream(css)) type diff --git a/src/html/entity.nim b/src/html/entity.nim index 72c5c452..775ea94f 100644 --- a/src/html/entity.nim +++ b/src/html/entity.nim @@ -1,8 +1,8 @@ import json -import ../utils/radixtree +import utils/radixtree -const entity = staticRead"../../res/entity.json" +const entity = staticRead"res/entity.json" proc genEntityMap(data: seq[tuple[a: string, b: string]]): RadixNode[string] = result = newRadixTree[string]() for pair in data: diff --git a/src/html/htmlparser.nim b/src/html/htmlparser.nim deleted file mode 100644 index c060d666..00000000 --- a/src/html/htmlparser.nim +++ /dev/null @@ -1,459 +0,0 @@ -import streams -import unicode -import strutils -import tables -import json - -import ../types/enums -import ../types/tagtypes - -import ../utils/twtstr -import ../utils/radixtree - -import dom -import entity - -type - HTMLParseState = object - closed: bool - parents: seq[Node] - parsedNode: Node - a: string - b: string - attrs: seq[string] - in_comment: bool - in_script: bool - in_style: bool - in_noscript: bool - in_body: bool - elementNode: Element - textNode: Text - commentNode: Comment - -func inputSize*(str: string): int = - if str.len == 0: - return 20 - for c in str: - if not c.isDigit: - return 20 - return str.parseInt() - -#w3m's getescapecmd and parse_tag, transpiled to nim and heavily modified. -#(C) Copyright 1994-2002 by Akinori Ito -#(C) Copyright 2002-2011 by Akinori Ito, Hironori Sakamoto, Fumitoshi Ukai -# -#Use, modification and redistribution of this software is hereby granted, -#provided that this entire copyright notice is included on any copies of -#this software and applications and derivations thereof. -# -#This software is provided on an "as is" basis, without warranty of any -#kind, either expressed or implied, as to any matter including, but not -#limited to warranty of fitness of purpose, or merchantability, or -#results obtained from use of this software. -proc getescapecmd(buf: string, at: var int): string = - var i = at - - if buf[i] == '#': #num - inc i - var num: int - if buf[i].tolower() == 'x': #hex - inc i - if not isdigit(buf[i]): - at = i - return "" - - num = hexValue(buf[i]) - inc i - while i < buf.len and hexValue(buf[i]) != -1: - num *= 0x10 - num += hexValue(buf[i]) - inc i - else: #dec - if not isDigit(buf[i]): - at = i - return "" - - num = decValue(buf[i]) - inc i - while i < buf.len and isDigit(buf[i]): - num *= 10 - num += decValue(buf[i]) - inc i - - if buf[i] == ';': - inc i - at = i - return $(Rune(num)) - elif not isAlphaAscii(buf[i]): - return "" - - var n = entityMap - var s = "" - while true: - s &= buf[i] - if not entityMap.hasPrefix(s, n): - break - let pn = n - n = n{s} - if n != pn: - s = "" - inc i - - if n.leaf: - at = i - return n.value - - return "" - -type - DOMParsedTag = object - tagid: TagType - attrs: Table[string, string] - open: bool - -proc parse_tag(buf: string, at: var int): DOMParsedTag = - var tag = DOMParsedTag() - tag.open = true - - #Parse tag name - var tagname = "" - inc at - if buf[at] == '/': - inc at - tag.open = false - at = skipBlanks(buf, at) - - while at < buf.len and not buf[at].isWhitespace() and not (tag.open and buf[at] == '/') and buf[at] != '>': - tagname &= buf[at].tolower() - at += buf.runeLenAt(at) - - tag.tagid = tagType(tagname) - at = skipBlanks(buf, at) - - while at < buf.len and buf[at] != '>': - var value = "" - var attrname = "" - while at < buf.len and buf[at] != '=' and not buf[at].isWhitespace() and buf[at] != '>': - attrname &= buf[at].tolower() - at += buf.runeLenAt(at) - - at = skipBlanks(buf, at) - if buf[at] == '=': - inc at - at = skipBlanks(buf, at) - if at < buf.len and (buf[at] == '"' or buf[at] == '\''): - let startc = buf[at] - inc at - while at < buf.len and buf[at] != startc: - if buf[at + 1] == '&': - inc at - value &= getescapecmd(buf, at) - else: - var r: Rune - fastRuneAt(buf, at, r) - value &= r - if at < buf.len: - inc at - elif at < buf.len: - while at < buf.len and not buf[at].isWhitespace() and buf[at] != '>': - value &= buf[at] - at += buf.runeLenAt(at) - - if attrname.len > 0: - tag.attrs[attrname] = value - - while at < buf.len and buf[at] != '>': - at += buf.runeLenAt(at) - - if at < buf.len and buf[at] == '>': - inc at - return tag - -proc insertNode(parent: Node, node: Node) = - parent.childNodes.add(node) - - if parent.childNodes.len > 1: - let prevSibling = parent.childNodes[^1] - prevSibling.nextSibling = node - node.previousSibling = prevSibling - - node.parentNode = parent - if parent.nodeType == ELEMENT_NODE: - node.parentElement = (Element)parent - - if parent.ownerDocument != nil: - node.ownerDocument = parent.ownerDocument - elif parent.nodeType == DOCUMENT_NODE: - node.ownerDocument = (Document)parent - - if node.nodeType == ELEMENT_NODE: - parent.children.add((Element)node) - - let element = ((Element)node) - if element.ownerDocument != nil: - node.ownerDocument.all_elements.add((Element)node) - element.ownerDocument.type_elements[element.tagType].add(element) - if element.id != "": - if not (element.id in element.ownerDocument.id_elements): - element.ownerDocument.id_elements[element.id] = newSeq[Element]() - element.ownerDocument.id_elements[element.id].add(element) - - for c in element.classList: - if not (c in element.ownerDocument.class_elements): - element.ownerDocument.class_elements[c] = newSeq[Element]() - element.ownerDocument.class_elements[c].add(element) - -proc processDocumentBody(state: var HTMLParseState) = - if not state.in_body: - state.in_body = true - if state.elementNode.ownerDocument != nil: - state.elementNode = state.elementNode.ownerDocument.body - -proc processDocumentAddNode(state: var HTMLParseState, newNode: Node) = - if state.elementNode.nodeType == ELEMENT_NODE and state.elementNode.tagType == TAG_HTML: - if state.in_body: - state.elementNode = state.elementNode.ownerDocument.body - else: - state.elementNode = state.elementNode.ownerDocument.head - - insertNode(state.elementNode, newNode) - -proc processDocumentEndNode(state: var HTMLParseState) = - if state.elementNode == nil or state.elementNode.nodeType == DOCUMENT_NODE: - return - state.elementNode = state.elementNode.parentElement - -proc processDocumentText(state: var HTMLParseState) = - if state.textNode != nil and state.textNode.data.len > 0: - processDocumentBody(state) - if state.textNode == nil: - state.textNode = newText() - - processDocumentAddNode(state, state.textNode) - -proc processDocumentStartElement(state: var HTMLParseState, element: Element, tag: DOMParsedTag) = - var add = true - - for k, v in tag.attrs: - element.attributes[k] = element.newAttr(k, v) - - element.id = element.getAttrValue("id") - if element.attributes.hasKey("class"): - for w in unicode.split(element.attributes["class"].value, Rune(' ')): - element.classList.add(w) - - case element.tagType - of TAG_SCRIPT: - state.in_script = true - of TAG_NOSCRIPT: - state.in_noscript = true - of TAG_STYLE: - state.in_style = true - of TAG_SELECT: - HTMLSelectElement(element).name = element.getAttrValue("name") - HTMLSelectElement(element).value = element.getAttrValue("value") - of TAG_INPUT: - HTMLInputElement(element).value = element.getAttrValue("value") - HTMLInputElement(element).itype = element.getAttrValue("type").inputType() - HTMLInputElement(element).size = element.getAttrValue("size").inputSize() - of TAG_A: - HTMLAnchorElement(element).href = element.getAttrValue("href") - of TAG_OPTION: - HTMLOptionElement(element).value = element.getAttrValue("href") - of TAG_HTML: - add = false - of TAG_HEAD: - add = false - of TAG_BODY: - add = false - processDocumentBody(state) - else: discard - - if state.elementNode.nodeType == ELEMENT_NODE: - case element.tagType - of SelfClosingTagTypes: - if state.elementNode.tagType == element.tagType: - processDocumentEndNode(state) - of TAG_H1: - HTMLHeadingElement(element).rank = 1 - of TAG_H2: - HTMLHeadingElement(element).rank = 2 - of TAG_H3: - HTMLHeadingElement(element).rank = 3 - of TAG_H4: - HTMLHeadingElement(element).rank = 4 - of TAG_H5: - HTMLHeadingElement(element).rank = 5 - of TAG_H6: - HTMLHeadingElement(element).rank = 6 - else: discard - - if state.elementNode.tagType == TAG_P and element.tagType in PClosingTagTypes: - processDocumentEndNode(state) - - if add: - processDocumentAddNode(state, element) - state.elementNode = element - - if element.tagType in VoidTagTypes: - processDocumentEndNode(state) - -proc processDocumentEndElement(state: var HTMLParseState, tag: DOMParsedTag) = - if tag.tagid in VoidTagTypes: - return - if tag.tagid == TAG_HEAD: - state.in_body = true - return - if tag.tagid == TAG_BODY: - return - if state.elementNode.nodeType == ELEMENT_NODE and tag.tagid != state.elementNode.tagType: - if state.elementNode.tagType in SelfClosingTagTypes: - processDocumentEndNode(state) - - processDocumentEndNode(state) - -proc processDocumentTag(state: var HTMLParseState, tag: DOMParsedTag) = - if state.in_script: - if tag.tagid == TAG_SCRIPT: - state.in_script = false - else: - return - - if state.in_style: - if tag.tagid == TAG_STYLE: - state.in_style = false - else: - return - - if state.in_noscript: - if tag.tagid == TAG_NOSCRIPT: - state.in_noscript = false - else: - return - - if tag.open: - processDocumentStartElement(state, newHtmlElement(tag.tagid), tag) - else: - processDocumentEndElement(state, tag) - -proc processDocumentPart(state: var HTMLParseState, buf: string) = - var at = 0 - var max = 0 - var was_script = false - - max = buf.len - - while at < max: - case buf[at] - of '&': - inc at - let p = getescapecmd(buf, at) - if state.in_comment: - state.commentNode.data &= p - else: - processDocumentText(state) - state.textNode.data &= p - of '<': - if state.in_comment: - state.commentNode.data &= buf[at] - inc at - else: - var p = at - inc p - if p < max and buf[p] == '!': - inc p - if p < max and buf[p] == '-': - inc p - if p < max and buf[p] == '-': - inc p - at = p - state.in_comment = true - let comment = newComment() - state.commentNode = comment - processDocumentAddNode(state, comment) - if state.textNode != nil: - state.textNode.rawtext = state.textNode.getRawText() - state.textNode = nil - else: - #TODO for doctype - while p < max and buf[p] != '>': - inc p - at = p + 1 - continue - - if not state.in_comment: - if state.textNode != nil: - state.textNode.rawtext = state.textNode.getRawText() - state.textNode = nil - p = at - var tag = parse_tag(buf, at) - was_script = state.in_script - - processDocumentTag(state, tag) -# if (was_script) { -# if (state->in_script) { -# ptr = p; -# processDocumentText(&state->parentNode, &state->textNode); -# Strcat_char(((CharacterData *)state->textNode)->data, *ptr++); -# } else if (buffer->javascript_enabled) { -# loadJSToBuffer(buffer, childTextContentNode(state->parentNode->lastChild)->ptr, "", state->document); -# } -# } - elif buf[at] == '-' and state.in_comment: - var p = at - inc p - if p < max and buf[p] == '-': - inc p - if p < max and buf[p] == '>': - inc p - at = p - state.commentNode = nil - state.in_comment = false - - if state.in_comment: - state.commentNode.data &= buf[at] - inc at - else: - var r: Rune - fastRuneAt(buf, at, r) - if state.in_comment: - state.commentNode.data &= $r - else: - processDocumentText(state) - state.textNode.data &= $r - -proc parseHtml*(inputStream: Stream): Document = - let document = newDocument() - insertNode(document, document.root) - insertNode(document.root, document.head) - insertNode(document.root, document.body) - - var state = HTMLParseState() - state.elementNode = document.root - - var till_when = false - - var buf = "" - var lineBuf: string - while not inputStream.atEnd(): - lineBuf = inputStream.readLine() - buf &= lineBuf - - var at = 0 - while at < lineBuf.len: - case lineBuf[at] - of '<': - till_when = true - of '>': - till_when = false - else: discard - at += lineBuf.runeLenAt(at) - - if till_when: - continue - - processDocumentPart(state, buf) - buf = "" - - inputStream.close() - return document diff --git a/src/html/parser.nim b/src/html/parser.nim new file mode 100644 index 00000000..44b31d4a --- /dev/null +++ b/src/html/parser.nim @@ -0,0 +1,457 @@ +import streams +import unicode +import strutils +import tables +import json + +import types/enums +import types/tagtypes +import utils/twtstr +import utils/radixtree +import html/dom +import html/entity + +type + HTMLParseState = object + closed: bool + parents: seq[Node] + parsedNode: Node + a: string + b: string + attrs: seq[string] + in_comment: bool + in_script: bool + in_style: bool + in_noscript: bool + in_body: bool + elementNode: Element + textNode: Text + commentNode: Comment + +func inputSize*(str: string): int = + if str.len == 0: + return 20 + for c in str: + if not c.isDigit: + return 20 + return str.parseInt() + +#w3m's getescapecmd and parse_tag, transpiled to nim and heavily modified. +#(C) Copyright 1994-2002 by Akinori Ito +#(C) Copyright 2002-2011 by Akinori Ito, Hironori Sakamoto, Fumitoshi Ukai +# +#Use, modification and redistribution of this software is hereby granted, +#provided that this entire copyright notice is included on any copies of +#this software and applications and derivations thereof. +# +#This software is provided on an "as is" basis, without warranty of any +#kind, either expressed or implied, as to any matter including, but not +#limited to warranty of fitness of purpose, or merchantability, or +#results obtained from use of this software. +proc getescapecmd(buf: string, at: var int): string = + var i = at + + if buf[i] == '#': #num + inc i + var num: int + if buf[i].tolower() == 'x': #hex + inc i + if not isdigit(buf[i]): + at = i + return "" + + num = hexValue(buf[i]) + inc i + while i < buf.len and hexValue(buf[i]) != -1: + num *= 0x10 + num += hexValue(buf[i]) + inc i + else: #dec + if not isDigit(buf[i]): + at = i + return "" + + num = decValue(buf[i]) + inc i + while i < buf.len and isDigit(buf[i]): + num *= 10 + num += decValue(buf[i]) + inc i + + if buf[i] == ';': + inc i + at = i + return $(Rune(num)) + elif not isAlphaAscii(buf[i]): + return "" + + var n = entityMap + var s = "" + while true: + s &= buf[i] + if not entityMap.hasPrefix(s, n): + break + let pn = n + n = n{s} + if n != pn: + s = "" + inc i + + if n.leaf: + at = i + return n.value + + return "" + +type + DOMParsedTag = object + tagid: TagType + attrs: Table[string, string] + open: bool + +proc parse_tag(buf: string, at: var int): DOMParsedTag = + var tag = DOMParsedTag() + tag.open = true + + #Parse tag name + var tagname = "" + inc at + if buf[at] == '/': + inc at + tag.open = false + at = skipBlanks(buf, at) + + while at < buf.len and not buf[at].isWhitespace() and not (tag.open and buf[at] == '/') and buf[at] != '>': + tagname &= buf[at].tolower() + at += buf.runeLenAt(at) + + tag.tagid = tagType(tagname) + at = skipBlanks(buf, at) + + while at < buf.len and buf[at] != '>': + var value = "" + var attrname = "" + while at < buf.len and buf[at] != '=' and not buf[at].isWhitespace() and buf[at] != '>': + attrname &= buf[at].tolower() + at += buf.runeLenAt(at) + + at = skipBlanks(buf, at) + if buf[at] == '=': + inc at + at = skipBlanks(buf, at) + if at < buf.len and (buf[at] == '"' or buf[at] == '\''): + let startc = buf[at] + inc at + while at < buf.len and buf[at] != startc: + if buf[at + 1] == '&': + inc at + value &= getescapecmd(buf, at) + else: + var r: Rune + fastRuneAt(buf, at, r) + value &= r + if at < buf.len: + inc at + elif at < buf.len: + while at < buf.len and not buf[at].isWhitespace() and buf[at] != '>': + value &= buf[at] + at += buf.runeLenAt(at) + + if attrname.len > 0: + tag.attrs[attrname] = value + + while at < buf.len and buf[at] != '>': + at += buf.runeLenAt(at) + + if at < buf.len and buf[at] == '>': + inc at + return tag + +proc insertNode(parent: Node, node: Node) = + parent.childNodes.add(node) + + if parent.childNodes.len > 1: + let prevSibling = parent.childNodes[^1] + prevSibling.nextSibling = node + node.previousSibling = prevSibling + + node.parentNode = parent + if parent.nodeType == ELEMENT_NODE: + node.parentElement = (Element)parent + + if parent.ownerDocument != nil: + node.ownerDocument = parent.ownerDocument + elif parent.nodeType == DOCUMENT_NODE: + node.ownerDocument = (Document)parent + + if node.nodeType == ELEMENT_NODE: + parent.children.add((Element)node) + + let element = ((Element)node) + if element.ownerDocument != nil: + node.ownerDocument.all_elements.add((Element)node) + element.ownerDocument.type_elements[element.tagType].add(element) + if element.id != "": + if not (element.id in element.ownerDocument.id_elements): + element.ownerDocument.id_elements[element.id] = newSeq[Element]() + element.ownerDocument.id_elements[element.id].add(element) + + for c in element.classList: + if not (c in element.ownerDocument.class_elements): + element.ownerDocument.class_elements[c] = newSeq[Element]() + element.ownerDocument.class_elements[c].add(element) + +proc processDocumentBody(state: var HTMLParseState) = + if not state.in_body: + state.in_body = true + if state.elementNode.ownerDocument != nil: + state.elementNode = state.elementNode.ownerDocument.body + +proc processDocumentAddNode(state: var HTMLParseState, newNode: Node) = + if state.elementNode.nodeType == ELEMENT_NODE and state.elementNode.tagType == TAG_HTML: + if state.in_body: + state.elementNode = state.elementNode.ownerDocument.body + else: + state.elementNode = state.elementNode.ownerDocument.head + + insertNode(state.elementNode, newNode) + +proc processDocumentEndNode(state: var HTMLParseState) = + if state.elementNode == nil or state.elementNode.nodeType == DOCUMENT_NODE: + return + state.elementNode = state.elementNode.parentElement + +proc processDocumentText(state: var HTMLParseState) = + if state.textNode != nil and state.textNode.data.len > 0: + processDocumentBody(state) + if state.textNode == nil: + state.textNode = newText() + + processDocumentAddNode(state, state.textNode) + +proc processDocumentStartElement(state: var HTMLParseState, element: Element, tag: DOMParsedTag) = + var add = true + + for k, v in tag.attrs: + element.attributes[k] = element.newAttr(k, v) + + element.id = element.getAttrValue("id") + if element.attributes.hasKey("class"): + for w in unicode.split(element.attributes["class"].value, Rune(' ')): + element.classList.add(w) + + case element.tagType + of TAG_SCRIPT: + state.in_script = true + of TAG_NOSCRIPT: + state.in_noscript = true + of TAG_STYLE: + state.in_style = true + of TAG_SELECT: + HTMLSelectElement(element).name = element.getAttrValue("name") + HTMLSelectElement(element).value = element.getAttrValue("value") + of TAG_INPUT: + HTMLInputElement(element).value = element.getAttrValue("value") + HTMLInputElement(element).itype = element.getAttrValue("type").inputType() + HTMLInputElement(element).size = element.getAttrValue("size").inputSize() + of TAG_A: + HTMLAnchorElement(element).href = element.getAttrValue("href") + of TAG_OPTION: + HTMLOptionElement(element).value = element.getAttrValue("href") + of TAG_HTML: + add = false + of TAG_HEAD: + add = false + of TAG_BODY: + add = false + processDocumentBody(state) + else: discard + + if state.elementNode.nodeType == ELEMENT_NODE: + case element.tagType + of SelfClosingTagTypes: + if state.elementNode.tagType == element.tagType: + processDocumentEndNode(state) + of TAG_H1: + HTMLHeadingElement(element).rank = 1 + of TAG_H2: + HTMLHeadingElement(element).rank = 2 + of TAG_H3: + HTMLHeadingElement(element).rank = 3 + of TAG_H4: + HTMLHeadingElement(element).rank = 4 + of TAG_H5: + HTMLHeadingElement(element).rank = 5 + of TAG_H6: + HTMLHeadingElement(element).rank = 6 + else: discard + + if state.elementNode.tagType == TAG_P and element.tagType in PClosingTagTypes: + processDocumentEndNode(state) + + if add: + processDocumentAddNode(state, element) + state.elementNode = element + + if element.tagType in VoidTagTypes: + processDocumentEndNode(state) + +proc processDocumentEndElement(state: var HTMLParseState, tag: DOMParsedTag) = + if tag.tagid in VoidTagTypes: + return + if tag.tagid == TAG_HEAD: + state.in_body = true + return + if tag.tagid == TAG_BODY: + return + if state.elementNode.nodeType == ELEMENT_NODE and tag.tagid != state.elementNode.tagType: + if state.elementNode.tagType in SelfClosingTagTypes: + processDocumentEndNode(state) + + processDocumentEndNode(state) + +proc processDocumentTag(state: var HTMLParseState, tag: DOMParsedTag) = + if state.in_script: + if tag.tagid == TAG_SCRIPT: + state.in_script = false + else: + return + + if state.in_style: + if tag.tagid == TAG_STYLE: + state.in_style = false + else: + return + + if state.in_noscript: + if tag.tagid == TAG_NOSCRIPT: + state.in_noscript = false + else: + return + + if tag.open: + processDocumentStartElement(state, newHtmlElement(tag.tagid), tag) + else: + processDocumentEndElement(state, tag) + +proc processDocumentPart(state: var HTMLParseState, buf: string) = + var at = 0 + var max = 0 + var was_script = false + + max = buf.len + + while at < max: + case buf[at] + of '&': + inc at + let p = getescapecmd(buf, at) + if state.in_comment: + state.commentNode.data &= p + else: + processDocumentText(state) + state.textNode.data &= p + of '<': + if state.in_comment: + state.commentNode.data &= buf[at] + inc at + else: + var p = at + inc p + if p < max and buf[p] == '!': + inc p + if p < max and buf[p] == '-': + inc p + if p < max and buf[p] == '-': + inc p + at = p + state.in_comment = true + let comment = newComment() + state.commentNode = comment + processDocumentAddNode(state, comment) + if state.textNode != nil: + state.textNode.rawtext = state.textNode.getRawText() + state.textNode = nil + else: + #TODO for doctype + while p < max and buf[p] != '>': + inc p + at = p + 1 + continue + + if not state.in_comment: + if state.textNode != nil: + state.textNode.rawtext = state.textNode.getRawText() + state.textNode = nil + p = at + var tag = parse_tag(buf, at) + was_script = state.in_script + + processDocumentTag(state, tag) +# if (was_script) { +# if (state->in_script) { +# ptr = p; +# processDocumentText(&state->parentNode, &state->textNode); +# Strcat_char(((CharacterData *)state->textNode)->data, *ptr++); +# } else if (buffer->javascript_enabled) { +# loadJSToBuffer(buffer, childTextContentNode(state->parentNode->lastChild)->ptr, "", state->document); +# } +# } + elif buf[at] == '-' and state.in_comment: + var p = at + inc p + if p < max and buf[p] == '-': + inc p + if p < max and buf[p] == '>': + inc p + at = p + state.commentNode = nil + state.in_comment = false + + if state.in_comment: + state.commentNode.data &= buf[at] + inc at + else: + var r: Rune + fastRuneAt(buf, at, r) + if state.in_comment: + state.commentNode.data &= $r + else: + processDocumentText(state) + state.textNode.data &= $r + +proc parseHtml*(inputStream: Stream): Document = + let document = newDocument() + insertNode(document, document.root) + insertNode(document.root, document.head) + insertNode(document.root, document.body) + + var state = HTMLParseState() + state.elementNode = document.root + + var till_when = false + + var buf = "" + var lineBuf: string + while not inputStream.atEnd(): + lineBuf = inputStream.readLine() + buf &= lineBuf + + var at = 0 + while at < lineBuf.len: + case lineBuf[at] + of '<': + till_when = true + of '>': + till_when = false + else: discard + at += lineBuf.runeLenAt(at) + + if till_when: + continue + + processDocumentPart(state, buf) + buf = "" + + inputStream.close() + return document diff --git a/src/io/buffer.nim b/src/io/buffer.nim index 20d7c497..f3fbbd38 100644 --- a/src/io/buffer.nim +++ b/src/io/buffer.nim @@ -5,20 +5,14 @@ import tables import strutils import unicode -import ../types/color -import ../types/enums - -import ../utils/twtstr -import ../utils/eprint - -import ../html/dom - -import ../css/box - -import ../config - -import ./term -import ./lineedit +import types/color +import types/enums +import utils/twtstr +import html/dom +import css/box +import config/config +import io/term +import io/lineedit type Cell = object of RootObj @@ -774,6 +768,7 @@ proc inputLoop(attrs: TermAttributes, buffer: Buffer): bool = of ACTION_CHANGE_LOCATION: var url = $buffer.location + termGoto(0, buffer.height) let status = readLine("URL: ", url, buffer.width) if status: buffer.setLocation(parseUri(url)) diff --git a/src/io/lineedit.nim b/src/io/lineedit.nim index cff7383e..e1a552bd 100644 --- a/src/io/lineedit.nim +++ b/src/io/lineedit.nim @@ -4,13 +4,9 @@ import strutils import sequtils import sugar -import ../utils/twtstr -import ../utils/radixtree -import ../utils/eprint - -import ../config - -import ./terminal +import utils/twtstr +import utils/radixtree +import config/config type LineState = object news: seq[Rune] diff --git a/src/main.nim b/src/main.nim index fe7eeb99..6f940b50 100644 --- a/src/main.nim +++ b/src/main.nim @@ -3,15 +3,11 @@ import uri import os import streams -import utils/eprint - -import html/htmlparser +import html/parser import html/dom - import io/buffer import io/term - -import config +import config/config let clientInstance = newHttpClient() proc loadRemotePage*(url: string): string = diff --git a/src/utils/twtstr.nim b/src/utils/twtstr.nim index 8acc2343..707b09a9 100644 --- a/src/utils/twtstr.nim +++ b/src/utils/twtstr.nim @@ -483,7 +483,7 @@ func nohandakuten*(r: Rune): Rune = return cast[Rune](cast[int](r) - 2) # Halfwidth to fullwidth & vice versa -const widthconv = staticRead"../../res/widthconv.json" +const widthconv = staticRead"res/widthconv.json" proc genHalfWidthTable(): Table[Rune, Rune] = let widthconvjson = parseJson(widthconv) for k, v in widthconvjson: -- cgit 1.4.1-2-gfad0