From 34b023515599bc746c10c597467ecb07f53c49fe Mon Sep 17 00:00:00 2001 From: bptato Date: Fri, 30 Jul 2021 20:23:34 +0200 Subject: CSS selectors and re-organization --- src/buffer.nim | 17 +- src/config.nim | 42 +-- src/css/cssparser.nim | 813 ++++++++++++++++++++++++++++++++++++++++++++++ src/css/selector.nim | 151 +++++++++ src/css/style.nim | 321 +++++++++++++++++++ src/cssparser.nim | 833 ------------------------------------------------ src/display.nim | 395 ----------------------- src/dom.nim | 358 --------------------- src/entity.nim | 29 -- src/enums.nim | 96 ------ src/html/dom.nim | 492 ++++++++++++++++++++++++++++ src/html/entity.nim | 28 ++ src/html/htmlparser.nim | 540 +++++++++++++++++++++++++++++++ src/htmlparser.nim | 471 --------------------------- src/io/display.nim | 399 +++++++++++++++++++++++ src/io/twtio.nim | 323 +++++++++++++++++++ src/main.nim | 28 +- src/radixtree.nim | 440 ------------------------- src/style.nim | 74 ----- src/termattrs.nim | 11 - src/twtio.nim | 263 --------------- src/twtstr.nim | 469 --------------------------- src/types/enums.nim | 101 ++++++ src/types/tagtypes.nim | 30 ++ src/utils/radixtree.nim | 309 ++++++++++++++++++ src/utils/termattrs.nim | 11 + src/utils/twtstr.nim | 455 ++++++++++++++++++++++++++ 27 files changed, 4020 insertions(+), 3479 deletions(-) create mode 100644 src/css/cssparser.nim create mode 100644 src/css/selector.nim create mode 100644 src/css/style.nim delete mode 100644 src/cssparser.nim delete mode 100644 src/display.nim delete mode 100644 src/dom.nim delete mode 100644 src/entity.nim delete mode 100644 src/enums.nim create mode 100644 src/html/dom.nim create mode 100644 src/html/entity.nim create mode 100644 src/html/htmlparser.nim delete mode 100644 src/htmlparser.nim create mode 100644 src/io/display.nim create mode 100644 src/io/twtio.nim delete mode 100644 src/radixtree.nim delete mode 100644 src/style.nim delete mode 100644 src/termattrs.nim delete mode 100644 src/twtio.nim delete mode 
100644 src/twtstr.nim create mode 100644 src/types/enums.nim create mode 100644 src/types/tagtypes.nim create mode 100644 src/utils/radixtree.nim create mode 100644 src/utils/termattrs.nim create mode 100644 src/utils/twtstr.nim (limited to 'src') diff --git a/src/buffer.nim b/src/buffer.nim index 5eaa4717..dc171137 100644 --- a/src/buffer.nim +++ b/src/buffer.nim @@ -4,11 +4,14 @@ import tables import strutils import unicode -import termattrs -import dom -import twtio -import enums -import twtstr +import types/enums + +import utils/termattrs +import utils/twtstr + +import html/dom + +import io/twtio type Buffer* = ref BufferObj @@ -115,7 +118,7 @@ proc addNode*(buffer: Buffer, node: Node) = buffer.clickables.add(node) else: discard elif node.isTextNode(): - if node.parentElement != nil and node.parentElement.style.islink: + if node.parentElement != nil and node.getStyle().islink: let anchor = node.ancestor(TAG_A) assert(anchor != nil) buffer.clickables.add(anchor) @@ -407,7 +410,6 @@ proc checkLinkSelection*(buffer: Buffer): bool = return false else: let anchor = buffer.selectedlink.ancestor(TAG_A) - anchor.style.selected = false buffer.selectedlink.fmttext = buffer.selectedlink.getFmtText() buffer.selectedlink = nil buffer.hovertext = "" @@ -423,7 +425,6 @@ proc checkLinkSelection*(buffer: Buffer): bool = buffer.selectedlink = node let anchor = node.ancestor(TAG_A) assert(anchor != nil) - anchor.style.selected = true buffer.hovertext = HtmlAnchorElement(anchor).href var stack: seq[Node] stack.add(anchor) diff --git a/src/config.nim b/src/config.nim index d4713123..73dff47d 100644 --- a/src/config.nim +++ b/src/config.nim @@ -1,8 +1,8 @@ import tables import strutils -import twtstr -import radixtree +import utils/twtstr +import utils/radixtree type TwtAction* = @@ -43,21 +43,30 @@ func getRealKey(key: string): string = var realk: string var currchar: char var control = 0 + var meta = 0 var skip = false for c in key: if c == '\\': skip = true elif skip: - if c == 
'e': - realk &= '\e' - else: - realk &= c + realk &= c skip = false + elif c == 'M': + inc meta + currchar = c elif c == 'C': inc control currchar = c elif c == '-' and control == 1: inc control + elif c == '-' and meta == 1: + inc meta + elif meta == 1: + realk &= 'C' & c + meta = 0 + elif meta == 2: + realk &= '\e' + realk &= c elif control == 1: realk &= 'C' & c control = 0 @@ -115,18 +124,11 @@ proc staticReadKeymap(): (ActionMap, ActionMap, Table[string, string]) = lemap = constructActionTable(lemap) return (nmap, lemap, compose) -const (normalActionMap, linedActionMap, composeMap) = staticReadKeymap() - -normalActionRemap = normalActionMap -linedActionRemap = linedActionMap -composeRemap = composeMap.toRadixTree() -proc traverseRemap[T](m: RadixNode[T], s: string) = - echo s - for k in m.keys: - assert(m{k, m} != m, s & " " & k) - m{k, m}.traverseRemap(s & k) - -composeRemap.traverseRemap("") +when not defined(small): + const (normalActionMap, linedActionMap, composeMap) = staticReadKeymap() + normalActionRemap = normalActionMap + linedActionRemap = linedActionMap + composeRemap = composeMap.toRadixTree() proc readConfig*(filename: string): bool = var f: File @@ -139,8 +141,8 @@ proc readConfig*(filename: string): bool = while f.readLine(line): parseConfigLine(line, nmap, lemap, compose) - normalActionRemap = constructActionTable(normalActionMap) - linedActionRemap = constructActionTable(linedActionMap) + normalActionRemap = constructActionTable(nmap) + linedActionRemap = constructActionTable(lemap) composeRemap = compose.toRadixTree() return true else: diff --git a/src/css/cssparser.nim b/src/css/cssparser.nim new file mode 100644 index 00000000..5ecb470a --- /dev/null +++ b/src/css/cssparser.nim @@ -0,0 +1,813 @@ +# CSS tokenizer and parser. The tokenizer is a mess, and may or may not work +# correctly. The parser should work, though the outputted object model is +# questionable at best. 
+ +import unicode +import streams +import math +import options + +import ../io/twtio + +import ../utils/twtstr + +import ../types/enums + +type + CSSTokenizerState = object + at: int + stream: Stream + buf: seq[Rune] + + CSSParseState = object + tokens: seq[CSSParsedItem] + at: int + top_level: bool + + tflaga = enum + TFLAGA_UNRESTRICTED, TFLAGA_ID + tflagb = enum + TFLAGB_INTEGER, TFLAGB_NUMBER + + CSSParsedItem* = ref object of RootObj + CSSComponentValue* = ref object of CSSParsedItem + + CSSToken* = ref object of CSSComponentValue + case tokenType*: CSSTokenType + of CSS_IDENT_TOKEN, CSS_FUNCTION_TOKEN, CSS_AT_KEYWORD_TOKEN, + CSS_HASH_TOKEN, CSS_STRING_TOKEN, CSS_URL_TOKEN: + value*: seq[Rune] + tflaga*: tflaga + of CSS_DELIM_TOKEN: + rvalue*: Rune + of CSS_NUMBER_TOKEN, CSS_PERCENTAGE_TOKEN, CSS_DIMENSION_TOKEN: + nvalue*: float64 + tflagb*: tflagb + unit*: seq[Rune] + else: discard + + CSSRule* = ref object of CSSParsedItem + prelude*: seq[CSSComponentValue] + oblock*: CSSSimpleBlock + + CSSAtRule* = ref object of CSSRule + name*: seq[Rune] + + CSSQualifiedRule* = ref object of CSSRule + + CSSDeclaration* = ref object of CSSComponentValue + name*: seq[Rune] + value*: seq[CSSComponentValue] + important*: bool + + CSSFunction* = ref object of CSSComponentValue + name*: seq[Rune] + value*: seq[CSSComponentValue] + + CSSSimpleBlock* = ref object of CSSComponentValue + token*: CSSToken + value*: seq[CSSComponentValue] + + CSSStylesheet* = object + value*: seq[CSSRule] + + SyntaxError = object of ValueError + +func `==`*(a: CSSParsedItem, b: CSSTokenType): bool = + return a of CSSToken and CSSToken(a).tokenType == b + +func toNumber(s: seq[Rune]): float64 = + var sign = 1 + var t = 1 + var d = 0 + var integer: float64 = 0 + var f: float64 = 0 + var e: float64 = 0 + + var i = 0 + if i < s.len and s[i] == Rune('-'): + sign = -1 + inc i + elif i < s.len and s[i] == Rune('+'): + inc i + + while i < s.len and isDigitAscii(s[i]): + integer *= 10 + integer += 
float64(decValue(s[i])) + inc i + + if i < s.len and s[i] == Rune('.'): + inc i + while i < s.len and isDigitAscii(s[i]): + f *= 10 + f += float64(decValue(s[i])) + inc i + inc d + + if i < s.len and (s[i] == Rune('e') or s[i] == Rune('E')): + inc i + if i < s.len and s[i] == Rune('-'): + t = -1 + inc i + elif i < s.len and s[i] == Rune('+'): + inc i + + while i < s.len and isDigitAscii(s[i]): + e *= 10 + e += float64(decValue(s[i])) + inc i + + return float64(sign) * (integer + f * pow(10, float64(-d))) * pow(10, (float64(t) * e)) + +func isNameStartCodePoint*(r: Rune): bool = + return not isAscii(r) or r == Rune('_') or isAlphaAscii(r) + +func isNameCodePoint*(r: Rune): bool = + return isNameStartCodePoint(r) or isDigitAscii(r) or r == Rune('-') + +proc consume(state: var CSSTokenizerState): Rune = + result = state.buf[state.at] + inc state.at + +proc reconsume(state: var CSSTokenizerState) = + dec state.at + +func peek(state: CSSTokenizerState, i: int): Rune = + return state.buf[state.at + i] + +proc has(state: var CSSTokenizerState, i: int): bool = + if state.at + i >= state.buf.len and not state.stream.atEnd(): + state.buf &= state.stream.readLine().toRunes() & Rune('\n') + return state.at + i < state.buf.len + +func curr(state: CSSTokenizerState): Rune = + return state.buf[state.at] + +proc isValidEscape*(state: var CSSTokenizerState): bool = + return state.has(1) and state.curr() == Rune('\\') and state.peek(1) != Rune('\n') + +proc has(state: var CSSTokenizerState): bool = + if state.at >= state.buf.len and not state.stream.atEnd(): + state.buf &= state.stream.readLine().toRunes() & Rune('\n') + return state.at < state.buf.len + +proc startsWithIdentifier*(state: var CSSTokenizerState): bool = + if not state.has(): + return false + + if isNameStartCodePoint(state.curr()): + return true + if state.curr() == Rune('-'): + if state.has(1) and state.peek(1).isNameStartCodePoint(): + return true + if state.isValidEscape(): + return true + return false + elif 
state.curr() == Rune('\\'): + return state.isValidEscape() + + return false + +proc startsWithNumber*(state: var CSSTokenizerState): bool = + ## Returns true when the code points at the current position can start a + ## number: a digit, a '.' followed by a digit, or a '+'/'-' followed by a + ## digit or by '.' and a digit. + if state.has(): + case state.curr() + of Rune('+'), Rune('-'): + if state.has(1): + if isDigitAscii(state.peek(1)): + return true + elif state.peek(1) == Rune('.'): + if state.has(2) and isDigitAscii(state.peek(2)): + return true + of Rune('.'): + # has(1) guards the lookahead when '.' is the last buffered code point + if state.has(1) and isDigitAscii(state.peek(1)): + return true + elif isDigitAscii(state.curr()): + return true + else: + return false + return false + +proc consumeEscape(state: var CSSTokenizerState): Rune = + ## Consumes the code point after a backslash. A hex digit starts a hex + ## escape; zero, values above 0x10FFFF and surrogates become U+FFFD. + ## Any other code point is returned unchanged. + let r = state.consume() + var num = hexValue(r) + if num != -1: + var i = 0 + # one hex digit is already consumed; CSS allows at most five more + while state.has() and i < 5: + let r = state.consume() + if hexValue(r) == -1: + state.reconsume() + break + num *= 0x10 + num += hexValue(r) + inc i + if num == 0 or num > 0x10FFFF or num in {0xD800..0xDFFF}: + return Rune(0xFFFD) + else: + return Rune(num) + else: + return r + +proc consumeString(state: var CSSTokenizerState): CSSToken = + var s: seq[Rune] + state.reconsume() + let ending = state.consume() + + while state.has(): + let r = state.consume() + case r + of Rune('\n'): + return CSSToken(tokenType: CSS_BAD_STRING_TOKEN) + of Rune('\\'): + s &= consumeEscape(state) + elif r == ending: + break + else: + s &= r + return CSSToken(tokenType: CSS_STRING_TOKEN, value: s) + +proc consumeName(state: var CSSTokenizerState): seq[Rune] = + while state.has(): + let r = state.consume() + if state.isValidEscape(): + result &= state.consumeEscape() + elif isNameCodePoint(r): + result &= r + else: + state.reconsume() + return result + +proc consumeNumberSign(state: var CSSTokenizerState): CSSToken = + if state.has(): + let r = state.consume() + if isNameCodePoint(r) or state.isValidEscape(): + result = CSSToken(tokenType: CSS_HASH_TOKEN) + if state.startsWithIdentifier(): + result.tflaga = TFLAGA_ID + + state.reconsume() + result.value = consumeName(state) + else: + let r = state.consume() + result = 
CSSToken(tokenType: CSS_DELIM_TOKEN, rvalue: r) + +proc consumeNumber(state: var CSSTokenizerState): tuple[t: tflagb, val: float64] = + var t = TFLAGB_INTEGER + var repr: seq[Rune] + if state.has(): + if state.curr() == Rune('+') or state.curr() == Rune('-'): + repr &= state.consume() + + while state.has() and isDigitAscii(state.curr()): + repr &= state.consume() + + if state.has(1): + if state.curr() == Rune('.') and isDigitAscii(state.peek(1)): + repr &= state.consume() + repr &= state.consume() + t = TFLAGB_NUMBER + while state.has() and isDigitAscii(state.curr()): + repr &= state.consume() + + if state.has(1): + if state.curr() == Rune('E') or state.curr() == Rune('e'): + var j = 2 + if state.peek(1) == Rune('-') or state.peek(1) == Rune('+'): + inc j + if state.has(j) and isDigitAscii(state.peek(j)): + while j > 0: + repr &= state.consume() + dec j + + while state.has() and isDigitAscii(state.curr()): + repr &= state.consume() + + let val = toNumber(repr) + return (t, val) + +proc consumeNumericToken(state: var CSSTokenizerState): CSSToken = + let num = state.consumeNumber() + if state.startsWithIdentifier(): + result = CSSToken(tokenType: CSS_DIMENSION_TOKEN, nvalue: num.val, tflagb: num.t) + result.unit = state.consumeName() + elif state.has() and state.curr() == Rune('%'): + discard state.consume() + result = CSSToken(tokenType: CSS_PERCENTAGE_TOKEN, nvalue: num.val) + else: + result = CSSToken(tokenType: CSS_NUMBER_TOKEN, nvalue: num.val, tflagb: num.t) + +proc consumeBadURL(state: var CSSTokenizerState) = + while state.has(1): + let r = state.consume() + case r + of Rune(')'): + return + elif state.isValidEscape(): + discard state.consumeEscape() + else: discard + +proc consumeURL(state: var CSSTokenizerState): CSSToken = + result = CSSToken(tokenType: CSS_URL_TOKEN) + while state.has(1) and state.peek(1).isWhitespace(): + discard state.consume() + + while state.has(1): + let r = state.consume() + case r + of Rune(')'): + return result + of Rune('"'), 
Rune('\''), Rune('('): + state.consumeBadURL() + return CSSToken(tokenType: CSS_BAD_URL_TOKEN) + of Rune('\\'): + state.reconsume() + if state.isValidEscape(): + result.value &= state.consumeEscape() + else: + state.consumeBadURL() + return CSSToken(tokenType: CSS_BAD_URL_TOKEN) + elif r.isWhitespace(): + while state.has(1) and state.peek(1).isWhitespace(): + discard state.consume() + else: + result.value &= r + +proc consumeIdentLikeToken(state: var CSSTokenizerState): CSSToken = + let s = state.consumeName() + if s.toAsciiLower() == "url" and state.has() and state.curr() == Rune('('): + discard state.consume() + while state.has(1) and state.curr().isWhitespace() and state.peek(1).isWhitespace(): + discard state.consume() + if state.curr() == Rune('\'') or state.curr() == Rune('"') or state.curr().isWhitespace(): + return CSSToken(tokenType: CSS_FUNCTION_TOKEN, value: s) + else: + return state.consumeURL() + elif state.has() and state.curr() == Rune('('): + discard state.consume() + return CSSToken(tokenType: CSS_FUNCTION_TOKEN, value: s) + + return CSSToken(tokenType: CSS_IDENT_TOKEN, value: s) + +proc consumeComments(state: var CSSTokenizerState) = + if state.has(2) and state.peek(1) == Rune('/') and state.peek(2) == Rune('*'): + discard state.consume() + discard state.consume() + while state.has(2) and not (state.peek(1) == Rune('*') and state.peek(2) == Rune('/')): + discard state.consume() + + if state.has(2): + discard state.consume() + if state.has(1): + discard state.consume() + +proc consumeToken(state: var CSSTokenizerState): CSSToken = + state.consumeComments() + let r = state.consume() + case r + of Rune('\n'), Rune('\t'), Rune(' '), Rune('\f'), Rune('\r'): + while state.has() and state.curr().isWhitespace(): + discard state.consume() + return CSSToken(tokenType: CSS_WHITESPACE_TOKEN) + of Rune('"'), Rune('\''): + return consumeString(state) + of Rune('#'): + return consumeNumberSign(state) + of Rune('('): + return CSSToken(tokenType: CSS_LPAREN_TOKEN) 
+ of Rune(')'): + return CSSToken(tokenType: CSS_RPAREN_TOKEN) + of Rune('['): + return CSSToken(tokenType: CSS_LBRACKET_TOKEN) + of Rune(']'): + return CSSToken(tokenType: CSS_RBRACKET_TOKEN) + of Rune('{'): + return CSSToken(tokenType: CSS_LBRACE_TOKEN) + of Rune('}'): + return CSSToken(tokenType: CSS_RBRACE_TOKEN) + of Rune(','): + return CSSToken(tokenType: CSS_COMMA_TOKEN) + of Rune(':'): + return CSSToken(tokenType: CSS_COLON_TOKEN) + of Rune(';'): + return CSSToken(tokenType: CSS_SEMICOLON_TOKEN) + of Rune('+'): + if state.startsWithNumber(): + state.reconsume() + return state.consumeNumericToken() + else: + return CSSToken(tokenType: CSS_DELIM_TOKEN, rvalue: r) + of Rune('-'): + if state.startsWithNumber(): + state.reconsume() + return state.consumeNumericToken() + else: + if state.has(2) and state.peek(1) == Rune('-') and state.peek(2) == Rune('>'): + discard state.consume() + discard state.consume() + return CSSToken(tokenType: CSS_CDC_TOKEN) + elif state.startsWithIdentifier(): + state.reconsume() + result = state.consumeIdentLikeToken() + eprint result.value + else: + return CSSToken(tokenType: CSS_DELIM_TOKEN, rvalue: r) + of Rune('.'): + if state.startsWithNumber(): + state.reconsume() + return state.consumeNumericToken() + else: + return CSSToken(tokenType: CSS_DELIM_TOKEN, rvalue: r) + of Rune('<'): + if state.has(3) and state.peek(1) == Rune('!') and state.peek(2) == Rune('-') and state.peek(3) == Rune('-'): + discard state.consume() + discard state.consume() + discard state.consume() + return CSSToken(tokenType: CSS_CDO_TOKEN) + else: + return CSSToken(tokenType: CSS_DELIM_TOKEN, rvalue: r) + of Rune('@'): + if state.startsWithIdentifier(): + let name = state.consumeName() + return CSSToken(tokenType: CSS_AT_KEYWORD_TOKEN, value: name) + else: + return CSSToken(tokenType: CSS_DELIM_TOKEN, rvalue: r) + elif isDigitAscii(r): + state.reconsume() + return state.consumeNumericToken() + elif isNameStartCodePoint(r): + state.reconsume() + return 
state.consumeIdentLikeToken() + else: + return CSSToken(tokenType: CSS_DELIM_TOKEN, rvalue: r) + +proc tokenizeCSS*(inputStream: Stream): seq[CSSParsedItem] = + var state: CSSTokenizerState + state.stream = inputStream + state.buf = state.stream.readLine().toRunes() + while state.has(): + result.add(state.consumeToken()) + + inputStream.close() + +proc consume(state: var CSSParseState): CSSParsedItem = + result = state.tokens[state.at] + inc state.at + +proc reconsume(state: var CSSParseState) = + dec state.at + +func has(state: CSSParseState, i: int): bool = + return state.at + i < state.tokens.len + +func curr(state: CSSParseState): CSSParsedItem = + return state.tokens[state.at] + +func has(state: CSSParseState): bool = + return state.at < state.tokens.len + +proc consumeSimpleBlock(state: var CSSParseState): CSSSimpleBlock = + state.reconsume() + let t = CSSToken(state.consume()) + var ending: CSSTokenType + case t.tokenType + of CSS_LBRACE_TOKEN: ending = CSS_RBRACE_TOKEN + of CSS_LPAREN_TOKEN: ending = CSS_RPAREN_TOKEN + of CSS_LBRACKET_TOKEN: ending = CSS_RBRACKET_TOKEN + else: raise newException(Exception, "Parse error!") + + result = CSSSimpleBlock(token: t) + while state.at < state.tokens.len: + let t = state.consume() + if t == ending: + return result + else: + if t == CSS_LBRACE_TOKEN or t == CSS_LBRACKET_TOKEN or t == CSS_LPAREN_TOKEN: + result.value.add(state.consumeSimpleBlock()) + else: + result.value.add(CSSComponentValue(t)) + return result + +proc consumeComponentValue*(state: var CSSParseState): CSSComponentValue + +proc consumeFunction(state: var CSSParseState): CSSFunction = + let t = (CSSToken)state.consume() + result = CSSFunction(name: t.value) + while state.at < state.tokens.len: + let t = state.consume() + if t == CSS_RPAREN_TOKEN: + return result + else: + state.reconsume() + result.value.add(state.consumeComponentValue()) + +proc consumeComponentValue(state: var CSSParseState): CSSComponentValue = + let t = state.consume() + if t == 
CSS_LBRACE_TOKEN or t == CSS_LBRACKET_TOKEN or t == CSS_LPAREN_TOKEN: + return state.consumeSimpleBlock() + elif t == CSS_FUNCTION_TOKEN: + state.reconsume() + return state.consumeFunction() + return CSSComponentValue(t) + +proc consumeQualifiedRule(state: var CSSParseState): Option[CSSQualifiedRule] = + ## Consumes a qualified rule: component values are collected into the + ## prelude until a {}-block is found, which becomes the rule's block. + ## Returns none when the input ends before a block. + var r = CSSQualifiedRule() + while state.has(): + let t = state.consume() + if t of CSSSimpleBlock and CSSSimpleBlock(t).token.tokenType == CSS_LBRACE_TOKEN: + # already-parsed {}-block: use it directly instead of re-parsing it + r.oblock = CSSSimpleBlock(t) + return some(r) + elif t == CSS_LBRACE_TOKEN: + r.oblock = state.consumeSimpleBlock() + return some(r) + else: + state.reconsume() + r.prelude.add(state.consumeComponentValue()) + return none(CSSQualifiedRule) + + +proc consumeAtRule(state: var CSSParseState): CSSAtRule = + ## Consumes an at-rule named after the current at-keyword token. The + ## prelude ends at a semicolon or at a {}-block, which becomes the block. + let t = CSSToken(state.consume()) + result = CSSAtRule(name: t.value) + + while state.at < state.tokens.len: + let t = state.consume() + if t of CSSSimpleBlock and CSSSimpleBlock(t).token.tokenType == CSS_LBRACE_TOKEN: + # already-parsed {}-block: use it directly and finish the rule + result.oblock = CSSSimpleBlock(t) + return result + elif t == CSS_SEMICOLON_TOKEN: + return result + elif t == CSS_LBRACE_TOKEN: + result.oblock = state.consumeSimpleBlock() + return result + else: + state.reconsume() + result.prelude.add(state.consumeComponentValue()) + +proc consumeDeclaration(state: var CSSParseState): Option[CSSDeclaration] = + let t = CSSToken(state.consume()) + var decl = CSSDeclaration(name: t.value) + while state.has() and state.curr() == CSS_WHITESPACE_TOKEN: + discard state.consume() + if not state.has() or state.curr() != CSS_COLON_TOKEN: + return none(CSSDeclaration) + discard state.consume() + while state.has() and state.curr() == CSS_WHITESPACE_TOKEN: + discard state.consume() + + while state.has(): + decl.value.add(state.consumeComponentValue()) + + var i = decl.value.len - 1 + var j = 2 + var k = 0 + var l = 0 + while i >= 0 and j > 0: + if decl.value[i] != CSS_WHITESPACE_TOKEN: + dec j + if decl.value[i] == CSS_IDENT_TOKEN and k == 0: + if CSSToken(decl.value[i]).value.toAsciiLower() == "important": + inc k + l = i + elif k == 1 and decl.value[i] == CSS_DELIM_TOKEN: + if 
CSSToken(decl.value[i]).rvalue == Rune('!'): + decl.important = true + decl.value.del(l) + decl.value.del(i) + break + dec i + + while decl.value.len > 0 and decl.value[^1] == CSS_WHITESPACE_TOKEN: + decl.value.del(decl.value.len - 1) + return some(decl) + +#> Note: Despite the name, this actually parses a mixed list of declarations +#> and at-rules, as CSS 2.1 does for @page. Unexpected at-rules (which could be +#> all of them, in a given context) are invalid and should be ignored by the +#> consumer. +#Wow this is ugly. +proc consumeListOfDeclarations(state: var CSSParseState): seq[CSSParsedItem] = + ## Returns the declarations and at-rules found in the remaining tokens. + ## Whitespace and semicolons are skipped; anything else that does not + ## start a declaration or an at-rule is discarded up to the next semicolon. + while state.has(): + let t = state.consume() + if t == CSS_WHITESPACE_TOKEN or t == CSS_SEMICOLON_TOKEN: + continue + elif t == CSS_AT_KEYWORD_TOKEN: + state.reconsume() + result.add(state.consumeAtRule()) + elif t == CSS_IDENT_TOKEN: + var tempList: seq[CSSParsedItem] + tempList.add(CSSToken(t)) + while state.has() and state.curr() != CSS_SEMICOLON_TOKEN: + tempList.add(state.consumeComponentValue()) + + var tempState = CSSParseState(at: 0, tokens: tempList) + let decl = tempState.consumeDeclaration() + if decl.isSome: + result.add(decl.get) + else: + state.reconsume() + # parse error: discard everything up to the next semicolon + while state.has() and state.curr() != CSS_SEMICOLON_TOKEN: + discard state.consumeComponentValue() + +proc consumeListOfRules(state: var CSSParseState): seq[CSSRule] = + while state.at < state.tokens.len: + let t = state.consume() + if t == CSS_WHITESPACE_TOKEN: + continue + elif t == CSS_CDO_TOKEN or t == CSS_CDC_TOKEN: + if state.top_level: + continue + else: + state.reconsume() + let q = state.consumeQualifiedRule() + if q.isSome: + result.add(q.get) + elif t == CSS_AT_KEYWORD_TOKEN: + state.reconsume() + result.add(state.consumeAtRule()) + else: + state.reconsume() + let q = state.consumeQualifiedRule() + if q.isSome: + result.add(q.get) + +proc parseStylesheet(state: var CSSParseState): CSSStylesheet = + state.top_level = true + result.value.add(state.consumeListOfRules()) + +proc parseStylesheet(inputStream: 
Stream): CSSStylesheet = + var state = CSSParseState() + state.tokens = tokenizeCSS(inputStream) + return state.parseStylesheet() + +proc parseListOfRules(state: var CSSParseState): seq[CSSRule] = + return state.consumeListOfRules() + +proc parseListOfRules(inputStream: Stream): seq[CSSRule] = + var state = CSSParseState() + state.tokens = tokenizeCSS(inputStream) + return state.parseListOfRules() + +proc parseRule(state: var CSSParseState): CSSRule = + while state.has() and state.curr() == CSS_WHITESPACE_TOKEN: + discard state.consume() + if not state.has(): + raise newException(SyntaxError, "EOF reached!") + + if state.curr() == CSS_AT_KEYWORD_TOKEN: + result = state.consumeAtRule() + else: + let q = state.consumeQualifiedRule() + if q.isSome: + result = q.get + else: + raise newException(SyntaxError, "No qualified rule found!") + + while state.has() and state.curr() == CSS_WHITESPACE_TOKEN: + discard state.consume() + if state.has(): + raise newException(SyntaxError, "EOF not reached!") + +proc parseRule(inputStream: Stream): CSSRule = + var state = CSSParseState() + state.tokens = tokenizeCSS(inputStream) + return state.parseRule() + +proc parseDeclaration(state: var CSSParseState): CSSDeclaration = + while state.has() and state.curr() == CSS_WHITESPACE_TOKEN: + discard state.consume() + + if not state.has() or state.curr() != CSS_IDENT_TOKEN: + raise newException(SyntaxError, "No ident token found!") + + let d = state.consumeDeclaration() + if d.isSome: + return d.get + + raise newException(SyntaxError, "No declaration found!") + +proc parseCSSDeclaration*(inputStream: Stream): CSSDeclaration = + var state = CSSParseState() + state.tokens = tokenizeCSS(inputStream) + return state.parseDeclaration() + +proc parseListOfDeclarations(state: var CSSParseState): seq[CSSParsedItem] = + return state.consumeListOfDeclarations() + +proc parseCSSListOfDeclarations*(inputStream: Stream): seq[CSSParsedItem] = + var state = CSSParseState() + state.tokens = 
tokenizeCSS(inputStream) + return state.parseListOfDeclarations() + +proc parseComponentValue(state: var CSSParseState): CSSComponentValue = + while state.has() and state.curr() == CSS_WHITESPACE_TOKEN: + discard state.consume() + if not state.has(): + raise newException(SyntaxError, "EOF reached!") + + result = state.consumeComponentValue() + + while state.has() and state.curr() == CSS_WHITESPACE_TOKEN: + discard state.consume() + if state.has(): + raise newException(SyntaxError, "EOF not reached!") + +proc parseCSSComponentValue*(inputStream: Stream): CSSComponentValue = + var state = CSSParseState() + state.tokens = tokenizeCSS(inputStream) + return state.parseComponentValue() + +proc parseListOfComponentValues(state: var CSSParseState): seq[CSSComponentValue] = + while state.has(): + result.add(state.consumeComponentValue()) + +proc parseCSSListOfComponentValues*(inputStream: Stream): seq[CSSComponentValue] = + var state = CSSParseState() + state.tokens = tokenizeCSS(inputStream) + return state.parseListOfComponentValues() + +proc parseCommaSeparatedListOfComponentValues(state: var CSSParseState): seq[CSSComponentValue] = + ## Collects every remaining component value except comma tokens into one + ## flat sequence. + while state.has(): + let cvl = state.consumeComponentValue() + # keep the value that was just consumed; commas only act as separators + if cvl != CSS_COMMA_TOKEN: + result.add(cvl) + +proc parseCommaSeparatedListOfComponentValues(inputStream: Stream): seq[CSSComponentValue] = + var state = CSSParseState() + state.tokens = tokenizeCSS(inputStream) + return state.parseCommaSeparatedListOfComponentValues() + +proc printc*(c: CSSComponentValue) = + if c of CSSToken: + case CSSToken(c).tokenType: + of CSS_FUNCTION_TOKEN, CSS_AT_KEYWORD_TOKEN, CSS_URL_TOKEN: + eprint CSSToken(c).tokenType, CSSToken(c).value + of CSS_HASH_TOKEN: + stderr.write('#' & $CSSToken(c).value) + of CSS_IDENT_TOKEN: + stderr.write(CSSToken(c).value) + of CSS_STRING_TOKEN: + stderr.write("\"" & $CSSToken(c).value & "\"") + of CSS_DELIM_TOKEN: + stderr.write(CSSToken(c).rvalue) + of CSS_DIMENSION_TOKEN: + eprint 
CSSToken(c).tokenType, CSSToken(c).nvalue, "unit", CSSToken(c).unit, CSSToken(c).tflagb + of CSS_NUMBER_TOKEN: + stderr.write($CSSToken(c).nvalue & $CSSToken(c).unit) + of CSS_PERCENTAGE_TOKEN: + stderr.write($CSSToken(c).nvalue & "%") + of CSS_COLON_TOKEN: + stderr.write(":") + of CSS_WHITESPACE_TOKEN: + stderr.write(" ") + of CSS_SEMICOLON_TOKEN: + stderr.write(";\n") + of CSS_COMMA_TOKEN: + stderr.write(",") + else: + eprint CSSToken(c).tokenType + elif c of CSSDeclaration: + stderr.write(CSSDeclaration(c).name) + stderr.write(": ") + for s in CSSDeclaration(c).value: + printc(s) + stderr.write(";\n") + elif c of CSSFunction: + stderr.write($CSSFunction(c).name & "(") + for s in CSSFunction(c).value: + printc(s) + stderr.write(")") + elif c of CSSSimpleBlock: + case CSSSimpleBlock(c).token.tokenType + of CSS_LBRACE_TOKEN: eprint "{" + of CSS_LPAREN_TOKEN: stderr.write("(") + of CSS_LBRACKET_TOKEN: stderr.write("[") + else: discard + for s in CSSSimpleBlock(c).value: + printc(s) + case CSSSimpleBlock(c).token.tokenType + of CSS_LBRACE_TOKEN: eprint "}" + of CSS_LPAREN_TOKEN: stderr.write(")") + of CSS_LBRACKET_TOKEN: stderr.write("]") + else: discard + +proc parseCSS*(inputStream: Stream): CSSStylesheet = + return inputstream.parseStylesheet() + +proc debugparseCSS*(inputStream: Stream) = + let ss = inputStream.parseStylesheet() + for v in ss.value: + if v of CSSAtRule: + eprint CSSAtRule(v).name + else: + for c in CSSQualifiedRule(v).prelude: + printc(c) + case v.oblock.token.tokenType + of CSS_LBRACE_TOKEN: eprint "\n{" + of CSS_LPAREN_TOKEN: eprint "(" + of CSS_LBRACKET_TOKEN: eprint "[" + else: discard + for s in v.oblock.value: + printc(s) + case v.oblock.token.tokenType + of CSS_LBRACE_TOKEN: eprint "\n}" + of CSS_LPAREN_TOKEN: eprint ")" + of CSS_LBRACKET_TOKEN: eprint "]" + else: discard diff --git a/src/css/selector.nim b/src/css/selector.nim new file mode 100644 index 00000000..1ca417dd --- /dev/null +++ b/src/css/selector.nim @@ -0,0 +1,151 @@ +import 
unicode + +import ../types/enums +import ../types/tagtypes + +import cssparser + +type + SelectorType* = enum + TYPE_SELECTOR, ID_SELECTOR, ATTR_SELECTOR, CLASS_SELECTOR, + UNIVERSAL_SELECTOR, PSEUDO_SELECTOR, PSELEM_SELECTOR, FUNC_SELECTOR + + QueryMode* = enum + QUERY_TYPE, QUERY_CLASS, QUERY_ATTR, QUERY_DELIM, QUERY_VALUE, + QUERY_PSEUDO, QUERY_PSELEM + + SelectorParser = object + selectors: seq[SelectorList] + query: QueryMode + negate: bool + + #TODO combinators + Selector* = object + case t*: SelectorType + of TYPE_SELECTOR: + tag*: TagType + of ID_SELECTOR: + id*: string + of ATTR_SELECTOR: + attr*: string + value*: string + rel*: char + of CLASS_SELECTOR: + class*: string + of UNIVERSAL_SELECTOR: #TODO namespaces? + discard + of PSEUDO_SELECTOR: + pseudo*: string + of PSELEM_SELECTOR: + elem*: string + of FUNC_SELECTOR: + name*: string + selectors*: SelectorList + + SelectorList* = ref object + sels*: seq[Selector] + parent*: SelectorList + +proc add*(sellist: SelectorList, sel: Selector) = sellist.sels.add(sel) +proc add*(sellist: SelectorList, sels: SelectorList) = sellist.sels.add(sels.sels) +proc setLen*(sellist: SelectorList, i: int) = sellist.sels.setLen(i) +proc `[]`*(sellist: SelectorList, i: int): Selector = sellist.sels[i] +proc len*(sellist: SelectorList): int = sellist.sels.len + +proc parseSelectorToken(state: var SelectorParser, csstoken: CSSToken) = + case csstoken.tokenType + of CSS_IDENT_TOKEN: + var sel: Selector + case state.query + of QUERY_CLASS: + state.selectors[^1].add(Selector(t: CLASS_SELECTOR, class: $csstoken.value)) + of QUERY_TYPE: + state.selectors[^1].add(Selector(t: TYPE_SELECTOR, tag: tagType($csstoken.value))) + of QUERY_PSEUDO: + state.selectors[^1].add(Selector(t: PSEUDO_SELECTOR, pseudo: $csstoken.value)) + of QUERY_PSELEM: + state.selectors[^1].add(Selector(t: PSELEM_SELECTOR, elem: $csstoken.value)) + else: discard + state.query = QUERY_TYPE + of CSS_DELIM_TOKEN: + if csstoken.rvalue == Rune('.'): + state.query = 
QUERY_CLASS + of CSS_HASH_TOKEN: + state.selectors[^1].add(Selector(t: ID_SELECTOR, id: $csstoken.value)) + of CSS_COMMA_TOKEN: + if state.selectors[^1].len > 0: + state.selectors.add(SelectorList()) + of CSS_COLON_TOKEN: + if state.query == QUERY_PSEUDO: + state.query = QUERY_PSELEM + else: + state.query = QUERY_PSEUDO + else: discard + +proc parseSelectorSimpleBlock(state: var SelectorParser, cssblock: CSSSimpleBlock) = + case cssblock.token.tokenType + of CSS_LBRACKET_TOKEN: + state.query = QUERY_ATTR + for cval in cssblock.value: + if cval of CSSToken: + let csstoken = (CSSToken)cval + case csstoken.tokenType + of CSS_IDENT_TOKEN: + case state.query + of QUERY_ATTR: + state.query = QUERY_DELIM + state.selectors[^1].add(Selector(t: ATTR_SELECTOR, attr: $csstoken.value, rel: ' ')) + of QUERY_VALUE: + state.selectors[^1].sels[^1].value = $csstoken.value + break + else: discard + of CSS_STRING_TOKEN: + case state.query + of QUERY_VALUE: + state.selectors[^1].sels[^1].value = $csstoken.value + break + else: discard + of CSS_DELIM_TOKEN: + case csstoken.rvalue + of Rune('~'), Rune('|'), Rune('^'), Rune('$'), Rune('*'): + if state.query == QUERY_DELIM: + state.selectors[^1].sels[^1].rel = char(csstoken.rvalue) + of Rune('='): + if state.query == QUERY_DELIM: + state.query = QUERY_VALUE + else: discard + else: discard + state.query = QUERY_TYPE + else: discard + +proc parseSelectorFunction(state: var SelectorParser, cssfunction: CSSFunction) = + case $cssfunction.name + of "not": + if state.query != QUERY_PSEUDO: + return + state.query = QUERY_TYPE + else: return + var fun = Selector(t: FUNC_SELECTOR, name: $cssfunction.name) + fun.selectors = SelectorList(parent: state.selectors[^1]) + state.selectors[^1].add(fun) + state.selectors[^1] = fun.selectors + for cval in cssfunction.value: + if cval of CSSToken: + state.parseSelectorToken((CSSToken)cval) + elif cval of CSSSimpleBlock: + state.parseSelectorSimpleBlock((CSSSimpleBlock)cval) + elif cval of CSSFunction: + 
state.parseSelectorFunction((CSSFunction)cval) + state.selectors[^1] = fun.selectors.parent + +func parseSelectors*(cvals: seq[CSSComponentValue]): seq[SelectorList] = + var state = SelectorParser() + state.selectors.add(SelectorList()) + for cval in cvals: + if cval of CSSToken: + state.parseSelectorToken((CSSToken)cval) + elif cval of CSSSimpleBlock: + state.parseSelectorSimpleBlock((CSSSimpleBlock)cval) + elif cval of CSSFunction: + state.parseSelectorFunction((CSSFunction)cval) + return state.selectors diff --git a/src/css/style.nim b/src/css/style.nim new file mode 100644 index 00000000..56e6b00b --- /dev/null +++ b/src/css/style.nim @@ -0,0 +1,321 @@ +import streams +import unicode +import terminal +import tables + +import ../io/twtio + +import ../utils/twtstr + +import ../types/enums + +import cssparser + +type + CSSLength* = object + num*: float64 + unit*: CSSUnit + auto*: bool + + CSS2Properties* = ref object + rawtext*: string + fmttext*: seq[string] + x*: int + y*: int + ex*: int + ey*: int + width*: int + height*: int + hidden*: bool + before*: CSS2Properties + after*: CSS2Properties + margintop*: CSSLength + marginbottom*: CSSLength + marginleft*: CSSLength + marginright*: CSSLength + centered*: bool + display*: DisplayType + bold*: bool + italic*: bool + underscore*: bool + islink*: bool + selected*: bool + indent*: int + color*: CSSColor + position*: CSSPosition + + CSSCanvas* = object + rootBox*: CSSBox + width*: int + height*: int + + CSSRect* = object + x1*: int + y1*: int + x2*: int + y2*: int + + CSSBox* = ref object + display*: DisplayType + x*: int + y*: int + innerEdge*: CSSRect + paddingEdge*: CSSRect + borderEdge*: CSSRect + marginEdge*: CSSRect + color*: CSSColor + props*: CSS2Properties + content*: seq[Rune] + dispcontent*: string + children*: seq[CSSBox] + + CSSColor* = tuple[r: uint8, g: uint8, b: uint8, a: uint8] + +func `+`(a: CSSRect, b: CSSRect): CSSRect = + result.x1 = a.x1 + b.x1 + result.y1 = a.y1 + b.y1 + result.x2 = a.x2 + b.x2 
+ result.y2 = a.y2 + b.y2 + +proc `+=`(a: var CSSRect, b: CSSRect) = + a = a + b + +func cells(l: CSSLength): int = + case l.unit + of EM_UNIT: + return int(l.num) + else: + #TODO + return int(l.num / 8) + +const colors = { + "maroon": (0x80u8, 0x00u8, 0x00u8, 0x00u8), + "red": (0xffu8, 0x00u8, 0x00u8, 0x00u8), + "orange": (0xffu8, 0xa5u8, 0x00u8, 0x00u8), + "yellow": (0xffu8, 0xffu8, 0x00u8, 0x00u8), + "olive": (0x80u8, 0x80u8, 0x00u8, 0x00u8), + "purple": (0x80u8, 0x00u8, 0x80u8, 0x00u8), + "fuchsia": (0xffu8, 0x00u8, 0x00u8, 0x00u8), + "white": (0xffu8, 0xffu8, 0xffu8, 0x00u8), + "lime": (0x00u8, 0xffu8, 0x00u8, 0x00u8), + "green": (0x00u8, 0x80u8, 0x00u8, 0x00u8), + "navy": (0x00u8, 0x00u8, 0x80u8, 0x00u8), + "blue": (0x00u8, 0x00u8, 0xffu8, 0x00u8), + "aqua": (0x00u8, 0xffu8, 0xffu8, 0x00u8), + "teal": (0x00u8, 0x80u8, 0x80u8, 0x00u8), + "black": (0x00u8, 0x00u8, 0x00u8, 0x00u8), + "silver": (0xc0u8, 0xc0u8, 0xc0u8, 0x00u8), + "gray": (0x80u8, 0x80u8, 0x80u8, 0x00u8), +}.toTable() + +const defaultColor = (0xffu8, 0xffu8, 0xffu8, 0x00u8) + +func cssLength(val: float64, unit: string): CSSLength = + case unit + of "%": return CSSLength(num: val, unit: PERC_UNIT) + of "cm": return CSSLength(num: val, unit: CM_UNIT) + of "mm": return CSSLength(num: val, unit: MM_UNIT) + of "in": return CSSLength(num: val, unit: IN_UNIT) + of "px": return CSSLength(num: val, unit: PX_UNIT) + of "pt": return CSSLength(num: val, unit: PT_UNIT) + of "pc": return CSSLength(num: val, unit: PC_UNIT) + of "em": return CSSLength(num: val, unit: EM_UNIT) + of "ex": return CSSLength(num: val, unit: EX_UNIT) + of "ch": return CSSLength(num: val, unit: CH_UNIT) + of "rem": return CSSLength(num: val, unit: REM_UNIT) + of "vw": return CSSLength(num: val, unit: VW_UNIT) + of "vh": return CSSLength(num: val, unit: VH_UNIT) + of "vmin": return CSSLength(num: val, unit: VMIN_UNIT) + of "vmax": return CSSLength(num: val, unit: VMAX_UNIT) + else: return CSSLength(num: 0, unit: EM_UNIT) + +func 
cssColor*(d: CSSDeclaration): CSSColor = + if d.value.len > 0: + if d.value[0] of CSSToken: + let tok = CSSToken(d.value[0]) + case tok.tokenType + of CSS_HASH_TOKEN: + let s = tok.value + if s.len == 3: + for r in s: + if hexValue(r) == -1: + return + let r = hexValue(s[0]) * 0x10 + hexValue(s[0]) + let g = hexValue(s[1]) * 0x10 + hexValue(s[1]) + let b = hexValue(s[2]) * 0x10 + hexValue(s[2]) + + return (uint8(r), uint8(g), uint8(b), 0x00u8) + elif s.len == 6: + for r in s: + if hexValue(r) == -1: + return + let r = hexValue(s[0]) * 0x10 + hexValue(s[1]) + let g = hexValue(s[2]) * 0x10 + hexValue(s[3]) + let b = hexValue(s[4]) * 0x10 + hexValue(s[5]) + return (uint8(r), uint8(g), uint8(b), 0x00u8) + else: + return defaultColor + of CSS_IDENT_TOKEN: + let s = tok.value + eprint "ident", s + if $s in colors: + return colors[$s] + else: + return defaultColor + else: + eprint "else", tok.tokenType + return defaultColor + elif d of CSSFunction: + let f = CSSFunction(d.value[0]) + eprint "func", f.name + #todo calc etc (cssnumber function or something) + case $f.name + of "rgb": + if f.value.len != 3: + return defaultColor + for c in f.value: + if c != CSS_NUMBER_TOKEN: + return defaultColor + let r = CSSToken(f.value[0]).nvalue + let g = CSSToken(f.value[1]).nvalue + let b = CSSToken(f.value[2]).nvalue + return (uint8(r), uint8(g), uint8(b), 0x00u8) + of "rgba": + if f.value.len != 4: + eprint "too few args" + return defaultColor + for c in f.value: + if c != CSS_NUMBER_TOKEN: + eprint "not number" + return defaultColor + let r = CSSToken(f.value[0]).nvalue + let g = CSSToken(f.value[1]).nvalue + let b = CSSToken(f.value[2]).nvalue + let a = CSSToken(f.value[3]).nvalue + return (uint8(r), uint8(g), uint8(b), uint8(a)) + else: + eprint "not rgba" + return defaultColor + + return defaultColor + +func cssLength(d: CSSDeclaration): CSSLength = + if d.value.len > 0 and d.value[0] of CSSToken: + let tok = CSSToken(d.value[0]) + case tok.tokenType + of CSS_PERCENTAGE_TOKEN: 
+ return cssLength(tok.nvalue, "%") + of CSS_DIMENSION_TOKEN: + return cssLength(tok.nvalue, $tok.unit) + of CSS_IDENT_TOKEN: + if $tok.value == "auto": + return CSSLength(num: 0, unit: EM_UNIT, auto: true) + else: + return CSSLength(num: 0, unit: EM_UNIT) + + return CSSLength(num: 0, unit: EM_UNIT) + +func hasColor*(style: CSS2Properties): bool = + return style.color.r != 0 or style.color.b != 0 or style.color.g != 0 or style.color.a != 0 + +func termColor*(style: CSS2Properties): ForegroundColor = + if style.color.r > 120: + return fgRed + elif style.color.b > 120: + return fgBlue + elif style.color.g > 120: + return fgGreen + else: + return fgWhite + +proc applyProperties*(box: CSSBox, s: string) = + let decls = parseCSSListOfDeclarations(newStringStream(s)) + if box.props == nil: + box.props = CSS2Properties() + let props = box.props + + for item in decls: + if item of CSSDeclaration: + let d = CSSDeclaration(item) + case $d.name + of "color": + props.color = cssColor(d) + eprint props.color #TODO + of "margin": + let l = cssLength(d) + props.margintop = l + props.marginbottom = l + props.marginleft = l + props.marginright = l + of "margin-top": + props.margintop = cssLength(d) + of "margin-left": + props.marginleft = cssLength(d) + of "margin-right": + props.marginright = cssLength(d) + of "margin-bottom": + props.marginbottom = cssLength(d) + else: + printc(d) #TODO + +func getLength(s: seq[Rune], start: int, wlimit: int): tuple[wrap: bool, len: int, width: int] = + var len = 0 + var width = 0 + var i = start + while i < s.len: + let r = s[i] + let cw = r.width() + if width + cw > wlimit: + return (wrap: true, len: len, width: width) + width += cw + len += 1 + + return (wrap: false, len: len, width: width) + +proc arrangeBoxes*(canvas: CSSCanvas) = + var stack: seq[CSSBox] + stack.add(canvas.rootBox) + var x = 0 + var y = 0 + + while stack.len > 0: + let box = stack.pop() + + #arrange box + box.marginEdge.x1 = x + box.marginEdge.y1 = y + x += 
box.props.marginleft.cells() + y += box.props.margintop.cells() + + if box.display == DISPLAY_BLOCK: + x = 0 + inc y + + if x > canvas.width: + x = 0 + inc y + + box.x = x + box.y = y + + var l = 0 + while l < box.content.len: + let (wrap, wraplen, wrapwidth) = box.content.getLength(l, canvas.width - x) + var wrapbox = new(CSSBox) + wrapbox.content = box.content.substr(l, l + wraplen) + box.children.add(wrapbox) + l += wraplen + x += wrapwidth + if wrap: + inc y + x = 0 + + x += box.props.marginright.cells() + y += box.props.marginbottom.cells() + box.marginEdge.x2 = x + box.marginEdge.y2 = y + + var i = box.children.len - 1 + while i >= 0: + stack.add(box.children[i]) + i -= 1 diff --git a/src/cssparser.nim b/src/cssparser.nim deleted file mode 100644 index ce5b5037..00000000 --- a/src/cssparser.nim +++ /dev/null @@ -1,833 +0,0 @@ -# CSS tokenizer and parser. The tokenizer is a mess, and may or may not work -# correctly. The parser should work, though the outputted object model is -# questionable at best. 
- -import unicode -import streams -import math -import options - -import twtstr -import twtio -import enums - -type - CSSTokenizerState = object - at: int - stream: Stream - buf: seq[Rune] - - CSSParseState = object - tokens: seq[CSSParsedItem] - at: int - top_level: bool - - tflaga = enum - TFLAGA_UNRESTRICTED, TFLAGA_ID - tflagb = enum - TFLAGB_INTEGER, TFLAGB_NUMBER - - CSSParsedItem* = ref object of RootObj - CSSComponentValue* = ref object of CSSParsedItem - - CSSToken* = ref object of CSSComponentValue - case tokenType*: CSSTokenType - of CSS_IDENT_TOKEN, CSS_FUNCTION_TOKEN, CSS_AT_KEYWORD_TOKEN, - CSS_HASH_TOKEN, CSS_STRING_TOKEN, CSS_URL_TOKEN: - value*: seq[Rune] - tflaga*: tflaga - of CSS_DELIM_TOKEN: - rvalue*: Rune - of CSS_NUMBER_TOKEN, CSS_PERCENTAGE_TOKEN, CSS_DIMENSION_TOKEN: - nvalue*: float64 - tflagb*: tflagb - unit*: seq[Rune] - else: discard - - CSSRule* = ref object of CSSParsedItem - prelude*: seq[CSSComponentValue] - oblock*: CSSSimpleBlock - - CSSAtRule* = ref object of CSSRule - name*: seq[Rune] - - CSSQualifiedRule* = ref object of CSSRule - - CSSDeclaration* = ref object of CSSComponentValue - name*: seq[Rune] - value*: seq[CSSComponentValue] - important*: bool - - CSSFunction* = ref object of CSSComponentValue - name*: seq[Rune] - value*: seq[CSSComponentValue] - - CSSSimpleBlock* = ref object of CSSComponentValue - token*: CSSToken - value*: seq[CSSComponentValue] - - CSSStylesheet* = object - value*: seq[CSSRule] - - SyntaxError = object of ValueError - - CSSColor* = tuple[r: uint8, g: uint8, b: uint8, a: uint8] - -func `==`(a: CSSParsedItem, b: CSSTokenType): bool = - return a of CSSToken and CSSToken(a).tokenType == b - -func toNumber(s: seq[Rune]): float64 = - var sign = 1 - var t = 1 - var d = 0 - var integer: float64 = 0 - var f: float64 = 0 - var e: float64 = 0 - - var i = 0 - if i < s.len and s[i] == Rune('-'): - sign = -1 - inc i - elif i < s.len and s[i] == Rune('+'): - inc i - - while i < s.len and isDigitAscii(s[i]): - 
integer *= 10 - integer += float64(decValue(s[i])) - inc i - - if i < s.len and s[i] == Rune('.'): - inc i - while i < s.len and isDigitAscii(s[i]): - f *= 10 - f += float64(decValue(s[i])) - inc i - inc d - - if i < s.len and (s[i] == Rune('e') or s[i] == Rune('E')): - inc i - if i < s.len and s[i] == Rune('-'): - t = -1 - inc i - elif i < s.len and s[i] == Rune('+'): - inc i - - while i < s.len and isDigitAscii(s[i]): - e *= 10 - e += float64(decValue(s[i])) - inc i - - return float64(sign) * (integer + f * pow(10, float64(-d))) * pow(10, (float64(t) * e)) - -func toColor*(s: seq[Rune]): CSSColor = - if s.len == 3: - for r in s: - if hexValue(r) == -1: - return - let r = hexValue(s[0]) * 0x10 + hexValue(s[0]) - let g = hexValue(s[1]) * 0x10 + hexValue(s[1]) - let b = hexValue(s[2]) * 0x10 + hexValue(s[2]) - - result.r = uint8(r) - result.g = uint8(g) - result.b = uint8(b) - result.a = 0 - elif s.len == 6: - for r in s: - if hexValue(r) == -1: - return - let r = hexValue(s[0]) * 0x10 + hexValue(s[1]) - let g = hexValue(s[2]) * 0x10 + hexValue(s[3]) - let b = hexValue(s[4]) * 0x10 + hexValue(s[5]) - result.r = uint8(r) - result.g = uint8(g) - result.b = uint8(b) - result.a = 0 - -func isNameStartCodePoint*(r: Rune): bool = - return not isAscii(r) or r == Rune('_') or isAlphaAscii(r) - -func isNameCodePoint*(r: Rune): bool = - return isNameStartCodePoint(r) or isDigitAscii(r) or r == Rune('-') - -proc consume(state: var CSSTokenizerState): Rune = - result = state.buf[state.at] - inc state.at - -proc reconsume(state: var CSSTokenizerState) = - dec state.at - -func peek(state: CSSTokenizerState, i: int): Rune = - return state.buf[state.at + i] - -proc has(state: var CSSTokenizerState, i: int): bool = - if state.at + i >= state.buf.len and not state.stream.atEnd(): - state.buf &= state.stream.readLine().toRunes() & Rune('\n') - return state.at + i < state.buf.len - -func curr(state: CSSTokenizerState): Rune = - return state.buf[state.at] - -proc isValidEscape*(state: 
var CSSTokenizerState): bool = - return state.has(1) and state.curr() == Rune('\\') and state.peek(1) != Rune('\n') - -proc has(state: var CSSTokenizerState): bool = - if state.at >= state.buf.len and not state.stream.atEnd(): - state.buf &= state.stream.readLine().toRunes() & Rune('\n') - return state.at < state.buf.len - -proc startsWithIdentifier*(state: var CSSTokenizerState): bool = - if not state.has(): - return false - - if isNameStartCodePoint(state.curr()): - return true - if state.curr() == Rune('-'): - if state.has(1) and state.peek(1).isNameStartCodePoint(): - return true - if state.isValidEscape(): - return true - return false - elif state.curr() == Rune('\\'): - return state.isValidEscape() - - return false - -proc startsWithNumber*(state: var CSSTokenizerState): bool = - if state.has(): - case state.curr() - of Rune('+'), Rune('-'): - if state.has(1): - if isDigitAscii(state.peek(1)): - return true - elif state.peek(1) == Rune('.'): - if state.has(2) and isDigitAscii(state.peek(2)): - return true - of Rune('.'): - if isDigitAscii(state.peek(1)): - return true - elif isDigitAscii(state.curr()): - return true - else: - return false - return false - -proc consumeEscape(state: var CSSTokenizerState): Rune = - let r = state.consume() - var num = hexValue(r) - if num != -1: - var i = 0 - while state.has() and i <= 5: - let r = state.consume() - if hexValue(r) == -1: - state.reconsume() - break - num *= 0x10 - num += hexValue(r) - inc i - if num == 0 or num > 0x10FFFF or num in {0xD800..0xDFFF}: - return Rune(0xFFFD) - else: - return Rune(num) - else: - return r - -proc consumeString(state: var CSSTokenizerState): CSSToken = - var s: seq[Rune] - state.reconsume() - let ending = state.consume() - - while state.has(): - let r = state.consume() - case r - of Rune('\n'): - return CSSToken(tokenType: CSS_BAD_STRING_TOKEN) - of Rune('\\'): - s &= consumeEscape(state) - elif r == ending: - break - else: - s &= r - return CSSToken(tokenType: CSS_STRING_TOKEN, 
value: s) - -proc consumeName(state: var CSSTokenizerState): seq[Rune] = - while state.has(): - let r = state.consume() - if state.isValidEscape(): - result &= state.consumeEscape() - elif isNameCodePoint(r): - result &= r - else: - state.reconsume() - return result - -proc consumeNumberSign(state: var CSSTokenizerState): CSSToken = - if state.has(): - let r = state.consume() - if isNameCodePoint(r) or state.isValidEscape(): - result = CSSToken(tokenType: CSS_HASH_TOKEN) - if state.startsWithIdentifier(): - result.tflaga = TFLAGA_ID - - state.reconsume() - result.value = consumeName(state) - else: - let r = state.consume() - result = CSSToken(tokenType: CSS_DELIM_TOKEN, rvalue: r) - -proc consumeNumber(state: var CSSTokenizerState): tuple[t: tflagb, val: float64] = - var t = TFLAGB_INTEGER - var repr: seq[Rune] - if state.has(): - if state.curr() == Rune('+') or state.curr() == Rune('-'): - repr &= state.consume() - - while state.has() and isDigitAscii(state.curr()): - repr &= state.consume() - - if state.has(1): - if state.curr() == Rune('.') and isDigitAscii(state.peek(1)): - repr &= state.consume() - repr &= state.consume() - t = TFLAGB_NUMBER - while state.has() and isDigitAscii(state.curr()): - repr &= state.consume() - - if state.has(1): - if state.curr() == Rune('E') or state.curr() == Rune('e'): - var j = 2 - if state.peek(1) == Rune('-') or state.peek(1) == Rune('+'): - inc j - if state.has(j) and isDigitAscii(state.peek(j)): - while j > 0: - repr &= state.consume() - dec j - - while state.has() and isDigitAscii(state.curr()): - repr &= state.consume() - - let val = toNumber(repr) - return (t, val) - -proc consumeNumericToken(state: var CSSTokenizerState): CSSToken = - let num = state.consumeNumber() - if state.startsWithIdentifier(): - result = CSSToken(tokenType: CSS_DIMENSION_TOKEN, nvalue: num.val, tflagb: num.t) - result.unit = state.consumeName() - elif state.has() and state.curr() == Rune('%'): - discard state.consume() - result = 
CSSToken(tokenType: CSS_PERCENTAGE_TOKEN, nvalue: num.val) - else: - result = CSSToken(tokenType: CSS_NUMBER_TOKEN, nvalue: num.val, tflagb: num.t) - -proc consumeBadURL(state: var CSSTokenizerState) = - while state.has(1): - let r = state.consume() - case r - of Rune(')'): - return - elif state.isValidEscape(): - discard state.consumeEscape() - else: discard - -proc consumeURL(state: var CSSTokenizerState): CSSToken = - result = CSSToken(tokenType: CSS_URL_TOKEN) - while state.has(1) and state.peek(1).isWhitespace(): - discard state.consume() - - while state.has(1): - let r = state.consume() - case r - of Rune(')'): - return result - of Rune('"'), Rune('\''), Rune('('): - state.consumeBadURL() - return CSSToken(tokenType: CSS_BAD_URL_TOKEN) - of Rune('\\'): - state.reconsume() - if state.isValidEscape(): - result.value &= state.consumeEscape() - else: - state.consumeBadURL() - return CSSToken(tokenType: CSS_BAD_URL_TOKEN) - elif r.isWhitespace(): - while state.has(1) and state.peek(1).isWhitespace(): - discard state.consume() - else: - result.value &= r - -proc consumeIdentLikeToken(state: var CSSTokenizerState): CSSToken = - let s = state.consumeName() - if s.toAsciiLower() == "url" and state.has() and state.curr() == Rune('('): - discard state.consume() - while state.has(1) and state.curr().isWhitespace() and state.peek(1).isWhitespace(): - discard state.consume() - if state.curr() == Rune('\'') or state.curr() == Rune('"') or state.curr().isWhitespace(): - return CSSToken(tokenType: CSS_FUNCTION_TOKEN, value: s) - else: - return state.consumeURL() - elif state.has() and state.curr() == Rune('('): - discard state.consume() - return CSSToken(tokenType: CSS_FUNCTION_TOKEN, value: s) - - return CSSToken(tokenType: CSS_IDENT_TOKEN, value: s) - -proc consumeComments(state: var CSSTokenizerState) = - if state.has(2) and state.peek(1) == Rune('/') and state.peek(2) == Rune('*'): - discard state.consume() - discard state.consume() - while state.has(2) and not 
(state.peek(1) == Rune('*') and state.peek(2) == Rune('/')): - discard state.consume() - - if state.has(2): - discard state.consume() - if state.has(1): - discard state.consume() - -proc consumeToken(state: var CSSTokenizerState): CSSToken = - state.consumeComments() - let r = state.consume() - case r - of Rune('\n'), Rune('\t'), Rune(' '), Rune('\f'), Rune('\r'): - while state.has() and state.curr().isWhitespace(): - discard state.consume() - return CSSToken(tokenType: CSS_WHITESPACE_TOKEN) - of Rune('"'), Rune('\''): - return consumeString(state) - of Rune('#'): - return consumeNumberSign(state) - of Rune('('): - return CSSToken(tokenType: CSS_LPAREN_TOKEN) - of Rune(')'): - return CSSToken(tokenType: CSS_RPAREN_TOKEN) - of Rune('['): - return CSSToken(tokenType: CSS_LBRACKET_TOKEN) - of Rune(']'): - return CSSToken(tokenType: CSS_RBRACKET_TOKEN) - of Rune('{'): - return CSSToken(tokenType: CSS_LBRACE_TOKEN) - of Rune('}'): - return CSSToken(tokenType: CSS_RBRACE_TOKEN) - of Rune(','): - return CSSToken(tokenType: CSS_COMMA_TOKEN) - of Rune(':'): - return CSSToken(tokenType: CSS_COLON_TOKEN) - of Rune(';'): - return CSSToken(tokenType: CSS_SEMICOLON_TOKEN) - of Rune('+'): - if state.startsWithNumber(): - state.reconsume() - return state.consumeNumericToken() - else: - return CSSToken(tokenType: CSS_DELIM_TOKEN, rvalue: r) - of Rune('-'): - if state.startsWithNumber(): - state.reconsume() - return state.consumeNumericToken() - else: - if state.has(2) and state.peek(1) == Rune('-') and state.peek(2) == Rune('>'): - discard state.consume() - discard state.consume() - return CSSToken(tokenType: CSS_CDC_TOKEN) - elif state.startsWithIdentifier(): - state.reconsume() - result = state.consumeIdentLikeToken() - eprint result.value - else: - return CSSToken(tokenType: CSS_DELIM_TOKEN, rvalue: r) - of Rune('.'): - if state.startsWithNumber(): - state.reconsume() - return state.consumeNumericToken() - else: - return CSSToken(tokenType: CSS_DELIM_TOKEN, rvalue: r) - of 
Rune('<'): - if state.has(3) and state.peek(1) == Rune('!') and state.peek(2) == Rune('-') and state.peek(3) == Rune('-'): - discard state.consume() - discard state.consume() - discard state.consume() - return CSSToken(tokenType: CSS_CDO_TOKEN) - else: - return CSSToken(tokenType: CSS_DELIM_TOKEN, rvalue: r) - of Rune('@'): - if state.startsWithIdentifier(): - let name = state.consumeName() - return CSSToken(tokenType: CSS_AT_KEYWORD_TOKEN, value: name) - else: - return CSSToken(tokenType: CSS_DELIM_TOKEN, rvalue: r) - elif isDigitAscii(r): - state.reconsume() - return state.consumeNumericToken() - elif isNameStartCodePoint(r): - state.reconsume() - return state.consumeIdentLikeToken() - else: - return CSSToken(tokenType: CSS_DELIM_TOKEN, rvalue: r) - -proc tokenizeCSS*(inputStream: Stream): seq[CSSParsedItem] = - var state: CSSTokenizerState - state.stream = inputStream - state.buf = state.stream.readLine().toRunes() - while state.has(): - result.add(state.consumeToken()) - eprint "consume token", CSSToken(result[^1]).tokenType - - inputStream.close() - -proc consume(state: var CSSParseState): CSSParsedItem = - result = state.tokens[state.at] - inc state.at - -proc reconsume(state: var CSSParseState) = - dec state.at - -func has(state: CSSParseState, i: int): bool = - return state.at + i < state.tokens.len - -func curr(state: CSSParseState): CSSParsedItem = - return state.tokens[state.at] - -func has(state: CSSParseState): bool = - return state.at < state.tokens.len - -proc consumeSimpleBlock(state: var CSSParseState): CSSSimpleBlock = - state.reconsume() - let t = CSSToken(state.consume()) - var ending: CSSTokenType - case t.tokenType - of CSS_LBRACE_TOKEN: ending = CSS_RBRACE_TOKEN - of CSS_LPAREN_TOKEN: ending = CSS_RPAREN_TOKEN - of CSS_LBRACKET_TOKEN: ending = CSS_RBRACKET_TOKEN - else: raise newException(Exception, "Parse error!") - - result = CSSSimpleBlock(token: t) - while state.at < state.tokens.len: - let t = state.consume() - if t == ending: - return 
result - else: - if t == CSS_LBRACE_TOKEN or t == CSS_LBRACKET_TOKEN or t == CSS_LPAREN_TOKEN: - result.value.add(state.consumeSimpleBlock()) - else: - result.value.add(CSSComponentValue(t)) - return result - -proc consumeComponentValue(state: var CSSParseState): CSSComponentValue = - let t = state.consume() - if t == CSS_LBRACE_TOKEN or t == CSS_LBRACKET_TOKEN or t == CSS_LPAREN_TOKEN: - return state.consumeSimpleBlock() - return CSSComponentValue(t) - -proc consumeQualifiedRule(state: var CSSParseState): Option[CSSQualifiedRule] = - var r = CSSQualifiedRule() - while state.has(): - let t = state.consume() - if t of CSSSimpleBlock: - r.oblock = state.consumeSimpleBlock() - return some(r) - elif t == CSS_LBRACE_TOKEN: - r.oblock = state.consumeSimpleBlock() - return some(r) - else: - state.reconsume() - r.prelude.add(state.consumeComponentValue()) - return none(CSSQualifiedRule) - - -proc consumeAtRule(state: var CSSParseState): CSSAtRule = - let t = CSSToken(state.consume()) - result = CSSAtRule(name: t.value) - - while state.at < state.tokens.len: - let t = state.consume() - if t of CSSSimpleBlock: - result.oblock = state.consumeSimpleBlock() - elif t == CSS_SEMICOLON_TOKEN: - return result - elif t == CSS_LBRACE_TOKEN: - result.oblock = state.consumeSimpleBlock() - return result - else: - state.reconsume() - result.prelude.add(state.consumeComponentValue()) - -proc consumeDeclaration(state: var CSSParseState): Option[CSSDeclaration] = - let t = CSSToken(state.consume()) - var decl = CSSDeclaration(name: t.value) - while state.has() and state.curr() == CSS_WHITESPACE_TOKEN: - discard state.consume() - if not state.has() or state.curr() != CSS_COLON_TOKEN: - return none(CSSDeclaration) - discard state.consume() - eprint state.tokens.len - while state.has() and state.curr() == CSS_WHITESPACE_TOKEN: - eprint "ok...", CSSToken(state.curr()).tokenType - discard state.consume() - - while state.has(): - eprint "ok..." 
- decl.value.add(state.consumeComponentValue()) - eprint "helloo?", decl.value.len - - var i = decl.value.len - 1 - var j = 2 - var k = 0 - var l = 0 - while i >= 0 and j > 0: - if decl.value[i] != CSS_WHITESPACE_TOKEN: - dec j - if decl.value[i] == CSS_IDENT_TOKEN and k == 0: - if CSSToken(decl.value[i]).value.toAsciiLower() == "important": - inc k - l = i - elif k == 1 and decl.value[i] == CSS_DELIM_TOKEN: - if CSSToken(decl.value[i]).rvalue == Rune('!'): - decl.important = true - decl.value.del(l) - decl.value.del(i) - break - dec i - - while decl.value.len > 0 and decl.value[^1] == CSS_WHITESPACE_TOKEN: - decl.value.del(decl.value.len - 1) - return some(decl) - -#> Note: Despite the name, this actually parses a mixed list of declarations -#> and at-rules, as CSS 2.1 does for @page. Unexpected at-rules (which could be -#> all of them, in a given context) are invalid and should be ignored by the -#> consumer. -#Wow this is ugly. -proc consumeListOfDeclarations(state: var CSSParseState): seq[CSSParsedItem] = - while state.has(): - let t = state.consume() - if t == CSS_wHITESPACE_TOKEN or t == CSS_SEMICOLON_TOKEN: - continue - elif t == CSS_AT_KEYWORD_TOKEN: - state.reconsume() - result.add(state.consumeAtRule()) - elif t == CSS_IDENT_TOKEN: - var tempList: seq[CSSParsedItem] - tempList.add(CSSToken(t)) - while state.has() and state.curr() != CSS_SEMICOLON_TOKEN: - tempList.add(state.consumeComponentValue()) - - var tempState = CSSParseState(at: 0, tokens: tempList) - let decl = tempState.consumeDeclaration() - if decl.isSome: - result.add(decl.get) - else: - state.reconsume() - if state.curr() != CSS_SEMICOLON_TOKEN: - discard state.consumeComponentValue() - -proc consumeListOfRules(state: var CSSParseState): seq[CSSRule] = - while state.at < state.tokens.len: - let t = state.consume() - if t == CSS_WHITESPACE_TOKEN: - continue - elif t == CSS_CDO_TOKEN or t == CSS_CDC_TOKEN: - if state.top_level: - continue - else: - state.reconsume() - let q = 
state.consumeQualifiedRule() - if q.isSome: - result.add(q.get) - elif t == CSS_AT_KEYWORD_TOKEN: - state.reconsume() - result.add(state.consumeAtRule()) - else: - state.reconsume() - let q = state.consumeQualifiedRule() - if q.isSome: - result.add(q.get) - -proc consumeFunction(state: var CSSParseState): CSSFunction = - while state.at < state.tokens.len: - let t = state.consume() - if t == CSS_RPAREN_TOKEN: - return result - else: - state.reconsume() - result.value.add(state.consumeComponentValue()) - -proc parseStylesheet(state: var CSSParseState): CSSStylesheet = - state.top_level = true - result.value.add(state.consumeListOfRules()) - -proc parseStylesheet(inputStream: Stream): CSSStylesheet = - var state = CSSParseState() - state.tokens = tokenizeCSS(inputStream) - return state.parseStylesheet() - -proc parseListOfRules(state: var CSSParseState): seq[CSSRule] = - return state.consumeListOfRules() - -proc parseListOfRules(inputStream: Stream): seq[CSSRule] = - var state = CSSParseState() - state.tokens = tokenizeCSS(inputStream) - return state.parseListOfRules() - -proc parseRule(state: var CSSParseState): CSSRule = - while state.has() and state.curr() == CSS_WHITESPACE_TOKEN: - discard state.consume() - if not state.has(): - raise newException(SyntaxError, "EOF reached!") - - if state.curr() == CSS_AT_KEYWORD_TOKEN: - result = state.consumeAtRule() - else: - let q = state.consumeQualifiedRule() - if q.isSome: - result = q.get - else: - raise newException(SyntaxError, "No qualified rule found!") - - while state.has() and state.curr() == CSS_WHITESPACE_TOKEN: - discard state.consume() - if state.has(): - raise newException(SyntaxError, "EOF not reached!") - -proc parseRule(inputStream: Stream): CSSRule = - var state = CSSParseState() - state.tokens = tokenizeCSS(inputStream) - return state.parseRule() - -proc parseDeclaration(state: var CSSParseState): CSSDeclaration = - while state.has() and state.curr() == CSS_WHITESPACE_TOKEN: - discard state.consume() - - if 
not state.has() or state.curr() != CSS_IDENT_TOKEN: - raise newException(SyntaxError, "No ident token found!") - - let d = state.consumeDeclaration() - if d.isSome: - return d.get - - raise newException(SyntaxError, "No declaration found!") - -proc parseCSSDeclaration*(inputStream: Stream): CSSDeclaration = - var state = CSSParseState() - state.tokens = tokenizeCSS(inputStream) - return state.parseDeclaration() - -proc parseListOfDeclarations(state: var CSSParseState): seq[CSSParsedItem] = - return state.consumeListOfDeclarations() - -proc parseCSSListOfDeclarations*(inputStream: Stream): seq[CSSParsedItem] = - var state = CSSParseState() - state.tokens = tokenizeCSS(inputStream) - return state.parseListOfDeclarations() - -proc parseComponentValue(state: var CSSParseState): CSSComponentValue = - while state.has() and state.curr() == CSS_WHITESPACE_TOKEN: - discard state.consume() - if not state.has(): - raise newException(SyntaxError, "EOF reached!") - - result = state.consumeComponentValue() - - while state.has() and state.curr() == CSS_WHITESPACE_TOKEN: - discard state.consume() - if state.has(): - raise newException(SyntaxError, "EOF not reached!") - -proc parseCSSComponentValue*(inputStream: Stream): CSSComponentValue = - var state = CSSParseState() - state.tokens = tokenizeCSS(inputStream) - return state.parseComponentValue() - -proc parseListOfComponentValues(state: var CSSParseState): seq[CSSComponentValue] = - while state.has(): - result.add(state.consumeComponentValue()) - -proc parseCSSListOfComponentValues*(inputStream: Stream): seq[CSSComponentValue] = - var state = CSSParseState() - state.tokens = tokenizeCSS(inputStream) - return state.parseListOfComponentValues() - -proc parseCommaSeparatedListOfComponentValues(state: var CSSParseState): seq[CSSComponentValue] = - while state.has(1): - let cvl = state.consumeComponentValue() - if cvl != CSS_COMMA_TOKEN: - result.add(state.consumeComponentValue()) - -proc 
parseCommaSeparatedListOfComponentValues(inputStream: Stream): seq[CSSComponentValue] = - var state = CSSParseState() - state.tokens = tokenizeCSS(inputStream) - return state.parseCommaSeparatedListOfComponentValues() - -proc printc*(c: CSSComponentValue) = - if c of CSSToken: - case CSSToken(c).tokenType: - of CSS_FUNCTION_TOKEN, CSS_AT_KEYWORD_TOKEN, CSS_URL_TOKEN: - eprint CSSToken(c).tokenType, CSSToken(c).value - of CSS_HASH_TOKEN: - stderr.write('#' & $CSSToken(c).value) - of CSS_IDENT_TOKEN: - stderr.write(CSSToken(c).value) - of CSS_STRING_TOKEN: - stderr.write("\"" & $CSSToken(c).value & "\"") - of CSS_DELIM_TOKEN: - stderr.write(CSSToken(c).rvalue) - of CSS_DIMENSION_TOKEN: - eprint CSSToken(c).tokenType, CSSToken(c).nvalue, "unit", CSSToken(c).unit, CSSToken(c).tflagb - of CSS_NUMBER_TOKEN: - stderr.write($CSSToken(c).nvalue & $CSSToken(c).unit) - of CSS_PERCENTAGE_TOKEN: - stderr.write($CSSToken(c).nvalue & "%") - of CSS_COLON_TOKEN: - stderr.write(":") - of CSS_WHITESPACE_TOKEN: - stderr.write(" ") - of CSS_SEMICOLON_TOKEN: - stderr.write(";\n") - of CSS_COMMA_TOKEN: - stderr.write(",") - else: - eprint CSSToken(c).tokenType - elif c of CSSDeclaration: - stderr.write(CSSDeclaration(c).name) - stderr.write(": ") - for s in CSSDeclaration(c).value: - printc(s) - stderr.write(";\n") - elif c of CSSFunction: - eprint "FUNCTION", CSSFunction(c).name - for s in CSSFunction(c).value: - printc(s) - elif c of CSSSimpleBlock: - case CSSSimpleBlock(c).token.tokenType - of CSS_LBRACE_TOKEN: eprint "{" - of CSS_LPAREN_TOKEN: stderr.write("(") - of CSS_LBRACKET_TOKEN: stderr.write("[") - else: discard - for s in CSSSimpleBlock(c).value: - printc(s) - case CSSSimpleBlock(c).token.tokenType - of CSS_LBRACE_TOKEN: eprint "}" - of CSS_LPAREN_TOKEN: stderr.write(")") - of CSS_LBRACKET_TOKEN: stderr.write("]") - else: discard - - -proc parseCSS*(inputStream: Stream) = - let ss = inputStream.parseStylesheet() - for v in ss.value: - if v of CSSAtRule: - eprint 
CSSAtRule(v).name - else: - for c in CSSQualifiedRule(v).prelude: - printc(c) - case v.oblock.token.tokenType - of CSS_LBRACE_TOKEN: eprint "\n{" - of CSS_LPAREN_TOKEN: eprint "(" - of CSS_LBRACKET_TOKEN: eprint "[" - else: discard - for s in v.oblock.value: - printc(s) - case v.oblock.token.tokenType - of CSS_LBRACE_TOKEN: eprint "\n}" - of CSS_LPAREN_TOKEN: eprint ")" - of CSS_LBRACKET_TOKEN: eprint "]" - else: discard diff --git a/src/display.nim b/src/display.nim deleted file mode 100644 index d7d69c4a..00000000 --- a/src/display.nim +++ /dev/null @@ -1,395 +0,0 @@ -import terminal -import options -import uri -import strutils -import unicode - -import buffer -import termattrs -import dom -import twtstr -import twtio -import config -import enums - -proc clearStatusMsg*(at: int) = - setCursorPos(0, at) - eraseLine() - -proc statusMsg*(str: string, at: int) = - clearStatusMsg(at) - print(str.ansiStyle(styleReverse).ansiReset()) - -type - RenderState = object - x: int - y: int - lastwidth: int - fmtline: string - rawline: string - centerqueue: seq[Node] - centerlen: int - blanklines: int - blankspaces: int - nextspaces: int - docenter: bool - indent: int - listval: int - -func newRenderState(): RenderState = - return RenderState(blanklines: 1) - -proc write(state: var RenderState, s: string) = - state.fmtline &= s - state.rawline &= s - -proc write(state: var RenderState, fs: string, rs: string) = - state.fmtline &= fs - state.rawline &= rs - -proc flushLine(buffer: Buffer, state: var RenderState) = - if state.rawline.len == 0: - inc state.blanklines - assert(state.rawline.runeLen() < buffer.width, "line too long:\n" & state.rawline) - buffer.writefmt(state.fmtline) - buffer.writeraw(state.rawline) - state.x = 0 - inc state.y - state.nextspaces = 0 - state.fmtline = "" - state.rawline = "" - -proc addSpaces(buffer: Buffer, state: var RenderState, n: int) = - if state.x + n > buffer.width: - buffer.flushLine(state) - return - state.blankspaces += n - state.write(' 
'.repeat(n)) - state.x += n - -proc writeWrappedText(buffer: Buffer, state: var RenderState, node: Node) = - state.lastwidth = 0 - var n = 0 - var fmtword = "" - var rawword = "" - var prevl = false - let fmttext = node.getFmtText() - for w in fmttext: - if w.len > 0 and w[0] == '\e': - fmtword &= w - continue - - for r in w.runes: - if r == Rune(' '): - if rawword.len > 0 and rawword[0] == ' ' and prevl: #first byte can't fool comparison to ascii - fmtword = fmtword.substr(1) - rawword = rawword.substr(1) - state.x -= 1 - prevl = false - state.write(fmtword, rawword) - fmtword = "" - rawword = "" - - if r == Rune('\n'): - state.write(fmtword, rawword) - buffer.flushLine(state) - rawword = "" - fmtword = "" - else: - fmtword &= r - rawword &= r - - state.x += r.width() - - if state.x >= buffer.width: - state.lastwidth = max(state.lastwidth, state.x) - buffer.flushLine(state) - state.x = rawword.width() - prevl = true - else: - state.lastwidth = max(state.lastwidth, state.x) - - inc n - - state.write(fmtword, rawword) - if prevl: - state.x += rawword.width() - prevl = false - - state.lastwidth = max(state.lastwidth, state.x) - -proc preAlignNode(buffer: Buffer, node: Node, state: var RenderState) = - let style = node.getStyle() - if state.rawline.len > 0 and node.firstNode() and state.blanklines == 0: - buffer.flushLine(state) - - if node.firstNode(): - while state.blanklines < max(style.margin, style.margintop): - buffer.flushLine(state) - state.indent += style.indent - - if state.rawline.len > 0 and state.blanklines == 0 and node.displayed(): - buffer.addSpaces(state, state.nextspaces) - state.nextspaces = 0 - if state.blankspaces < max(style.margin, style.marginleft): - buffer.addSpaces(state, max(style.margin, style.marginleft) - state.blankspaces) - - if style.centered and state.rawline.len == 0 and node.displayed(): - buffer.addSpaces(state, max(buffer.width div 2 - state.centerlen div 2, 0)) - state.centerlen = 0 - - if node.isElemNode() and style.display == 
DISPLAY_LIST_ITEM and state.indent > 0: - if state.blanklines == 0: - buffer.flushLine(state) - var listchar = "•" - #case elem.parentElement.tagType - #of TAG_UL: - # listchar = "•" - #of TAG_OL: - # inc state.listval - # listchar = $state.listval & ")" - #else: - # return - buffer.addSpaces(state, state.indent) - state.write(listchar) - state.x += listchar.runeLen() - buffer.addSpaces(state, 1) - -proc postAlignNode(buffer: Buffer, node: Node, state: var RenderState) = - let style = node.getStyle() - - if node.getRawLen() > 0: - state.blanklines = 0 - state.blankspaces = 0 - - if state.rawline.len > 0 and state.blanklines == 0: - state.nextspaces += max(style.margin, style.marginright) - #if node.lastNode() and (node.isTextNode() or elem.childNodes.len == 0): - # buffer.flushLine(state) - - if node.lastNode(): - while state.blanklines < max(style.margin, style.marginbottom): - buffer.flushLine(state) - state.indent -= style.indent - - if style.display == DISPLAY_LIST_ITEM and node.lastNode(): - buffer.flushLine(state) - -proc renderNode(buffer: Buffer, node: Node, state: var RenderState) = - if not (node.nodeType in {ELEMENT_NODE, TEXT_NODE}): - return - let style = node.getStyle() - if node.nodeType == ELEMENT_NODE: - if Element(node).tagType in {TAG_SCRIPT, TAG_STYLE, TAG_NOSCRIPT, TAG_TITLE}: - return - if style.hidden: return - - if not state.docenter: - if style.centered: - state.centerqueue.add(node) - if node.lastNode(): - state.docenter = true - state.centerlen = 0 - for node in state.centerqueue: - state.centerlen += node.getRawLen() - for node in state.centerqueue: - buffer.renderNode(node, state) - state.centerqueue.setLen(0) - state.docenter = false - return - else: - return - if state.centerqueue.len > 0: - state.docenter = true - state.centerlen = 0 - for node in state.centerqueue: - state.centerlen += node.getRawLen() - for node in state.centerqueue: - buffer.renderNode(node, state) - state.centerqueue.setLen(0) - state.docenter = false - - 
buffer.preAlignNode(node, state) - - node.x = state.x - node.y = state.y - buffer.writeWrappedText(state, node) - node.ex = state.x - node.ey = state.y - node.width = state.lastwidth - node.x - 1 - node.height = state.y - node.y + 1 - - buffer.postAlignNode(node, state) - -proc setLastHtmlLine(buffer: Buffer, state: var RenderState) = - if state.rawline.len != 0: - buffer.flushLine(state) - -proc renderHtml*(buffer: Buffer) = - var stack: seq[Node] - let first = buffer.document - stack.add(first) - - var state = newRenderState() - while stack.len > 0: - let currElem = stack.pop() - buffer.addNode(currElem) - buffer.renderNode(currElem, state) - var i = currElem.childNodes.len - 1 - while i >= 0: - stack.add(currElem.childNodes[i]) - i -= 1 - - buffer.setLastHtmlLine(state) - -proc drawHtml(buffer: Buffer) = - var state = newRenderState() - for node in buffer.nodes: - buffer.renderNode(node, state) - buffer.setLastHtmlLine(state) - -proc statusMsgForBuffer(buffer: Buffer) = - var msg = $(buffer.cursory + 1) & "/" & $(buffer.lastLine() + 1) & " (" & - $buffer.atPercentOf() & "%) " & - "<" & buffer.title & ">" - if buffer.hovertext.len > 0: - msg &= " " & buffer.hovertext - statusMsg(msg.maxString(buffer.width), buffer.height) - -proc cursorBufferPos(buffer: Buffer) = - var x = buffer.cursorx - var y = buffer.cursory - 1 - buffer.fromY - termGoto(x, y + 1) - -proc displayBuffer(buffer: Buffer) = - eraseScreen() - termGoto(0, 0) - - print(buffer.visibleText().ansiReset()) - -proc inputLoop(attrs: TermAttributes, buffer: Buffer): bool = - var s = "" - var feedNext = false - while true: - stdout.showCursor() - buffer.cursorBufferPos() - if not feedNext: - s = "" - else: - feedNext = false - let c = getch() - s &= c - let action = getNormalAction(s) - var redraw = false - var reshape = false - var nostatus = false - case action - of ACTION_QUIT: - eraseScreen() - return false - of ACTION_CURSOR_LEFT: redraw = buffer.cursorLeft() - of ACTION_CURSOR_DOWN: redraw = 
buffer.cursorDown() - of ACTION_CURSOR_UP: redraw = buffer.cursorUp() - of ACTION_CURSOR_RIGHT: redraw = buffer.cursorRight() - of ACTION_CURSOR_LINEBEGIN: buffer.cursorLineBegin() - of ACTION_CURSOR_LINEEND: buffer.cursorLineEnd() - of ACTION_CURSOR_NEXT_WORD: redraw = buffer.cursorNextWord() - of ACTION_CURSOR_PREV_WORD: redraw = buffer.cursorPrevWord() - of ACTION_CURSOR_NEXT_LINK: redraw = buffer.cursorNextLink() - of ACTION_CURSOR_PREV_LINK: redraw = buffer.cursorPrevLink() - of ACTION_PAGE_DOWN: redraw = buffer.pageDown() - of ACTION_PAGE_UP: redraw = buffer.pageUp() - of ACTION_HALF_PAGE_DOWN: redraw = buffer.halfPageDown() - of ACTION_HALF_PAGE_UP: redraw = buffer.halfPageUp() - of ACTION_CURSOR_FIRST_LINE: redraw = buffer.cursorFirstLine() - of ACTION_CURSOR_LAST_LINE: redraw = buffer.cursorLastLine() - of ACTION_CURSOR_TOP: redraw = buffer.cursorTop() - of ACTION_CURSOR_MIDDLE: redraw = buffer.cursorMiddle() - of ACTION_CURSOR_BOTTOM: redraw = buffer.cursorBottom() - of ACTION_CENTER_LINE: redraw = buffer.centerLine() - of ACTION_SCROLL_DOWN: redraw = buffer.scrollDown() - of ACTION_SCROLL_UP: redraw = buffer.scrollUp() - of ACTION_CLICK: - let selectedElem = buffer.findSelectedElement() - if selectedElem.isSome: - case selectedElem.get().tagType - of TAG_INPUT: - clearStatusMsg(buffer.height) - let status = readLine("TEXT: ", HtmlInputElement(selectedElem.get()).value, buffer.width) - if status: - reshape = true - redraw = true - else: discard - if selectedElem.get().getStyle().islink: - let anchor = HtmlAnchorElement(buffer.selectedlink.ancestor(TAG_A)).href - buffer.gotoLocation(parseUri(anchor)) - return true - of ACTION_CHANGE_LOCATION: - var url = $buffer.document.location - - clearStatusMsg(buffer.height) - let status = readLine("URL: ", url, buffer.width) - if status: - buffer.setLocation(parseUri(url)) - return true - of ACTION_LINE_INFO: - statusMsg("line " & $buffer.cursory & "/" & $buffer.lastLine() & " col " & $(buffer.cursorx + 1) & "/" & 
$buffer.currentLineLength(), buffer.width) - nostatus = true - of ACTION_FEED_NEXT: - feedNext = true - of ACTION_RELOAD: return true - of ACTION_RESHAPE: - reshape = true - redraw = true - of ACTION_REDRAW: redraw = true - else: discard - stdout.hideCursor() - - let prevlink = buffer.selectedlink - let sel = buffer.checkLinkSelection() - if sel: - buffer.clearText() - buffer.drawHtml() - termGoto(0, buffer.selectedlink.y - buffer.fromy) - stdout.eraseLine() - for i in buffer.selectedlink.y..buffer.selectedlink.ey: - if i < buffer.fromy + buffer.height - 1: - let line = buffer.fmttext[i] - print(line) - print('\n') - print("".ansiReset()) - - if prevlink != nil: - buffer.clearText() - buffer.drawHtml() - termGoto(0, prevlink.y - buffer.fromy) - for i in prevlink.y..prevlink.ey: - if i < buffer.fromy + buffer.height - 1: - let line = buffer.fmttext[i] - stdout.eraseLine() - print(line) - print('\n') - print("".ansiReset()) - - if buffer.refreshTermAttrs(): - redraw = true - reshape = true - - if reshape: - buffer.clearText() - buffer.drawHtml() - if redraw: - buffer.displayBuffer() - - if not nostatus: - buffer.statusMsgForBuffer() - else: - nostatus = false - -proc displayPage*(attrs: TermAttributes, buffer: Buffer): bool = - #buffer.printwrite = true - discard buffer.gotoAnchor() - buffer.displayBuffer() - buffer.statusMsgForBuffer() - return inputLoop(attrs, buffer) - diff --git a/src/dom.nim b/src/dom.nim deleted file mode 100644 index c9dc367e..00000000 --- a/src/dom.nim +++ /dev/null @@ -1,358 +0,0 @@ -import terminal -import uri -import unicode -import strutils -import tables - -import twtstr -import twtio -import enums -import style - -type - EventTarget* = ref EventTargetObj - EventTargetObj = object of RootObj - - Node* = ref NodeObj - NodeObj = object of EventTargetObj - nodeType*: NodeType - childNodes*: seq[Node] - firstChild*: Node - isConnected*: bool - lastChild*: Node - nextSibling*: Node - previousSibling*: Node - parentNode*: Node - 
parentElement*: Element - ownerDocument*: Document - - rawtext*: string - fmttext*: seq[string] - x*: int - y*: int - ex*: int - ey*: int - width*: int - height*: int - hidden*: bool - - Attr* = ref AttrObj - AttrObj = object of NodeObj - namespaceURI*: string - prefix*: string - localName*: string - name*: string - value*: string - ownerElement*: Element - - Document* = ref DocumentObj - DocumentObj = object of NodeObj - location*: Uri - id_elements*: Table[string, Element] - class_elements*: Table[string, seq[Element]] - - CharacterData* = ref CharacterDataObj - CharacterDataObj = object of NodeObj - data*: string - length*: int - - Text* = ref TextObj - TextObj = object of CharacterDataObj - wholeText*: string - - Comment* = ref CommentObj - CommentObj = object of CharacterDataObj - - Element* = ref ElementObj - ElementObj = object of NodeObj - namespaceURI*: string - prefix*: string - localName*: string - tagName*: string - tagType*: TagType - - id*: string - classList*: seq[string] - attributes*: Table[string, Attr] - style*: CSS2Properties - - HTMLElement* = ref HTMLElementObj - HTMLElementObj = object of ElementObj - - HTMLInputElement* = ref HTMLInputElementObj - HTMLInputElementObj = object of HTMLElementObj - itype*: InputType - autofocus*: bool - required*: bool - value*: string - size*: int - - HTMLAnchorElement* = ref HTMLAnchorElementObj - HTMLAnchorElementObj = object of HTMLElementObj - href*: string - - HTMLSelectElement* = ref HTMLSelectElementObj - HTMLSelectElementObj = object of HTMLElementObj - name*: string - value*: string - valueSet*: bool - - HTMLOptionElement* = ref HTMLOptionElementObj - HTMLOptionElementObj = object of HTMLElementObj - value*: string - - HTMLHeadingElement* = ref HTMLHeadingElementObj - HTMLHeadingElementObj = object of HTMLElementObj - rank*: uint16 - - HTMLBRElement* = ref HTMLBRElementObj - HTMLBRElementObj = object of HTMLElementObj - - -func getTagTypeMap(): Table[string, TagType] = - for i in low(TagType) .. 
high(TagType): - let enumname = $TagType(i) - let tagname = enumname.split('_')[1..^1].join("_").tolower() - result[tagname] = TagType(i) - -func getInputTypeMap(): Table[string, InputType] = - for i in low(InputType) .. high(InputType): - let enumname = $InputType(i) - let tagname = enumname.split('_')[1..^1].join("_").tolower() - result[tagname] = InputType(i) - -const tagTypeMap = getTagTypeMap() -const inputTypeMap = getInputTypeMap() - -func tagType*(s: string): TagType = - if tagTypeMap.hasKey(s): - return tagTypeMap[s] - else: - return TAG_UNKNOWN - -func inputType*(s: string): InputType = - if inputTypeMap.hasKey(s): - return inputTypeMap[s] - else: - return INPUT_UNKNOWN - -#TODO -func nodeAttr*(node: Node): HtmlElement = - case node.nodeType - of TEXT_NODE: return HtmlElement(node.parentElement) - of ELEMENT_NODE: return HtmlElement(node) - else: assert(false) - -func getStyle*(node: Node): CSS2Properties = - case node.nodeType - of TEXT_NODE: return node.parentElement.style - of ELEMENT_NODE: return Element(node).style - else: assert(false) - -func displayed*(node: Node): bool = - return node.rawtext.len > 0 and node.getStyle().display != DISPLAY_NONE - -func isTextNode*(node: Node): bool = - return node.nodeType == TEXT_NODE - -func isElemNode*(node: Node): bool = - return node.nodeType == ELEMENT_NODE - -func isComment*(node: Node): bool = - return node.nodeType == COMMENT_NODE - -func isCData*(node: Node): bool = - return node.nodeType == CDATA_SECTION_NODE - -func isDocument*(node: Node): bool = - return node.nodeType == DOCUMENT_NODE - -func getFmtLen*(htmlNode: Node): int = - return htmlNode.fmttext.join().runeLen() - -func getRawLen*(htmlNode: Node): int = - return htmlNode.rawtext.runeLen() - -func firstNode*(htmlNode: Node): bool = - return htmlNode.parentElement != nil and htmlNode.parentElement.childNodes[0] == htmlNode - -func lastNode*(htmlNode: Node): bool = - return htmlNode.parentElement != nil and htmlNode.parentElement.childNodes[^1] == 
htmlNode - -func toInputType*(str: string): InputType = - case str - of "button": INPUT_BUTTON - of "checkbox": INPUT_CHECKBOX - of "color": INPUT_COLOR - of "date": INPUT_DATE - of "datetime_local": INPUT_DATETIME_LOCAL - of "email": INPUT_EMAIL - of "file": INPUT_FILE - of "hidden": INPUT_HIDDEN - of "image": INPUT_IMAGE - of "month": INPUT_MONTH - of "number": INPUT_NUMBER - of "password": INPUT_PASSWORD - of "radio": INPUT_RADIO - of "range": INPUT_RANGE - of "reset": INPUT_RESET - of "search": INPUT_SEARCH - of "submit": INPUT_SUBMIT - of "tel": INPUT_TEL - of "text": INPUT_TEXT - of "time": INPUT_TIME - of "url": INPUT_URL - of "week": INPUT_WEEK - else: INPUT_UNKNOWN - -func toInputSize*(str: string): int = - if str.len == 0: - return 20 - for c in str: - if not c.isDigit(): - return 20 - return str.parseInt() - -func getFmtInput(inputElement: HtmlInputElement): seq[string] = - case inputElement.itype - of INPUT_TEXT, INPUT_SEARCH: - let valueFit = fitValueToSize(inputElement.value, inputElement.size) - return valueFit.ansiStyle(styleUnderscore).ansiReset().buttonFmt() - of INPUT_SUBMIT: - return inputElement.value.buttonFmt() - else: discard - -func getRawInput(inputElement: HtmlInputElement): string = - case inputElement.itype - of INPUT_TEXT, INPUT_SEARCH: - return inputElement.value.fitValueToSize(inputElement.size).buttonRaw() - of INPUT_SUBMIT: - return inputElement.value.buttonRaw() - else: discard - -#TODO -func ancestor*(htmlNode: Node, tagType: TagType): HtmlElement = - result = HtmlElement(htmlNode.parentElement) - while result != nil and result.tagType != tagType: - result = HtmlElement(result.parentElement) - -proc getRawText*(htmlNode: Node): string = - if htmlNode.isElemNode(): - case HtmlElement(htmlNode).tagType - of TAG_INPUT: return HtmlInputElement(htmlNode).getRawInput() - else: return "" - elif htmlNode.isTextNode(): - let chardata = CharacterData(htmlNode) - if htmlNode.parentElement != nil and htmlNode.parentElement.tagType != 
TAG_PRE: - result = chardata.data.remove("\n") - if unicode.strip(result).runeLen() > 0: - if htmlNode.getStyle().display != DISPLAY_INLINE: - result = unicode.strip(result) - else: - result = "" - else: - result = unicode.strip(chardata.data) - if htmlNode.parentElement != nil and htmlNode.parentElement.tagType == TAG_OPTION: - result = result.buttonRaw() - else: - assert(false) - -func getFmtText*(htmlNode: Node): seq[string] = - if htmlNode.isElemNode(): - case HtmlElement(htmlNode).tagType - of TAG_INPUT: return HtmlInputElement(htmlNode).getFmtInput() - else: return @[] - elif htmlNode.isTextNode(): - let chardata = CharacterData(htmlNode) - result &= chardata.data - if htmlNode.parentElement != nil: - if htmlNode.parentElement.style.islink: - result = result.ansiFgColor(fgBlue).ansiReset() - let anchor = htmlNode.ancestor(TAG_A) - if anchor != nil and anchor.style.selected: - result = result.ansiStyle(styleUnderscore).ansiReset() - - if htmlNode.parentElement.tagType == TAG_OPTION: - result = result.ansiFgColor(fgRed).ansiReset() - - if htmlNode.parentElement.style.bold: - result = result.ansiStyle(styleBright).ansiReset() - if htmlNode.parentElement.style.italic: - result = result.ansiStyle(styleItalic).ansiReset() - if htmlNode.parentElement.style.underscore: - result = result.ansiStyle(styleUnderscore).ansiReset() - else: - assert(false, "Uhhhh I'm pretty sure we should have parent elements for text nodes?" 
& htmlNode.rawtext) - else: - assert(false) - -func newDocument*(): Document = - new(result) - result.nodeType = DOCUMENT_NODE - -func newText*(): Text = - new(result) - result.nodeType = TEXT_NODE - -func newComment*(): Comment = - new(result) - result.nodeType = COMMENT_NODE - -func newHtmlElement*(tagType: TagType): HTMLElement = - case tagType - of TAG_INPUT: - result = new(HTMLInputElement) - of TAG_A: - result = new(HTMLAnchorElement) - of TAG_SELECT: - result = new(HTMLSelectElement) - of TAG_OPTION: - result = new(HTMLOptionElement) - of TAG_H1, TAG_H2, TAG_H3, TAG_H4, TAG_H5, TAG_H6: - result = new(HTMLHeadingElement) - of TAG_BR: - result = new(HTMLBRElement) - else: - new(result) - - result.nodeType = ELEMENT_NODE - result.tagType = tagType - result.style = new(CSS2Properties) - -func newAttr*(parent: Element, key: string, value: string): Attr = - new(result) - result.nodeType = ATTRIBUTE_NODE - result.ownerElement = parent - result.name = key - result.value = value - -func getAttrValue*(element: Element, s: string): string = - let attr = element.attributes.getOrDefault(s, nil) - if attr != nil: - return attr.value - return "" - - -#type -# SelectorType = enum -# TYPE_SELECTOR, ID_SELECTOR, ATTR_SELECTOR, CLASS_SELECTOR, CHILD_SELECTOR, -# UNIVERSAL_SELECTOR -# -# Selector = object -# t: SelectorType -# s0: string -# s1: string -# -#proc querySelector*(document: Document, q: string): seq[Element] = -# #let ss = newStringStream(q) -# #let cvals = parseCSSListOfComponentValues(ss) -# #var selectors: seq[Selector] -# return -# -# #for cval in cvals: -# # if cval of CSSToken: -# # case CSSToken(cval).tokenType -# # of CSS_DELIM_TOKEN: -# # if cval.rvalue == Rune('*'): -# # selectors.add(Selector(t)) -# # printc(cval) diff --git a/src/entity.nim b/src/entity.nim deleted file mode 100644 index dcac258e..00000000 --- a/src/entity.nim +++ /dev/null @@ -1,29 +0,0 @@ -import radixtree -import json - -when defined(small): - proc genEntityMap(data: seq[tuple[a: 
string, b: string]]): StaticRadixTree[string] = - result = newStaticRadixTree[string]() - for pair in data: - result[pair.a] = pair.b - - proc genEntityHashMap(): seq[tuple[a: string, b: string]] = - let entity = staticRead"../res/entity.json" - let entityJson = parseJson(entity) - - for k, v in entityJson: - result.add((k.substr(1), v{"characters"}.getStr())) - const entityHashMap = genEntityHashMap() - let entityMap* = genEntityMap(entityHashMap) #TODO: use refs here -else: - import tables - proc genEntityMap(): StaticRadixTree[string] = - let entity = staticRead"../res/entity.json" - let entityJson = parseJson(entity) - var entityMap = newStaticRadixTree[string]() - - for k, v in entityJson: - entityMap[k.substr(1)] = v{"characters"}.getStr() - - return entityMap - const entityMap* = genEntityMap() diff --git a/src/enums.nim b/src/enums.nim deleted file mode 100644 index 62e96e4c..00000000 --- a/src/enums.nim +++ /dev/null @@ -1,96 +0,0 @@ -type - NodeType* = - enum - UNKNOWN_NODE = 0, - ELEMENT_NODE = 1, - ATTRIBUTE_NODE = 2, - TEXT_NODE = 3, - CDATA_SECTION_NODE = 4, - ENTITY_REFERENCE_NODE = 5, - ENTITY_NODE = 6 - PROCESSING_INSTRUCTION_NODE = 7, - COMMENT_NODE = 8, - DOCUMENT_NODE = 9, - DOCUMENT_TYPE_NODE = 10, - DOCUMENT_FRAGMENT_NODE = 11, - NOTATION_NODE = 12 - - DisplayType* = - enum - DISPLAY_INLINE, DISPLAY_BLOCK, DISPLAY_LIST_ITEM, DISPLAY_TABLE_COLUMN, - DISPLAY_INLINE_BLOCK, DISPLAY_NONE - - InputType* = - enum - INPUT_UNKNOWN, INPUT_BUTTON, INPUT_CHECKBOX, INPUT_COLOR, INPUT_DATE, - INPUT_DATETIME_LOCAL, INPUT_EMAIL, INPUT_FILE, INPUT_HIDDEN, INPUT_IMAGE, - INPUT_MONTH, INPUT_NUMBER, INPUT_PASSWORD, INPUT_RADIO, INPUT_RANGE, - INPUT_RESET, INPUT_SEARCH, INPUT_SUBMIT, INPUT_TEL, INPUT_TEXT, INPUT_TIME, - INPUT_URL, INPUT_WEEK - - WhitespaceType* = - enum - WHITESPACE_UNKNOWN, WHITESPACE_NORMAL, WHITESPACE_NOWRAP, WHITESPACE_PRE, - WHITESPACE_PRE_LINE, WHITESPACE_PRE_WRAP, WHITESPACE_INITIAL, - WHITESPACE_INHERIT - - TagType* = - enum - TAG_UNKNOWN, 
TAG_HTML, TAG_BASE, TAG_HEAD, TAG_LINK, TAG_META, TAG_STYLE, - TAG_TITLE, TAG_BODY, TAG_ADDRESS, TAG_ARTICLE, TAG_ASIDE, TAG_FOOTER, - TAG_HEADER, TAG_H1, TAG_H2, TAG_H3, TAG_H4, TAG_H5, TAG_H6, TAG_HGROUP, - TAG_MAIN, TAG_NAV, TAG_SECTION, TAG_BLOCKQUOTE, TAG_DD, TAG_DIV, TAG_DL, - TAG_DT, TAG_FIGCAPTION, TAG_FIGURE, TAG_HR, TAG_LI, TAG_OL, TAG_P, TAG_PRE, - TAG_UL, TAG_A, TAG_ABBR, TAG_B, TAG_BDI, TAG_BDO, TAG_BR, TAG_CITE, - TAG_CODE, TAG_DATA, TAG_DFN, TAG_EM, TAG_I, TAG_KBD, TAG_MARK, TAG_Q, - TAG_RB, TAG_RP, TAG_RT, TAG_RTC, TAG_RUBY, TAG_S, TAG_SAMP, TAG_SMALL, - TAG_SPAN, TAG_STRONG, TAG_SUB, TAG_SUP, TAG_TIME, TAG_U, TAG_VAR, TAG_WBR, - TAG_AREA, TAG_AUDIO, TAG_IMG, TAG_MAP, TAG_TRACK, TAG_VIDEO, - TAG_IFRAME, TAG_OBJECT, TAG_PARAM, TAG_PICTURE, TAG_PORTAL, TAG_SOURCE, - TAG_CANVAS, TAG_NOSCRIPT, TAG_SCRIPT, TAG_DEL, TAG_INS, TAG_CAPTION, - TAG_COL, TAG_COLGROUP, TAG_TABLE, TAG_TBODY, TAG_TD, TAG_TFOOT, TAG_TH, - TAG_THEAD, TAG_TR, TAG_BUTTON, TAG_DATALIST, TAG_FIELDSET, TAG_FORM, - TAG_INPUT, TAG_LABEL, TAG_LEGEND, TAG_METER, TAG_OPTGROUP, TAG_OPTION, - TAG_OUTPUT, TAG_PROGRESS, TAG_SELECT, TAG_TEXTAREA, TAG_DETAILS, - TAG_DIALOG, TAG_MENU, TAG_SUMMARY, TAG_BLINK, TAG_CENTER, TAG_CONTENT, - TAG_DIR, TAG_FONT, TAG_FRAME, TAG_NOFRAMES, TAG_FRAMESET, TAG_STRIKE, TAG_TT - - CSSTokenType* = - enum - CSS_NO_TOKEN, CSS_IDENT_TOKEN, CSS_FUNCTION_TOKEN, CSS_AT_KEYWORD_TOKEN, - CSS_HASH_TOKEN, CSS_STRING_TOKEN, CSS_BAD_STRING_TOKEN, CSS_URL_TOKEN, - CSS_BAD_URL_TOKEN, CSS_DELIM_TOKEN, CSS_NUMBER_TOKEN, CSS_PERCENTAGE_TOKEN, - CSS_DIMENSION_TOKEN, CSS_WHITESPACE_TOKEN, CSS_CDO_TOKEN, CSS_CDC_TOKEN, - CSS_COLON_TOKEN, CSS_SEMICOLON_TOKEN, CSS_COMMA_TOKEN, CSS_RBRACKET_TOKEN, - CSS_LBRACKET_TOKEN, CSS_LPAREN_TOKEN, CSS_RPAREN_TOKEN, CSS_LBRACE_TOKEN, - CSS_RBRACE_TOKEN - -const DisplayInlineTags* = { - TAG_A, TAG_ABBR, TAG_B, TAG_BDO, TAG_BR, TAG_BUTTON, TAG_CITE, TAG_CODE, - TAG_DEL, TAG_DFN, TAG_EM, TAG_FONT, TAG_I, TAG_IMG, TAG_INS, TAG_INPUT, - TAG_IFRAME, 
TAG_KBD, TAG_LABEL, TAG_MAP, TAG_OBJECT, TAG_Q, TAG_SAMP, - TAG_SCRIPT, TAG_SELECT, TAG_SMALL, TAG_SPAN, TAG_STRONG, TAG_SUB, TAG_SUP, - TAG_TEXTAREA, TAG_TT, TAG_VAR, TAG_FONT, TAG_IFRAME, TAG_U, TAG_S, TAG_STRIKE, - TAG_FRAME, TAG_IMG, TAG_INPUT -} - -const DisplayNoneTags* = { - TAG_AREA, TAG_BASE, TAG_SOURCE, TAG_TRACK, TAG_LINK, TAG_META, TAG_PARAM, TAG_WBR -} - -const DisplayInlineBlockTags* = { - TAG_IMG -} - -const DisplayTableColumnTags* = { - TAG_COL -} - -const SelfClosingTagTypes* = { - TAG_LI, TAG_P -} - -const VoidTagTypes* = { - TAG_AREA, TAG_BASE, TAG_BR, TAG_COL, TAG_FRAME, TAG_HR, TAG_IMG, TAG_INPUT, - TAG_SOURCE, TAG_TRACK, TAG_LINK, TAG_META, TAG_PARAM, TAG_WBR, TAG_HR -} diff --git a/src/html/dom.nim b/src/html/dom.nim new file mode 100644 index 00000000..74ebe5ea --- /dev/null +++ b/src/html/dom.nim @@ -0,0 +1,492 @@ +import terminal +import uri +import unicode +import strutils +import tables +import streams +import sequtils +import sugar + +import ../css/style +import ../css/cssparser +import ../css/selector + +import ../types/enums +import ../types/tagtypes + +import ../utils/twtstr + +import ../io/twtio + +const css = staticRead"../../res/default.css" +let stylesheet = parseCSS(newStringStream(css)) + +type + EventTarget* = ref EventTargetObj + EventTargetObj = object of RootObj + + Node* = ref NodeObj + NodeObj = object of EventTargetObj + nodeType*: NodeType + childNodes*: seq[Node] + children*: seq[Element] + isConnected*: bool + nextSibling*: Node + previousSibling*: Node + parentNode*: Node + parentElement*: Element + ownerDocument*: Document + + rawtext*: string + fmttext*: seq[string] + x*: int + y*: int + ex*: int + ey*: int + width*: int + height*: int + hidden*: bool + + Attr* = ref AttrObj + AttrObj = object of NodeObj + namespaceURI*: string + prefix*: string + localName*: string + name*: string + value*: string + ownerElement*: Element + + Document* = ref DocumentObj + DocumentObj = object of NodeObj + location*: Uri + 
type_elements*: array[low(TagType)..high(TagType), seq[Element]] + id_elements*: Table[string, seq[Element]] + class_elements*: Table[string, seq[Element]] + all_elements*: seq[Element] + head*: HTMLElement + body*: HTMLElement + + CharacterData* = ref CharacterDataObj + CharacterDataObj = object of NodeObj + data*: string + length*: int + + Text* = ref TextObj + TextObj = object of CharacterDataObj + wholeText*: string + + Comment* = ref CommentObj + CommentObj = object of CharacterDataObj + + Element* = ref ElementObj + ElementObj = object of NodeObj + namespaceURI*: string + prefix*: string + localName*: string + tagName*: string + tagType*: TagType + + id*: string + classList*: seq[string] + attributes*: Table[string, Attr] + box*: CSSBox + + HTMLElement* = ref HTMLElementObj + HTMLElementObj = object of ElementObj + + HTMLInputElement* = ref HTMLInputElementObj + HTMLInputElementObj = object of HTMLElementObj + itype*: InputType + autofocus*: bool + required*: bool + value*: string + size*: int + + HTMLAnchorElement* = ref HTMLAnchorElementObj + HTMLAnchorElementObj = object of HTMLElementObj + href*: string + + HTMLSelectElement* = ref HTMLSelectElementObj + HTMLSelectElementObj = object of HTMLElementObj + name*: string + value*: string + valueSet*: bool + + HTMLSpanElement* = ref HTMLSpanElementObj + HTMLSpanElementObj = object of HTMLElementObj + + HTMLOptionElement* = ref HTMLOptionElementObj + HTMLOptionElementObj = object of HTMLElementObj + value*: string + + HTMLHeadingElement* = ref HTMLHeadingElementObj + HTMLHeadingElementObj = object of HTMLElementObj + rank*: uint16 + + HTMLBRElement* = ref HTMLBRElementObj + HTMLBRElementObj = object of HTMLElementObj + + +func firstChild(node: Node): Node = + if node.childNodes.len == 0: + return nil + return node.childNodes[0] + +func lastChild(node: Node): Node = + if node.childNodes.len == 0: + return nil + return node.childNodes[^1] + +func firstElementChild(node: Node): Element = + if node.children.len == 
0: + return nil + return node.children[0] + +func lastElementChild(node: Node): Element = + if node.children.len == 0: + return nil + return node.children[^1] + +func `$`*(element: Element): string = + return "Element of " & $element.tagType + +#TODO +func nodeAttr*(node: Node): HtmlElement = + case node.nodeType + of TEXT_NODE: return HtmlElement(node.parentElement) + of ELEMENT_NODE: return HtmlElement(node) + else: assert(false) + +func getStyle*(node: Node): CSS2Properties = + case node.nodeType + of TEXT_NODE: return node.parentElement.box.props + of ELEMENT_NODE: return Element(node).box.props + else: assert(false) + +func displayed*(node: Node): bool = + return node.rawtext.len > 0 and node.getStyle().display != DISPLAY_NONE + +func isTextNode*(node: Node): bool = + return node.nodeType == TEXT_NODE + +func isElemNode*(node: Node): bool = + return node.nodeType == ELEMENT_NODE + +func isComment*(node: Node): bool = + return node.nodeType == COMMENT_NODE + +func isCData*(node: Node): bool = + return node.nodeType == CDATA_SECTION_NODE + +func isDocument*(node: Node): bool = + return node.nodeType == DOCUMENT_NODE + +func getFmtLen*(htmlNode: Node): int = + return htmlNode.fmttext.join().runeLen() + +func getRawLen*(htmlNode: Node): int = + return htmlNode.rawtext.runeLen() + +func firstNode*(htmlNode: Node): bool = + return htmlNode.parentElement != nil and htmlNode.parentElement.childNodes[0] == htmlNode + +func lastNode*(htmlNode: Node): bool = + return htmlNode.parentElement != nil and htmlNode.parentElement.childNodes[^1] == htmlNode + +func toInputType*(str: string): InputType = + case str + of "button": INPUT_BUTTON + of "checkbox": INPUT_CHECKBOX + of "color": INPUT_COLOR + of "date": INPUT_DATE + of "datetime_local": INPUT_DATETIME_LOCAL + of "email": INPUT_EMAIL + of "file": INPUT_FILE + of "hidden": INPUT_HIDDEN + of "image": INPUT_IMAGE + of "month": INPUT_MONTH + of "number": INPUT_NUMBER + of "password": INPUT_PASSWORD + of "radio": INPUT_RADIO + 
of "range": INPUT_RANGE + of "reset": INPUT_RESET + of "search": INPUT_SEARCH + of "submit": INPUT_SUBMIT + of "tel": INPUT_TEL + of "text": INPUT_TEXT + of "time": INPUT_TIME + of "url": INPUT_URL + of "week": INPUT_WEEK + else: INPUT_UNKNOWN + +func toInputSize*(str: string): int = + if str.len == 0: + return 20 + for c in str: + if not c.isDigit(): + return 20 + return str.parseInt() + +func getFmtInput(inputElement: HtmlInputElement): seq[string] = + case inputElement.itype + of INPUT_TEXT, INPUT_SEARCH: + let valueFit = fitValueToSize(inputElement.value, inputElement.size) + return valueFit.ansiStyle(styleUnderscore).ansiReset().buttonFmt() + of INPUT_SUBMIT: + return inputElement.value.buttonFmt() + else: discard + +func getRawInput(inputElement: HtmlInputElement): string = + case inputElement.itype + of INPUT_TEXT, INPUT_SEARCH: + return inputElement.value.fitValueToSize(inputElement.size).buttonRaw() + of INPUT_SUBMIT: + return inputElement.value.buttonRaw() + else: discard + +#TODO +func ancestor*(htmlNode: Node, tagType: TagType): HtmlElement = + result = HtmlElement(htmlNode.parentElement) + while result != nil and result.tagType != tagType: + result = HtmlElement(result.parentElement) + +proc getRawText*(htmlNode: Node): string = + if htmlNode.isElemNode(): + case HtmlElement(htmlNode).tagType + of TAG_INPUT: return HtmlInputElement(htmlNode).getRawInput() + else: return "" + elif htmlNode.isTextNode(): + let chardata = CharacterData(htmlNode) + #eprint "char data", chardata.data + if htmlNode.parentElement != nil and htmlNode.parentElement.tagType != TAG_PRE: + result = chardata.data.remove("\n") + if unicode.strip(result).runeLen() > 0: + if htmlNode.getStyle().display != DISPLAY_INLINE: + result = unicode.strip(result) + else: + result = "" + else: + result = unicode.strip(chardata.data) + if htmlNode.parentElement != nil and htmlNode.parentElement.tagType == TAG_OPTION: + result = result.buttonRaw() + else: + assert(false) + +func getFmtText*(node: 
Node): seq[string] = + if node.isElemNode(): + case HtmlElement(node).tagType + of TAG_INPUT: return HtmlInputElement(node).getFmtInput() + else: return @[] + elif node.isTextNode(): + let chardata = CharacterData(node) + result &= chardata.data + if node.parentElement != nil: + let style = node.getStyle() + if style.hasColor(): + result = result.ansiFgColor(style.termColor()) + + if node.parentElement.tagType == TAG_OPTION: + result = result.ansiFgColor(fgRed).ansiReset() + + if style.bold: + result = result.ansiStyle(styleBright).ansiReset() + if style.italic: + result = result.ansiStyle(styleItalic).ansiReset() + if style.underscore: + result = result.ansiStyle(styleUnderscore).ansiReset() + else: + assert(false, node.rawtext) + else: + assert(false) + +func newText*(): Text = + new(result) + result.nodeType = TEXT_NODE + +func newComment*(): Comment = + new(result) + result.nodeType = COMMENT_NODE + +func newBox*(element: HTMLElement): CSSBox = + new(result) + result.props = CSS2Properties() + +func newHtmlElement*(tagType: TagType): HTMLElement = + case tagType + of TAG_INPUT: + result = new(HTMLInputElement) + of TAG_A: + result = new(HTMLAnchorElement) + of TAG_SELECT: + result = new(HTMLSelectElement) + of TAG_OPTION: + result = new(HTMLOptionElement) + of TAG_H1, TAG_H2, TAG_H3, TAG_H4, TAG_H5, TAG_H6: + result = new(HTMLHeadingElement) + of TAG_BR: + result = new(HTMLBRElement) + of TAG_SPAN: + result = new(HTMLSpanElement) + else: + new(result) + + result.nodeType = ELEMENT_NODE + result.tagType = tagType + result.box = result.newBox() + +func newDocument*(): Document = + new(result) + result.head = newHtmlElement(TAG_HEAD) + result.body = newHtmlElement(TAG_BODY) + result.nodeType = DOCUMENT_NODE + +func newAttr*(parent: Element, key: string, value: string): Attr = + new(result) + result.nodeType = ATTRIBUTE_NODE + result.ownerElement = parent + result.name = key + result.value = value + +func getAttrValue*(element: Element, s: string): string = + let 
attr = element.attributes.getOrDefault(s, nil) + if attr != nil: + return attr.value + return "" + +#TODO case sensitivity +func attrSelectorMatches(elem: Element, sel: Selector): bool = + case sel.rel + of ' ': return sel.attr in elem.attributes + of '=': return elem.getAttrValue(sel.attr) == sel.value + of '~': return sel.value in unicode.split(elem.getAttrValue(sel.attr)) + of '|': + let val = elem.getAttrValue(sel.attr) + return val == sel.value or sel.value.startsWith(val & '-') + of '^': return elem.getAttrValue(sel.attr).startsWith(sel.value) + of '$': return elem.getAttrValue(sel.attr).endsWith(sel.value) + of '*': return elem.getAttrValue(sel.attr).contains(sel.value) + else: return false + +func pseudoSelectorMatches(elem: Element, sel: Selector): bool = + case sel.pseudo + of "first-child": return elem.parentNode.firstElementChild == elem + of "last-child": return elem.parentNode.lastElementChild == elem + else: return false + +func pseudoElemSelectorMatches(elem: Element, sel: Selector): bool = + case sel.elem + of "after": return false + of "before": return false + else: return false + +func selectorMatches(elem: Element, sel: Selector): bool = + case sel.t + of TYPE_SELECTOR: + return elem.tagType == sel.tag + of CLASS_SELECTOR: + return sel.class in elem.classList + of ID_SELECTOR: + return sel.id == elem.id + of ATTR_SELECTOR: + return elem.attrSelectorMatches(sel) + of PSEUDO_SELECTOR: + return pseudoSelectorMatches(elem, sel) + of PSELEM_SELECTOR: + return pseudoElemSelectorMatches(elem, sel) + of UNIVERSAL_SELECTOR: + return true + of FUNC_SELECTOR: + return false + +func selectorsMatch(elem: Element, selectors: SelectorList): bool = + for sel in selectors.sels: + if not selectorMatches(elem, sel): + return false + return true + +func selectElems(document: Document, sel: Selector): seq[Element] = + case sel.t + of TYPE_SELECTOR: + return document.type_elements[sel.tag] + of ID_SELECTOR: + return document.id_elements[sel.id] + of CLASS_SELECTOR: + 
return document.class_elements[sel.class] + of UNIVERSAL_SELECTOR: + return document.all_elements + #TODO: following selectors are rather inefficient + of ATTR_SELECTOR: + return document.all_elements.filter((elem) => attrSelectorMatches(elem, sel)) + of PSEUDO_SELECTOR: + return document.all_elements.filter((elem) => pseudoSelectorMatches(elem, sel)) + of PSELEM_SELECTOR: + return document.all_elements.filter((elem) => pseudoElemSelectorMatches(elem, sel)) + of FUNC_SELECTOR: + if sel.name == "not": + return document.all_elements.filter((elem) => not selectorsMatch(elem, sel.selectors)) + return newSeq[Element]() + +func optimizeSelectorList(selectors: SelectorList): SelectorList = + new(result) + #pass 1: check for invalid sequences + var i = 1 + while i < selectors.len: + let sel = selectors[i] + if sel.t == TYPE_SELECTOR or sel.t == UNIVERSAL_SELECTOR: + return SelectorList() + inc i + + #pass 2: move selectors in combination + if selectors.len > 1: + var i = 0 + var slow = SelectorList() + if selectors[0].t == UNIVERSAL_SELECTOR: + inc i + + while i < selectors.len: + if selectors[i].t in {ATTR_SELECTOR, PSEUDO_SELECTOR, PSELEM_SELECTOR}: + slow.add(selectors[i]) + else: + result.add(selectors[i]) + inc i + + result.add(slow) + else: + result.add(selectors[0]) + +func selectElems(document: Document, selectors: SelectorList): seq[Element] = + assert(selectors.len > 0) + let sellist = optimizeSelectorList(selectors) + result = document.selectElems(selectors[0]) + var i = 1 + + while i < sellist.len: + if sellist[i].t == FUNC_SELECTOR: + if sellist[i].name == "not": + result = result.filter((elem) => not selectorsMatch(elem, sellist[i].selectors)) + else: + result = result.filter((elem) => selectorMatches(elem, sellist[i])) + inc i + +proc querySelector*(document: Document, q: string): seq[Element] = + let ss = newStringStream(q) + let cvals = parseCSSListOfComponentValues(ss) + let selectors = parseSelectors(cvals) + + for sel in selectors: + 
result.add(document.selectElems(sel)) + +proc applyRule(elem: Element, rule: CSSRule) = + let selectors = parseSelectors(rule.prelude) + for sel in selectors: + if elem.selectorsMatch(sel): + eprint "match!" + +proc applyRules(document: Document, rules: CSSStylesheet) = + var stack: seq[Element] + + stack.add(document.firstElementChild) + while stack.len > 0: + let elem = stack.pop() + for child in elem.children: + stack.add(child) diff --git a/src/html/entity.nim b/src/html/entity.nim new file mode 100644 index 00000000..79c57ca0 --- /dev/null +++ b/src/html/entity.nim @@ -0,0 +1,28 @@ +import json + +import ../utils/radixtree + +const entity = staticRead"../../res/entity.json" +when defined(small): + proc genEntityMap(data: seq[tuple[a: string, b: string]]): RadixNode[string] = + result = newRadixTree[string]() + for pair in data: + result[pair.a] = pair.b + + proc genEntityTable(): seq[tuple[a: string, b: string]] = + let entityJson = parseJson(entity) + + for k, v in entityJson: + result.add((k.substr(1), v{"characters"}.getStr())) + const entityTable = genEntityTable() + let entityMap* = genEntityMap(entityTable) +else: + proc genEntityMap(): StaticRadixTree[string] = + let entityJson = parseJson(entity) + var entityMap = newStaticRadixTree[string]() + + for k, v in entityJson: + entityMap[k.substr(1)] = v{"characters"}.getStr() + + return entityMap + const entityMap* = genEntityMap() diff --git a/src/html/htmlparser.nim b/src/html/htmlparser.nim new file mode 100644 index 00000000..f43bcf40 --- /dev/null +++ b/src/html/htmlparser.nim @@ -0,0 +1,540 @@ +import streams +import unicode +import strutils +import tables +import json + +import ../types/enums +import ../types/tagtypes + +import ../utils/twtstr +import ../utils/radixtree + +import ../io/twtio + +import dom +import entity + +type + HTMLParseState = object + closed: bool + parents: seq[Node] + parsedNode: Node + a: string + b: string + attrs: seq[string] + in_comment: bool + in_script: bool + in_style: 
bool + in_noscript: bool + in_body: bool + parentNode: Node + textNode: Text + +#func newHtmlElement(tagType: TagType, parentNode: Node): HtmlElement = +# case tagType +# of TAG_INPUT: result = new(HtmlInputElement) +# of TAG_A: result = new(HtmlAnchorElement) +# of TAG_SELECT: result = new(HtmlSelectElement) +# of TAG_OPTION: result = new(HtmlOptionElement) +# else: result = new(HtmlElement) +# +# result.nodeType = ELEMENT_NODE +# result.tagType = tagType +# result.parentNode = parentNode +# if parentNode.isElemNode(): +# result.parentElement = HtmlElement(parentNode) +# +# if tagType in DisplayInlineTags: +# result.display = DISPLAY_INLINE +# elif tagType in DisplayBlockTags: +# result.display = DISPLAY_BLOCK +# elif tagType in DisplayInlineBlockTags: +# result.display = DISPLAY_INLINE_BLOCK +# elif tagType == TAG_LI: +# result.display = DISPLAY_LIST_ITEM +# else: +# result.display = DISPLAY_NONE +# +# case tagType +# of TAG_CENTER: +# result.centered = true +# of TAG_B: +# result.bold = true +# of TAG_I: +# result.italic = true +# of TAG_U: +# result.underscore = true +# of TAG_HEAD: +# result.hidden = true +# of TAG_STYLE: +# result.hidden = true +# of TAG_SCRIPT: +# result.hidden = true +# of TAG_OPTION: +# result.hidden = true #TODO +# of TAG_PRE, TAG_TD, TAG_TH: +# result.margin = 1 +# of TAG_UL, TAG_OL: +# result.indent = 2 +# result.margin = 1 +# of TAG_H1, TAG_H2, TAG_H3, TAG_H4, TAG_H5, TAG_H6: +# result.bold = true +# result.margin = 1 +# of TAG_A: +# result.islink = true +# of TAG_INPUT: +# HtmlInputElement(result).size = 20 +# else: discard +# +# if parentNode.isElemNode(): +# let parent = HtmlElement(parentNode) +# result.centered = result.centered or parent.centered +# result.bold = result.bold or parent.bold +# result.italic = result.italic or parent.italic +# result.underscore = result.underscore or parent.underscore +# result.hidden = result.hidden or parent.hidden +# result.islink = result.islink or parent.islink + +func inputSize*(str: string): 
int = + if str.len == 0: + return 20 + for c in str: + if not c.isDigit: + return 20 + return str.parseInt() + +#w3m's getescapecmd and parse_tag, transpiled to nim. +#(C) Copyright 1994-2002 by Akinori Ito +#(C) Copyright 2002-2011 by Akinori Ito, Hironori Sakamoto, Fumitoshi Ukai +# +#Use, modification and redistribution of this software is hereby granted, +#provided that this entire copyright notice is included on any copies of +#this software and applications and derivations thereof. +# +#This software is provided on an "as is" basis, without warranty of any +#kind, either expressed or implied, as to any matter including, but not +#limited to warranty of fitness of purpose, or merchantability, or +#results obtained from use of this software. +proc getescapecmd(buf: string, at: var int): string = + var i = at + + if buf[i] == '#': #num + inc i + var num: int + if buf[i].tolower() == 'x': #hex + inc i + if not isdigit(buf[i]): + at = i + return "" + + num = hexValue(buf[i]) + inc i + while i < buf.len and hexValue(buf[i]) != -1: + num *= 0x10 + num += hexValue(buf[i]) + inc i + else: #dec + if not isDigit(buf[i]): + at = i + return "" + + num = decValue(buf[i]) + inc i + while i < buf.len and isDigit(buf[i]): + num *= 10 + num += decValue(buf[i]) + inc i + + if buf[i] == ';': + inc i + at = i + return $(Rune(num)) + elif not isAlphaAscii(buf[i]): + return "" + + #TODO this could be way more efficient (and radixnode needs better interface) + when defined(small): + var n = entityMap + var s = "" + while true: + s &= buf[i] + if not entityMap.hasPrefix(s, n): + break + let pn = n + n = n{s} + if n != pn: + s = "" + inc i + + if n.leaf: + at = i + return n.value + else: + var n = 0 + var s = "" + while true: + s &= buf[i] + if not entityMap.hasPrefix(s, n): + break + let pn = n + n = entityMap{s, n} + if n != pn: + s = "" + inc i + + if entityMap.nodes[n].leaf: + at = i + return entityMap.nodes[n].value + + return "" + +type + DOMParsedTag = object + tagid: TagType + 
attrs: Table[string, string] + open: bool + +proc parse_tag(buf: string, at: var int): DOMParsedTag = + var tag = DOMParsedTag() + tag.open = true + + #Parse tag name + var tagname = "" + inc at + if buf[at] == '/': + inc at + tag.open = false + at = skipBlanks(buf, at) + + while at < buf.len and not buf[at].isWhitespace() and not (tag.open and buf[at] == '/') and buf[at] != '>': + tagname &= buf[at].tolower() + at += buf.runeLenAt(at) + + tag.tagid = tagType(tagname) + at = skipBlanks(buf, at) + + while at < buf.len and buf[at] != '>': + var value = "" + var attrname = "" + while at < buf.len and buf[at] != '=' and not buf[at].isWhitespace() and buf[at] != '>': + attrname &= buf[at].tolower() + at += buf.runeLenAt(at) + + at = skipBlanks(buf, at) + if buf[at] == '=': + inc at + at = skipBlanks(buf, at) + if at < buf.len and (buf[at] == '"' or buf[at] == '\''): + let startc = buf[at] + inc at + while at < buf.len and buf[at] != startc: + var r: Rune + fastRuneAt(buf, at, r) + if r == Rune('&'): + value &= getescapecmd(buf, at) + else: + value &= $r + if at < buf.len: + inc at + elif at < buf.len: + while at < buf.len and not buf[at].isWhitespace() and buf[at] != '>': + value &= buf[at] + at += buf.runeLenAt(at) + + if attrname.len > 0: + tag.attrs[attrname] = value + + while at < buf.len and buf[at] != '>': + at += buf.runeLenAt(at) + + if at < buf.len and buf[at] == '>': + inc at + return tag + +proc insertNode(parent: Node, node: Node) = + parent.childNodes.add(node) + + if parent.childNodes.len > 1: + let prevSibling = parent.childNodes[^1] + prevSibling.nextSibling = node + node.previousSibling = prevSibling + + node.parentNode = parent + if parent.nodeType == ELEMENT_NODE: + node.parentElement = (Element)parent + + if parent.ownerDocument != nil: + node.ownerDocument = parent.ownerDocument + elif parent.nodeType == DOCUMENT_NODE: + node.ownerDocument = (Document)parent + + if node.nodeType == ELEMENT_NODE: + parent.children.add((Element)node) + + let element = 
((Element)node) + if element.ownerDocument != nil: + node.ownerDocument.all_elements.add((Element)node) + element.ownerDocument.type_elements[element.tagType].add(element) + if element.id != "": + if not (element.id in element.ownerDocument.id_elements): + element.ownerDocument.id_elements[element.id] = newSeq[Element]() + element.ownerDocument.id_elements[element.id].add(element) + + for c in element.classList: + if not (c in element.ownerDocument.class_elements): + element.ownerDocument.class_elements[c] = newSeq[Element]() + element.ownerDocument.class_elements[c].add(element) + +proc processDocumentBody(state: var HTMLParseState) = + if not state.in_body: + state.in_body = true + if state.parentNode.ownerDocument != nil: + state.parentNode = state.parentNode.ownerDocument.body + +proc processDocumentStartNode(state: var HTMLParseState, newNode: Node) = + if state.parentNode.nodeType == ELEMENT_NODE and ((Element)state.parentNode).tagType == TAG_HTML: + if state.in_body: + state.parentNode = state.parentNode.ownerDocument.body + else: + state.parentNode = state.parentNode.ownerDocument.head + + insertNode(state.parentNode, newNode) + state.parentNode = newNode + +proc processDocumentEndNode(state: var HTMLParseState) = + if state.parentNode == nil or state.parentNode.parentNode == nil: + return + state.parentNode = state.parentNode.parentNode + +proc processDocumentText(state: var HTMLParseState) = + if state.textNode != nil and state.textNode.data.len > 0: + processDocumentBody(state) + if state.textNode == nil: + state.textNode = newText() + + processDocumentStartNode(state, state.textNode) + processDocumentEndNode(state) + +proc processDocumentStartElement(state: var HTMLParseState, element: Element, tag: DOMParsedTag) = + var add = true + + for k, v in tag.attrs: + element.attributes[k] = element.newAttr(k, v) + + element.id = element.getAttrValue("id") + if element.attributes.hasKey("class"): + for w in unicode.split(element.attributes["class"].value, 
Rune(' ')): + element.classList.add(w) + + case element.tagType + of TAG_SCRIPT: + state.in_script = true + of TAG_NOSCRIPT: + state.in_noscript = true + of TAG_STYLE: + state.in_style = true + of TAG_SELECT: + HTMLSelectElement(element).name = element.getAttrValue("name") + HTMLSelectElement(element).value = element.getAttrValue("value") + of TAG_INPUT: + HTMLInputElement(element).value = element.getAttrValue("value") + HTMLInputElement(element).itype = element.getAttrValue("type").inputType() + HTMLInputElement(element).size = element.getAttrValue("size").inputSize() + of TAG_A: + HTMLAnchorElement(element).href = element.getAttrValue("href") + of TAG_OPTION: + HTMLOptionElement(element).value = element.getAttrValue("href") + of TAG_HTML: + add = false + of TAG_HEAD: + add = false + of TAG_BODY: + add = false + processDocumentBody(state) + else: discard + + if state.parentNode.nodeType == ELEMENT_NODE: + case element.tagType + of TAG_LI, TAG_P: + if Element(state.parentNode).tagType == element.tagType: + processDocumentEndNode(state) + of TAG_H1: + HTMLHeadingElement(element).rank = 1 + of TAG_H2: + HTMLHeadingElement(element).rank = 2 + of TAG_H3: + HTMLHeadingElement(element).rank = 3 + of TAG_H4: + HTMLHeadingElement(element).rank = 4 + of TAG_H5: + HTMLHeadingElement(element).rank = 5 + of TAG_H6: + HTMLHeadingElement(element).rank = 6 + else: discard + + if add: + processDocumentStartNode(state, element) + + if element.tagType in VoidTagTypes: + processDocumentEndNode(state) + +proc processDocumentEndElement(state: var HTMLParseState, tag: DOMParsedTag) = + if tag.tagid in VoidTagTypes: + return + if tag.tagid == TAG_HEAD: + state.in_body = true + return + if tag.tagid == TAG_BODY: + return + if state.parentNode.nodeType == ELEMENT_NODE: + if Element(state.parentNode).tagType in {TAG_LI, TAG_P}: + processDocumentEndNode(state) + + processDocumentEndNode(state) + +proc processDocumentTag(state: var HTMLParseState, tag: DOMParsedTag) = + if state.in_script: + 
if tag.tagid == TAG_SCRIPT: + state.in_script = false + else: + return + + if state.in_style: + if tag.tagid == TAG_STYLE: + state.in_style = false + else: + return + + if state.in_noscript: + if tag.tagid == TAG_NOSCRIPT: + state.in_noscript = false + else: + return + + if tag.open: + processDocumentStartElement(state, newHtmlElement(tag.tagid), tag) + else: + processDocumentEndElement(state, tag) + +proc processDocumentPart(state: var HTMLParseState, buf: string) = + var at = 0 + var max = 0 + var was_script = false + + max = buf.len + + while at < max: + case buf[at] + of '&': + inc at + let p = getescapecmd(buf, at) + if state.in_comment: + CharacterData(state.parentNode).data &= p + else: + processDocumentText(state) + state.textNode.data &= p + of '<': + if state.in_comment: + CharacterData(state.parentNode).data &= buf[at] + inc at + else: + var p = at + inc p + if p < max and buf[p] == '!': + inc p + if p < max and buf[p] == '-': + inc p + if p < max and buf[p] == '-': + inc p + at = p + state.in_comment = true + processDocumentStartNode(state, newComment()) + if state.textNode != nil: + state.textNode.rawtext = state.textNode.getRawText() + state.textNode = nil + else: + #TODO for doctype + while p < max and buf[p] != '>': + inc p + at = p + continue + + if not state.in_comment: + if state.textNode != nil: + state.textNode.rawtext = state.textNode.getRawText() + state.textNode = nil + p = at + var tag = parse_tag(buf, at) + was_script = state.in_script + + processDocumentTag(state, tag) +# if (was_script) { +# if (state->in_script) { +# ptr = p; +# processDocumentText(&state->parentNode, &state->textNode); +# Strcat_char(((CharacterData *)state->textNode)->data, *ptr++); +# } else if (buffer->javascript_enabled) { +# loadJSToBuffer(buffer, childTextContentNode(state->parentNode->lastChild)->ptr, "", state->document); +# } +# } + elif buf[at] == '-' and state.in_comment: + var p = at + inc p + if p < max and buf[p] == '-': + inc p + if p < max and buf[p] == 
'>': + inc p + at = p + state.in_comment = false + processDocumentEndNode(state) + + if state.in_comment: + CharacterData(state.parentNode).data &= buf[at] + inc at + else: + var r: Rune + fastRuneAt(buf, at, r) + if state.in_comment: + CharacterData(state.parentNode).data &= $r + else: + processDocumentText(state) + state.textNode.data &= $r + +proc parseHtml*(inputStream: Stream): Document = + let document = newDocument() + let html = newHtmlElement(TAG_HTML) + insertNode(document, html) + insertNode(html, document.head) + insertNode(html, document.body) + #eprint document.body.firstElementChild != nil + + var state = HTMLParseState() + state.parentNode = html + + var till_when = false + + var buf = "" + var lineBuf: string + while not inputStream.atEnd(): + lineBuf = inputStream.readLine() + buf &= lineBuf + + var at = 0 + while at < lineBuf.len: + case lineBuf[at] + of '<': + till_when = true + of '>': + till_when = false + else: discard + at += lineBuf.runeLenAt(at) + + if till_when: + continue + + processDocumentPart(state, buf) + buf = "" + + inputStream.close() + return document diff --git a/src/htmlparser.nim b/src/htmlparser.nim deleted file mode 100644 index 86065ef7..00000000 --- a/src/htmlparser.nim +++ /dev/null @@ -1,471 +0,0 @@ -import streams -import unicode -import strutils -import tables -import json - -import twtio -import enums -import twtstr -import dom -import radixtree -import entity - -type - HTMLParseState = object - closed: bool - parents: seq[Node] - parsedNode: Node - a: string - b: string - attrs: seq[string] - in_comment: bool - in_script: bool - in_style: bool - in_noscript: bool - parentNode: Node - textNode: Text - -#func newHtmlElement(tagType: TagType, parentNode: Node): HtmlElement = -# case tagType -# of TAG_INPUT: result = new(HtmlInputElement) -# of TAG_A: result = new(HtmlAnchorElement) -# of TAG_SELECT: result = new(HtmlSelectElement) -# of TAG_OPTION: result = new(HtmlOptionElement) -# else: result = new(HtmlElement) -# -# 
result.nodeType = ELEMENT_NODE -# result.tagType = tagType -# result.parentNode = parentNode -# if parentNode.isElemNode(): -# result.parentElement = HtmlElement(parentNode) -# -# if tagType in DisplayInlineTags: -# result.display = DISPLAY_INLINE -# elif tagType in DisplayBlockTags: -# result.display = DISPLAY_BLOCK -# elif tagType in DisplayInlineBlockTags: -# result.display = DISPLAY_INLINE_BLOCK -# elif tagType == TAG_LI: -# result.display = DISPLAY_LIST_ITEM -# else: -# result.display = DISPLAY_NONE -# -# case tagType -# of TAG_CENTER: -# result.centered = true -# of TAG_B: -# result.bold = true -# of TAG_I: -# result.italic = true -# of TAG_U: -# result.underscore = true -# of TAG_HEAD: -# result.hidden = true -# of TAG_STYLE: -# result.hidden = true -# of TAG_SCRIPT: -# result.hidden = true -# of TAG_OPTION: -# result.hidden = true #TODO -# of TAG_PRE, TAG_TD, TAG_TH: -# result.margin = 1 -# of TAG_UL, TAG_OL: -# result.indent = 2 -# result.margin = 1 -# of TAG_H1, TAG_H2, TAG_H3, TAG_H4, TAG_H5, TAG_H6: -# result.bold = true -# result.margin = 1 -# of TAG_A: -# result.islink = true -# of TAG_INPUT: -# HtmlInputElement(result).size = 20 -# else: discard -# -# if parentNode.isElemNode(): -# let parent = HtmlElement(parentNode) -# result.centered = result.centered or parent.centered -# result.bold = result.bold or parent.bold -# result.italic = result.italic or parent.italic -# result.underscore = result.underscore or parent.underscore -# result.hidden = result.hidden or parent.hidden -# result.islink = result.islink or parent.islink - -func inputSize*(str: string): int = - if str.len == 0: - return 20 - for c in str: - if not c.isDigit: - return 20 - return str.parseInt() - -#w3m's getescapecmd and parse_tag, transpiled to nim. 
-#(C) Copyright 1994-2002 by Akinori Ito -#(C) Copyright 2002-2011 by Akinori Ito, Hironori Sakamoto, Fumitoshi Ukai -# -#Use, modification and redistribution of this software is hereby granted, -#provided that this entire copyright notice is included on any copies of -#this software and applications and derivations thereof. -# -#This software is provided on an "as is" basis, without warranty of any -#kind, either expressed or implied, as to any matter including, but not -#limited to warranty of fitness of purpose, or merchantability, or -#results obtained from use of this software. -proc getescapecmd(buf: string, at: var int): string = - var i = at - - if buf[i] == '#': #num - inc i - var num: int - if buf[i].tolower() == 'x': #hex - inc i - if not isdigit(buf[i]): - at = i - return "" - - num = hexValue(buf[i]) - inc i - while i < buf.len and hexValue(buf[i]) != -1: - num *= 0x10 - num += hexValue(buf[i]) - inc i - else: #dec - if not isDigit(buf[i]): - at = i - return "" - - num = decValue(buf[i]) - inc i - while i < buf.len and isDigit(buf[i]): - num *= 10 - num += decValue(buf[i]) - inc i - - if buf[i] == ';': - inc i - at = i - return $(Rune(num)) - elif not isAlphaAscii(buf[i]): - return "" - - var n = 0 - var s = "" - while true: - s &= buf[i] - if not entityMap.hasPrefix(s, n): - break - let pn = n - n = entityMap{s, n} - if n != pn: - s = "" - inc i - - if entityMap.nodes[n].leaf: - at = i - return entityMap.nodes[n].value - - return "" - -type - DOMParsedTag = object - tagid: TagType - attrs: Table[string, string] - open: bool - -proc parse_tag(buf: string, at: var int): DOMParsedTag = - var tag = DOMParsedTag() - tag.open = true - - #Parse tag name - var tagname = "" - inc at - if buf[at] == '/': - inc at - tag.open = false - at = skipBlanks(buf, at) - - while at < buf.len and not buf[at].isWhitespace() and not (tag.open and buf[at] == '/') and buf[at] != '>': - tagname &= buf[at].tolower() - at += buf.runeLenAt(at) - - tag.tagid = tagType(tagname) - at 
= skipBlanks(buf, at) - - while at < buf.len and buf[at] != '>': - var value = "" - var attrname = "" - while at < buf.len and buf[at] != '=' and not buf[at].isWhitespace() and buf[at] != '>': - attrname &= buf[at].tolower() - at += buf.runeLenAt(at) - - at = skipBlanks(buf, at) - if buf[at] == '=': - inc at - at = skipBlanks(buf, at) - if at < buf.len and (buf[at] == '"' or buf[at] == '\''): - let startc = buf[at] - inc at - while at < buf.len and buf[at] != startc: - var r: Rune - fastRuneAt(buf, at, r) - if r == Rune('&'): - value &= getescapecmd(buf, at) - else: - value &= $r - if at < buf.len: - inc at - elif at < buf.len: - while at < buf.len and not buf[at].isWhitespace() and buf[at] != '>': - value &= buf[at] - at += buf.runeLenAt(at) - - if attrname.len > 0: - tag.attrs[attrname] = value - - while at < buf.len and buf[at] != '>': - at += buf.runeLenAt(at) - - if at < buf.len and buf[at] == '>': - inc at - return tag - -proc insertNode(parent: Node, node: Node) = - parent.childNodes.add(node) - - if parent.firstChild == nil: - parent.firstChild = node - - parent.lastChild = node - - if parent.childNodes.len > 1: - let prevSibling = parent.childNodes[^1] - prevSibling.nextSibling = node - node.previousSibling = prevSibling - - node.parentNode = parent - if parent.nodeType == ELEMENT_NODE: - node.parentElement = Element(parent) - - if parent.ownerDocument != nil: - node.ownerDocument = parent.ownerDocument - elif parent.nodeType == DOCUMENT_NODE: - node.ownerDocument = Document(parent) - -proc processDocumentStartNode(state: var HTMLParseState, newNode: Node) = - insertNode(state.parentNode, newNode) - state.parentNode = newNode - -proc processDocumentEndNode(state: var HTMLParseState) = - if state.parentNode == nil or state.parentNode.parentNode == nil: - return - state.parentNode = state.parentNode.parentNode - -proc processDocumentText(state: var HTMLParseState) = - if state.textNode == nil: - state.textNode = newText() - - processDocumentStartNode(state, 
state.textNode) - processDocumentEndNode(state) - -proc processDocumentStartElement(state: var HTMLParseState, element: Element, tag: DOMParsedTag) = - for k, v in tag.attrs: - element.attributes[k] = element.newAttr(k, v) - - element.id = element.getAttrValue("id") - if element.attributes.hasKey("class"): - for w in unicode.split(element.attributes["class"].value, Rune(' ')): - element.classList.add(w) - - case element.tagType - of TAG_SCRIPT: - state.in_script = true - of TAG_NOSCRIPT: - state.in_noscript = true - of TAG_STYLE: - state.in_style = true - of TAG_SELECT: - HTMLSelectElement(element).name = element.getAttrValue("name") - HTMLSelectElement(element).value = element.getAttrValue("value") - of TAG_INPUT: - HTMLInputElement(element).value = element.getAttrValue("value") - HTMLInputElement(element).itype = element.getAttrValue("type").inputType() - HTMLInputElement(element).size = element.getAttrValue("size").inputSize() - of TAG_A: - HTMLAnchorElement(element).href = element.getAttrValue("href") - of TAG_OPTION: - HTMLOptionElement(element).value = element.getAttrValue("href") - else: discard - - if state.parentNode.nodeType == ELEMENT_NODE: - case element.tagType - of TAG_LI, TAG_P: - if Element(state.parentNode).tagType == element.tagType: - processDocumentEndNode(state) - of TAG_H1: - HTMLHeadingElement(element).rank = 1 - of TAG_H2: - HTMLHeadingElement(element).rank = 2 - of TAG_H3: - HTMLHeadingElement(element).rank = 3 - of TAG_H4: - HTMLHeadingElement(element).rank = 4 - of TAG_H5: - HTMLHeadingElement(element).rank = 5 - of TAG_H6: - HTMLHeadingElement(element).rank = 6 - else: discard - - processDocumentStartNode(state, element) - if element.ownerDocument != nil: - for c in element.classList: - element.ownerDocument.id_elements[c] = element - if not (c in element.ownerDocument.class_elements): - element.ownerDocument.class_elements[c] = newSeq[Element]() - element.ownerDocument.class_elements[c].add(element) - - if element.tagType in 
VoidTagTypes: - processDocumentEndNode(state) - -proc processDocumentEndElement(state: var HTMLParseState, tag: DOMParsedTag) = - if tag.tagid in VoidTagTypes: - return - if state.parentNode.nodeType == ELEMENT_NODE: - if Element(state.parentNode).tagType in {TAG_LI, TAG_P}: - processDocumentEndNode(state) - - processDocumentEndNode(state) - -proc processDocumentTag(state: var HTMLParseState, tag: DOMParsedTag) = - if state.in_script: - if tag.tagid == TAG_SCRIPT: - state.in_script = false - else: - return - - if state.in_style: - if tag.tagid == TAG_STYLE: - state.in_style = false - else: - return - - if state.in_noscript: - if tag.tagid == TAG_NOSCRIPT: - state.in_noscript = false - else: - return - - if tag.open: - processDocumentStartElement(state, newHtmlElement(tag.tagid), tag) - else: - processDocumentEndElement(state, tag) - -proc processDocumentPart(state: var HTMLParseState, buf: string) = - var at = 0 - var max = 0 - var was_script = false - - max = buf.len - - while at < max: - case buf[at] - of '&': - inc at - let p = getescapecmd(buf, at) - if state.in_comment: - CharacterData(state.parentNode).data &= p - else: - processDocumentText(state) - state.textNode.data &= p - of '<': - if state.in_comment: - CharacterData(state.parentNode).data &= buf[at] - inc at - else: - var p = at - inc p - if p < max and buf[p] == '!': - inc p - if p < max and buf[p] == '-': - inc p - if p < max and buf[p] == '-': - inc p - at = p - state.in_comment = true - processDocumentStartNode(state, newComment()) - if state.textNode != nil: - state.textNode.rawtext = state.textNode.getRawText() - state.textNode = nil - - if not state.in_comment: - if state.textNode != nil: - state.textNode.rawtext = state.textNode.getRawText() - state.textNode = nil - p = at - var tag = parse_tag(buf, at) - was_script = state.in_script - - processDocumentTag(state, tag) -# if (was_script) { -# if (state->in_script) { -# ptr = p; -# processDocumentText(&state->parentNode, &state->textNode); -# 
Strcat_char(((CharacterData *)state->textNode)->data, *ptr++); -# } else if (buffer->javascript_enabled) { -# loadJSToBuffer(buffer, childTextContentNode(state->parentNode->lastChild)->ptr, "", state->document); -# } -# } - elif buf[at] == '-' and state.in_comment: - var p = at - inc p - if p < max and buf[p] == '-': - inc p - if p < max and buf[p] == '>': - inc p - at = p - state.in_comment = false - processDocumentEndNode(state) - - if state.in_comment: - CharacterData(state.parentNode).data &= buf[at] - inc at - else: - var r: Rune - fastRuneAt(buf, at, r) - if state.in_comment: - CharacterData(state.parentNode).data &= $r - else: - processDocumentText(state) - state.textNode.data &= $r - -proc parseHtml*(inputStream: Stream): Document = - let document = newDocument() - - var state = HTMLParseState() - state.parentNode = document - - var till_when = false - - var buf = "" - var lineBuf: string - while not inputStream.atEnd(): - lineBuf = inputStream.readLine() - buf &= lineBuf - - var at = 0 - while at < lineBuf.len: - case lineBuf[at] - of '<': - till_when = true - of '>': - till_when = false - else: discard - at += lineBuf.runeLenAt(at) - - if till_when: - continue - - processDocumentPart(state, buf) - buf = "" - - inputStream.close() - return document diff --git a/src/io/display.nim b/src/io/display.nim new file mode 100644 index 00000000..e3ce6bac --- /dev/null +++ b/src/io/display.nim @@ -0,0 +1,399 @@ +import terminal +import options +import uri +import strutils +import unicode + +import ../types/enums + +import ../utils/termattrs +import ../utils/twtstr + +import ../html/dom + +import ../buffer +import ../config + +import twtio + +proc clearStatusMsg*(at: int) = + setCursorPos(0, at) + eraseLine() + +proc statusMsg*(str: string, at: int) = + clearStatusMsg(at) + print(str.ansiStyle(styleReverse).ansiReset()) + +type + RenderState = object + x: int + y: int + lastwidth: int + fmtline: string + rawline: string + centerqueue: seq[Node] + centerlen: int + 
blanklines: int + blankspaces: int + nextspaces: int + docenter: bool + indent: int + listval: int + +func newRenderState(): RenderState = + return RenderState(blanklines: 1) + +proc write(state: var RenderState, s: string) = + state.fmtline &= s + state.rawline &= s + +proc write(state: var RenderState, fs: string, rs: string) = + state.fmtline &= fs + state.rawline &= rs + +proc flushLine(buffer: Buffer, state: var RenderState) = + if state.rawline.len == 0: + inc state.blanklines + assert(state.rawline.runeLen() < buffer.width, "line too long:\n" & state.rawline) + buffer.writefmt(state.fmtline) + buffer.writeraw(state.rawline) + state.x = 0 + inc state.y + state.nextspaces = 0 + state.fmtline = "" + state.rawline = "" + +proc addSpaces(buffer: Buffer, state: var RenderState, n: int) = + if state.x + n > buffer.width: + buffer.flushLine(state) + return + state.blankspaces += n + state.write(' '.repeat(n)) + state.x += n + +proc writeWrappedText(buffer: Buffer, state: var RenderState, node: Node) = + state.lastwidth = 0 + var n = 0 + var fmtword = "" + var rawword = "" + var prevl = false + let fmttext = node.getFmtText() + for w in fmttext: + if w.len > 0 and w[0] == '\e': + fmtword &= w + continue + + for r in w.runes: + if r == Rune(' '): + if rawword.len > 0 and rawword[0] == ' ' and prevl: + fmtword = fmtword.substr(1) + rawword = rawword.substr(1) + state.x -= 1 + prevl = false + state.write(fmtword, rawword) + fmtword = "" + rawword = "" + + if r == Rune('\n'): + state.write(fmtword, rawword) + buffer.flushLine(state) + rawword = "" + fmtword = "" + else: + fmtword &= r + rawword &= r + + state.x += r.width() + + if state.x >= buffer.width: + state.lastwidth = max(state.lastwidth, state.x) + buffer.flushLine(state) + state.x = rawword.width() + prevl = true + else: + state.lastwidth = max(state.lastwidth, state.x) + + inc n + + state.write(fmtword, rawword) + if prevl: + state.x += rawword.width() + prevl = false + + state.lastwidth = max(state.lastwidth, 
state.x) + +proc preAlignNode(buffer: Buffer, node: Node, state: var RenderState) = + let style = node.getStyle() + if state.rawline.len > 0 and node.firstNode() and state.blanklines == 0: + buffer.flushLine(state) + + if node.firstNode(): + #while state.blanklines < max(style.margin, style.margintop): + # buffer.flushLine(state) + state.indent += style.indent + + if state.rawline.len > 0 and state.blanklines == 0 and node.displayed(): + buffer.addSpaces(state, state.nextspaces) + state.nextspaces = 0 + #if state.blankspaces < max(style.margin, style.marginleft): + # buffer.addSpaces(state, max(style.margin, style.marginleft) - state.blankspaces) + + if style.centered and state.rawline.len == 0 and node.displayed(): + buffer.addSpaces(state, max(buffer.width div 2 - state.centerlen div 2, 0)) + state.centerlen = 0 + + if node.isElemNode() and style.display == DISPLAY_LIST_ITEM and state.indent > 0: + if state.blanklines == 0: + buffer.flushLine(state) + var listchar = "•" + #case elem.parentElement.tagType + #of TAG_UL: + # listchar = "•" + #of TAG_OL: + # inc state.listval + # listchar = $state.listval & ")" + #else: + # return + buffer.addSpaces(state, state.indent) + state.write(listchar) + state.x += listchar.runeLen() + buffer.addSpaces(state, 1) + +proc postAlignNode(buffer: Buffer, node: Node, state: var RenderState) = + let style = node.getStyle() + + if node.getRawLen() > 0: + state.blanklines = 0 + state.blankspaces = 0 + + #if state.rawline.len > 0 and state.blanklines == 0: + # state.nextspaces += max(style.margin, style.marginright) + # if node.lastNode() and (node.isTextNode() or elem.childNodes.len == 0): + # buffer.flushLine(state) + + if node.lastNode(): + #while state.blanklines < max(style.margin, style.marginbottom): + # buffer.flushLine(state) + state.indent -= style.indent + + if style.display == DISPLAY_LIST_ITEM and node.lastNode(): + buffer.flushLine(state) + +proc renderNode(buffer: Buffer, node: Node, state: var RenderState) = + if not 
(node.nodeType in {ELEMENT_NODE, TEXT_NODE}): + return + let style = node.getStyle() + if node.nodeType == ELEMENT_NODE: + if Element(node).tagType in {TAG_SCRIPT, TAG_STYLE, TAG_NOSCRIPT, TAG_TITLE}: + return + if style.hidden: return + + if not state.docenter: + if style.centered: + state.centerqueue.add(node) + if node.lastNode(): + state.docenter = true + state.centerlen = 0 + for node in state.centerqueue: + state.centerlen += node.getRawLen() + for node in state.centerqueue: + buffer.renderNode(node, state) + state.centerqueue.setLen(0) + state.docenter = false + return + else: + return + if state.centerqueue.len > 0: + state.docenter = true + state.centerlen = 0 + for node in state.centerqueue: + state.centerlen += node.getRawLen() + for node in state.centerqueue: + buffer.renderNode(node, state) + state.centerqueue.setLen(0) + state.docenter = false + + buffer.preAlignNode(node, state) + + node.x = state.x + node.y = state.y + buffer.writeWrappedText(state, node) + node.ex = state.x + node.ey = state.y + node.width = state.lastwidth - node.x - 1 + node.height = state.y - node.y + 1 + + buffer.postAlignNode(node, state) + +proc setLastHtmlLine(buffer: Buffer, state: var RenderState) = + if state.rawline.len != 0: + buffer.flushLine(state) + +proc renderHtml*(buffer: Buffer) = + var stack: seq[Node] + let first = buffer.document + stack.add(first) + + var state = newRenderState() + while stack.len > 0: + let currElem = stack.pop() + buffer.addNode(currElem) + buffer.renderNode(currElem, state) + var i = currElem.childNodes.len - 1 + while i >= 0: + stack.add(currElem.childNodes[i]) + i -= 1 + + buffer.setLastHtmlLine(state) + +proc drawHtml(buffer: Buffer) = + var state = newRenderState() + for node in buffer.nodes: + buffer.renderNode(node, state) + buffer.setLastHtmlLine(state) + +proc statusMsgForBuffer(buffer: Buffer) = + var msg = $(buffer.cursory + 1) & "/" & $(buffer.lastLine() + 1) & " (" & + $buffer.atPercentOf() & "%) " & + "<" & buffer.title & ">" 
+ if buffer.hovertext.len > 0: + msg &= " " & buffer.hovertext + statusMsg(msg.maxString(buffer.width), buffer.height) + +proc cursorBufferPos(buffer: Buffer) = + var x = buffer.cursorx + var y = buffer.cursory - 1 - buffer.fromY + termGoto(x, y + 1) + +proc displayBuffer(buffer: Buffer) = + eraseScreen() + termGoto(0, 0) + + print(buffer.visibleText().ansiReset()) + +proc inputLoop(attrs: TermAttributes, buffer: Buffer): bool = + var s = "" + var feedNext = false + while true: + stdout.showCursor() + buffer.cursorBufferPos() + if not feedNext: + s = "" + else: + feedNext = false + let c = getch() + s &= c + let action = getNormalAction(s) + var redraw = false + var reshape = false + var nostatus = false + case action + of ACTION_QUIT: + eraseScreen() + return false + of ACTION_CURSOR_LEFT: redraw = buffer.cursorLeft() + of ACTION_CURSOR_DOWN: redraw = buffer.cursorDown() + of ACTION_CURSOR_UP: redraw = buffer.cursorUp() + of ACTION_CURSOR_RIGHT: redraw = buffer.cursorRight() + of ACTION_CURSOR_LINEBEGIN: buffer.cursorLineBegin() + of ACTION_CURSOR_LINEEND: buffer.cursorLineEnd() + of ACTION_CURSOR_NEXT_WORD: redraw = buffer.cursorNextWord() + of ACTION_CURSOR_PREV_WORD: redraw = buffer.cursorPrevWord() + of ACTION_CURSOR_NEXT_LINK: redraw = buffer.cursorNextLink() + of ACTION_CURSOR_PREV_LINK: redraw = buffer.cursorPrevLink() + of ACTION_PAGE_DOWN: redraw = buffer.pageDown() + of ACTION_PAGE_UP: redraw = buffer.pageUp() + of ACTION_HALF_PAGE_DOWN: redraw = buffer.halfPageDown() + of ACTION_HALF_PAGE_UP: redraw = buffer.halfPageUp() + of ACTION_CURSOR_FIRST_LINE: redraw = buffer.cursorFirstLine() + of ACTION_CURSOR_LAST_LINE: redraw = buffer.cursorLastLine() + of ACTION_CURSOR_TOP: redraw = buffer.cursorTop() + of ACTION_CURSOR_MIDDLE: redraw = buffer.cursorMiddle() + of ACTION_CURSOR_BOTTOM: redraw = buffer.cursorBottom() + of ACTION_CENTER_LINE: redraw = buffer.centerLine() + of ACTION_SCROLL_DOWN: redraw = buffer.scrollDown() + of ACTION_SCROLL_UP: redraw = 
buffer.scrollUp() + of ACTION_CLICK: + let selectedElem = buffer.findSelectedElement() + if selectedElem.isSome: + case selectedElem.get().tagType + of TAG_INPUT: + clearStatusMsg(buffer.height) + let status = readLine("TEXT: ", HtmlInputElement(selectedElem.get()).value, buffer.width) + if status: + reshape = true + redraw = true + else: discard + if selectedElem.get().getStyle().islink: + let anchor = HtmlAnchorElement(buffer.selectedlink.ancestor(TAG_A)).href + buffer.gotoLocation(parseUri(anchor)) + return true + of ACTION_CHANGE_LOCATION: + var url = $buffer.document.location + + clearStatusMsg(buffer.height) + let status = readLine("URL: ", url, buffer.width) + if status: + buffer.setLocation(parseUri(url)) + return true + of ACTION_LINE_INFO: + statusMsg("line " & $buffer.cursory & "/" & $buffer.lastLine() & " col " & $(buffer.cursorx + 1) & "/" & $buffer.currentLineLength(), buffer.width) + nostatus = true + of ACTION_FEED_NEXT: + feedNext = true + of ACTION_RELOAD: return true + of ACTION_RESHAPE: + reshape = true + redraw = true + of ACTION_REDRAW: redraw = true + else: discard + stdout.hideCursor() + + let prevlink = buffer.selectedlink + let sel = buffer.checkLinkSelection() + if sel: + buffer.clearText() + buffer.drawHtml() + termGoto(0, buffer.selectedlink.y - buffer.fromy) + stdout.eraseLine() + for i in buffer.selectedlink.y..buffer.selectedlink.ey: + if i < buffer.fromy + buffer.height - 1: + let line = buffer.fmttext[i] + print(line) + print('\n') + print("".ansiReset()) + + if prevlink != nil: + buffer.clearText() + buffer.drawHtml() + termGoto(0, prevlink.y - buffer.fromy) + for i in prevlink.y..prevlink.ey: + if i < buffer.fromy + buffer.height - 1: + let line = buffer.fmttext[i] + stdout.eraseLine() + print(line) + print('\n') + print("".ansiReset()) + + if buffer.refreshTermAttrs(): + redraw = true + reshape = true + + if reshape: + buffer.clearText() + buffer.drawHtml() + if redraw: + buffer.displayBuffer() + + if not nostatus: + 
buffer.statusMsgForBuffer() + else: + nostatus = false + +proc displayPage*(attrs: TermAttributes, buffer: Buffer): bool = + #buffer.printwrite = true + discard buffer.gotoAnchor() + buffer.displayBuffer() + buffer.statusMsgForBuffer() + return inputLoop(attrs, buffer) + diff --git a/src/io/twtio.nim b/src/io/twtio.nim new file mode 100644 index 00000000..34cf4ce6 --- /dev/null +++ b/src/io/twtio.nim @@ -0,0 +1,323 @@ +import terminal +import tables +import unicode +import strutils +import sequtils + +import ../utils/twtstr +import ../utils/radixtree + +import ../config + +template print*(s: varargs[string, `$`]) = + for x in s: + stdout.write(x) + +template printesc*(s: string) = + for r in s.runes: + if r.isControlChar(): + stdout.write(('^' & $($r)[0].getControlLetter()) + .ansiFgColor(fgBlue).ansiStyle(styleBright).ansiReset()) + else: + stdout.write($r) + +template printspc(i: int) = + print(' '.repeat(i)) + +template eprint*(s: varargs[string, `$`]) = {.cast(noSideEffect).}: + var a = false + for x in s: + if not a: + a = true + else: + stderr.write(' ') + stderr.write(x) + stderr.write('\n') + +proc termGoto*(x: int, y: int) = + setCursorPos(stdout, x, y) + +proc getNormalAction*(s: string): TwtAction = + if normalActionRemap.hasKey(s): + return normalActionRemap[s] + return NO_ACTION + +proc getLinedAction*(s: string): TwtAction = + if linedActionRemap.hasKey(s): + return linedActionRemap[s] + return NO_ACTION + +type LineState = object + news: seq[Rune] + s: string + feedNext: bool + escNext: bool + comp: bool + compn: RadixNode[string] + compa: int + comps: string + cursor: int + shift: int + minlen: int + maxlen: int + displen: int + spaces: seq[string] + +proc backward(state: LineState, i: int) = + if i == 1: + print('\b') + else: + cursorBackward(i) + +proc forward(state: LineState, i: int) = + cursorForward(i) + +proc begin(state: LineState) = + print('\r') + + state.forward(state.minlen) + +proc space(state: LineState, i: int) = + 
print(state.spaces[i]) + +proc kill(state: LineState) = + when defined(windows): + let w = min(state.news.width(state.cursor), state.displen) + state.space(w) + state.backward(w) + else: + print("\e[K") + +proc fullRedraw(state: var LineState) = + state.displen = state.maxlen - 1 + if state.cursor > state.shift: + var shiftw = state.news.width(state.shift, state.cursor) + while shiftw > state.maxlen - 1: + inc state.shift + shiftw -= state.news[state.shift].width() + else: + state.shift = max(state.cursor - 1, 0) + + var dispw = state.news.width(state.shift, state.shift + state.displen) + if state.shift + state.displen > state.news.len: + state.displen = state.news.len - state.shift + while dispw > state.maxlen - 1: + dispw -= state.news[state.shift + state.displen - 1].width() + dec state.displen + + state.begin() + let os = state.news.substr(state.shift, state.shift + state.displen) + printesc($os) + state.space(max(state.maxlen - os.width(), 0)) + + state.begin() + state.forward(state.news.width(state.shift, state.cursor)) + +proc zeroShiftRedraw(state: var LineState) = + state.shift = 0 + state.displen = state.maxlen - 1 + + var dispw = state.news.width(0, state.displen) + if state.displen > state.news.len: + state.displen = state.news.len + while dispw > state.maxlen - 1: + dispw -= state.news[state.displen - 1].width() + dec state.displen + + state.begin() + let os = state.news.substr(0, state.displen) + printesc($os) + state.space(max(state.maxlen - os.width(), 0)) + + state.begin() + state.forward(state.news.width(0, state.cursor)) + +proc insertCharseq(state: var LineState, cs: var seq[Rune]) = + let escNext = state.escNext + cs.keepIf(func(r: Rune): bool = escNext or not r.isControlChar()) + state.escNext = false + if cs.len == 0: + return + elif state.cursor >= state.news.len and state.news.width(state.shift, state.cursor) + cs.width() < state.displen: + state.news &= cs + state.cursor += cs.len + printesc($cs) + else: + state.news.insert(cs, 
state.cursor) + state.cursor += cs.len + state.fullRedraw() + +proc insertCompose(state: var LineState, c: char) = + state.comps &= c + let n = state.compn{state.comps} + if n != state.compn: + state.compn = n + state.compa += state.comps.len + state.comps = "" + if state.compn.hasPrefix(state.comps, state.compn) and n.children.len > 0: + state.feedNext = true + else: + var cs: seq[Rune] + if state.compn.leaf: + cs = state.compn.value.toRunes() + else: + cs = state.s.substr(0, state.compa - 1).toRunes() + state.comps = state.s.substr(state.compa) + if state.comps.len > 0 and composeRemap.hasPrefix(state.comps): + state.compa = state.comps.len + state.compn = composeRemap{state.comps} + state.s = state.comps + state.comps = "" + state.feedNext = true + else: + cs &= state.comps.toRunes() + state.compa = 0 + state.compn = composeRemap + state.comps = "" + + state.insertCharseq(cs) + +proc readLine*(current: var string, minlen: int, maxlen: int): bool = + var state: LineState + state.news = current.toRunes() + state.compn = composeRemap + state.cursor = state.news.len + state.minlen = minlen + state.maxlen = maxlen + state.displen = state.maxlen - 1 + #ugh + for i in 0..(maxlen - minlen): + state.spaces.add(' '.repeat(i)) + printesc(current) + while true: + if not state.feedNext: + state.s = "" + else: + state.feedNext = false + + let c = getch() + state.s &= c + + var action = getLinedAction(state.s) + if state.escNext: + action = NO_ACTION + case action + of ACTION_LINED_CANCEL: + return false + of ACTION_LINED_SUBMIT: + current = $state.news + return true + of ACTION_LINED_BACKSPACE: + if state.cursor > 0: + state.news.delete(state.cursor - 1, state.cursor - 1) + dec state.cursor + state.fullRedraw() + of ACTION_LINED_DELETE: + if state.cursor > 0 and state.cursor < state.news.len: + state.news.delete(state.cursor, state.cursor) + state.fullRedraw() + of ACTION_LINED_ESC: + state.escNext = true + of ACTION_LINED_CLEAR: + if state.cursor > 0: + state.news.delete(0, 
state.cursor - 1) + state.cursor = 0 + state.zeroShiftRedraw() + of ACTION_LINED_KILL: + if state.cursor < state.news.len: + state.kill() + state.news.setLen(state.cursor) + of ACTION_LINED_BACK: + if state.cursor > 0: + dec state.cursor + if state.cursor > state.shift or state.shift == 0: + state.backward(state.news[state.cursor].width()) + else: + state.fullRedraw() + of ACTION_LINED_FORWARD: + if state.cursor < state.news.len: + inc state.cursor + if state.news.width(state.shift, state.cursor) < state.displen: + var n = 1 + if state.news.len > state.cursor: + n = state.news[state.cursor].width() + state.forward(n) + else: + state.fullRedraw() + of ACTION_LINED_PREV_WORD: + let oc = state.cursor + while state.cursor > 0: + dec state.cursor + if state.news[state.cursor].breaksWord(): + break + if state.cursor != oc: + if state.cursor > state.shift or state.shift == 0: + state.backward(state.news.width(state.cursor, oc)) + else: + state.fullRedraw() + of ACTION_LINED_NEXT_WORD: + let oc = state.cursor + while state.cursor < state.news.len: + inc state.cursor + if state.cursor < state.news.len: + if state.news[state.cursor].breaksWord(): + break + + if state.cursor != oc: + let dw = state.news.width(oc, state.cursor) + if oc + dw - state.shift < state.displen: + state.forward(dw) + else: + state.fullRedraw() + of ACTION_LINED_KILL_WORD: + var chars = 0 + if state.cursor > chars: + inc chars + + while state.cursor > chars: + inc chars + if state.news[state.cursor - chars].breaksWord(): + dec chars + break + if chars > 0: + let w = state.news.width(state.cursor - chars, state.cursor) + state.news.delete(state.cursor - chars, state.cursor - 1) + state.cursor -= chars + if state.cursor > state.news.len and state.shift == 0: + state.backward(w) + state.space(w) + state.backward(w) + else: + state.fullRedraw() + of ACTION_LINED_BEGIN: + if state.cursor > 0: + if state.shift == 0: + state.backward(state.news.width(0, state.cursor)) + else: + state.fullRedraw() + 
state.cursor = 0 + of ACTION_LINED_END: + if state.cursor < state.news.len: + if state.news.width(state.shift, state.news.len) < maxlen: + state.forward(state.news.width(state.cursor, state.news.len)) + else: + state.fullRedraw() + state.cursor = state.news.len + of ACTION_LINED_COMPOSE_TOGGLE: + state.comp = not state.comp + state.compn = composeRemap + state.compa = 0 + state.comps = "" + of ACTION_FEED_NEXT: + state.feedNext = true + elif state.comp: + state.insertCompose(c) + elif validateUtf8(state.s) == -1: + var cs = state.s.toRunes() + state.insertCharseq(cs) + else: + state.feedNext = true + +proc readLine*(prompt: string, current: var string, termwidth: int): bool = + printesc(prompt) + readLine(current, prompt.width(), termwidth - prompt.len) diff --git a/src/main.nim b/src/main.nim index 95f1f3cd..539527f1 100644 --- a/src/main.nim +++ b/src/main.nim @@ -3,14 +3,18 @@ import uri import os import streams -import display -import termattrs +import css/style + +import utils/termattrs + +import html/dom +import html/htmlparser + +import io/display +import io/twtio + import buffer -import twtio import config -import htmlparser -import dom -import style let clientInstance = newHttpClient() proc loadRemotePage*(url: string): string = @@ -39,16 +43,17 @@ proc main*() = if paramCount() != 1: eprint "Invalid parameters. 
Usage:\ntwt " quit(1) - if not readConfig("config"): - eprint "Failed to read keymap, falling back to default" + if not readConfig("res/config"): + eprint "Failed to read keymap, fallback to default" let attrs = getTermAttributes() let buffer = newBuffer(attrs) let uri = parseUri(paramStr(1)) buffers.add(buffer) buffer.document = parseHtml(getPageUri(uri)) - #discard buffer.document.querySelector("#hi.a[title=\"test\"]") - var box = CSSBox() - applyProperties(box, "color: #090; line-height: 1.2") + let s = buffer.document.querySelector(":not(:first-child)") + eprint s.len + for q in s: + eprint q buffer.setLocation(uri) buffer.renderHtml() var lastUri = uri @@ -65,6 +70,5 @@ proc main*() = buffer.document = parseHtml(getPageUri(buffer.document.location)) buffer.renderHtml() lastUri = newUri - main() #parseCSS(newFileStream("default.css", fmRead)) diff --git a/src/radixtree.nim b/src/radixtree.nim deleted file mode 100644 index 0d9db3ab..00000000 --- a/src/radixtree.nim +++ /dev/null @@ -1,440 +0,0 @@ -# Radix tree implementation, with some caveats: -# * insertion takes forever, so try to insert only during compile-time -# * it isn't that much faster than a hash table, even when used for e.g. parsing -# -# Update: now it also has a version using references. Should be somewhat faster -# at the cost of having to initialize it every time the program is started. 
- -import strutils -import json -import tables - -type - RadixPair[T] = tuple[k: string, v: RadixNode[T]] - - RadixNode*[T] = ref object - children*: seq[RadixPair[T]] - case leaf*: bool - of true: value*: T - of false: discard - - StaticRadixPair = tuple[k: string, v: int] - StaticRadixPairSeq = seq[StaticRadixPair] - - StaticRadixNode[T] = object - children*: StaticRadixPairSeq - case leaf*: bool - of true: value*: T - of false: discard - - StaticRadixTree*[T] = object - nodes*: seq[StaticRadixNode[T]] - -func newStaticRadixTree*[T](): StaticRadixTree[T] = - result.nodes.add(StaticRadixNode[T](leaf: false)) - -func newRadixTree*[T](): RadixNode[T] = - new(result) - -func toRadixTree*[T](table: Table[string, T]): RadixNode[T] = - result = newRadixTree[T]() - for k, v in table: - result[k] = v - -# PairSeq Insert: theoretically this should only be called when there's no -# conflicts... TODO: so we should be able to just compare the first char? -# probably a bad idea... -proc `[]=`(pairseq: var StaticRadixPairSeq, k: string, v: int) = - var i = 0 - while i < pairseq.len: - if pairseq[i].k == k: - pairseq[i].v = v - return - inc i - - pairseq.add((k: k, v: v)) - -proc `[]=`[T](node: RadixNode[T], k: string, n: RadixNode[T]) = - var i = 0 - assert(k.len > 0) - while i < node.children.len: - if node.children[i].k == k: - node.children[i].v = n - return - inc i - - node.children.add((k: k, v: n)) - -# PairSeq Lookup: since we're sure k is in pairseq, return the first match. -func `[]`(pairseq: StaticRadixPairSeq, k: string): int = - var i = 0 - while i < pairseq.len: - if pairseq[i].k[0] == k[0]: - return pairseq[i].v - inc i - - return -1 - -func `[]`[T](node: RadixNode[T], k: string): RadixNode[T] = - var i = 0 - while i < node.children.len: - if node.children[i].k[0] == k[0]: - return node.children[i].v - inc i - - return nil - -# getOrDefault: we have to compare the entire string but if it doesn't match -# exactly we can just return default. 
-func getOrDefault(pairseq: StaticRadixPairSeq, k: string, default: int): int = - var i = 0 - while i < pairseq.len: - if pairseq[i].k[0] == k[0]: - if k.len != pairseq[i].k.len: - return default - var j = 1 - while j < k.len: - if pairseq[i].k[j] != k[j]: - return default - inc j - return pairseq[i].v - inc i - return default - -func getOrDefault[T](node: RadixNode[T], k: string, default: RadixNode[T]): RadixNode[T] = - var i = 0 - while i < node.children.len: - if node.children[i].k[0] == k[0]: - if k.len != node.children[i].k.len: - debugecho "defa: ", k, " ", node.children[i].k - return default - var j = 1 - while j < k.len: - if node.children[i].k[j] != k[j]: - return default - inc j - return node.children[i].v - inc i - return default - -func getOrDefault[T](node: RadixNode[T], k: string, default: int): int = - var i = 0 - while i < node.children.len: - if node.children[i].k[0] == k[0]: - if k.len != node.children[i].k.len: - return default - var j = 1 - while j < k.len: - if node.children[i].k[j] != k[j]: - return default - inc j - return i - inc i - return default - -iterator keys(pairseq: StaticRadixPairSeq): string = - var i = 0 - while i < pairseq.len: - yield pairseq[i].k - inc i - -iterator keys*[T](node: RadixNode[T]): string = - var i = 0 - while i < node.children.len: - yield node.children[i].k - inc i - -# AKA `in`. 
-func contains(pairseq: StaticRadixPairSeq, k: string): bool = - var i = 0 - while i < pairseq.len: - if pairseq[i].k[0] == k[0]: - if k.len != pairseq[i].k.len: - return false - var j = 1 - while j < k.len: - if pairseq[i].k[j] != k[j]: - return false - inc j - return true - inc i - return false - -func contains[T](node: RadixNode[T], k: string): bool = - var i = 0 - while i < node.children.len: - if node.children[i].k[0] == k[0]: - if k.len != node.children[i].k.len: - return false - var j = 1 - while j < k.len: - if node.children[i].k[j] != k[j]: - return false - inc j - return true - inc i - return false - -# Delete proc: again we should be able to check for first char only... TODO? -proc del(pairseq: var StaticRadixPairSeq, k: string) = - var i = 0 - while i < pairseq.len: - if pairseq[i].k == k: - pairseq.del(i) - return - inc i - -proc add[T](node: RadixNode[T], k: string, v: T) = - node.children.add((k, RadixNode[T](leaf: true, value: v))) - -proc add[T](node: RadixNode[T], k: string) = - node.children.add((k, RadixNode[T](leaf: false))) - -# Insert: this is ugly and I'm not quite sure about what it does at all. Oh -# well. 
-proc `[]=`*[T](tree: var StaticRadixTree[T], key: string, value: T) = - var n = 0 - var p = 0 - var i = 0 - var j = 0 - var s = "" - var t = "" - var nodeKey = "" - # find last matching node - while i < key.len: - s &= key[i] - inc i - if s in tree.nodes[n].children: - p = n - n = tree.nodes[n].children[s] - t &= s - j = i - nodeKey = s - s = "" - - for k in tree.nodes[n].children.keys: - if s.len > 0 and k[0] == s[0]: - p = n - n = tree.nodes[n].children[k] - t &= k - nodeKey = k - break - - # if first node, just add normally - if n == 0: - tree.nodes.add(StaticRadixNode[T](leaf: true, value: value)) - tree.nodes[n].children[key] = int(tree.nodes.len - 1) - else: - i = 0 - var conflict = false - # compare new key with the one we found so far - while i < t.len and i < key.len: - if key[i] == t[i]: - inc i - else: - conflict = true - break - - if conflict: - # conflict somewhere, so: - # * add new non-leaf to parent - # * add old to non-leaf - # * add new to non-leaf - # * remove old from parent - assert(i != 0) - - tree.nodes[p].children[key.substr(j, i - 1)] = int(tree.nodes.len) - tree.nodes.add(StaticRadixNode[T](leaf: false)) - tree.nodes[^1].children[t.substr(i)] = n - tree.nodes[^1].children[key.substr(i)] = int(tree.nodes.len) - tree.nodes.add(StaticRadixNode[T](leaf: true, value: value)) - tree.nodes[p].children.del(nodeKey) - else: # new is either substr of old or old is substr of new - # new matches a node, so replace - if key.len == t.len: - let children = tree.nodes[n].children - tree.nodes[n] = StaticRadixNode[T](leaf: true, value: value) - tree.nodes[n].children = children - elif i == j: - # new is longer than the old, so add child to old - tree.nodes[n].children[key.substr(i)] = int(tree.nodes.len) - tree.nodes.add(StaticRadixNode[T](leaf: true, value: value)) - elif i > 0: - # new is shorter than old, so: - # * add new to parent - # * add old to new - # * remove old from parent - tree.nodes[p].children[key.substr(j, i - 1)] = int(tree.nodes.len) - 
tree.nodes.add(StaticRadixNode[T](leaf: true, value: value)) - tree.nodes[^1].children[t.substr(i)] = n - tree.nodes[p].children.del(nodeKey) - -# Non-static insert, for extra fun - and code duplication :( -proc `[]=`*[T](tree: RadixNode[T], key: string, value: T) = - var n = tree - var p: RadixNode[T] = nil - var i = 0 - var j = 0 - var k = 0 - var s = "" - var t = "" - var l = 0 - # find last matching node - while i < key.len: - s &= key[i] - inc i - let pk = n.getOrDefault(s, -1) - if pk != -1: - k = pk - p = n - n = n.children[k].v - t &= s - j = i - s = "" - - l = 0 - for ki in n.keys: - if s.len > 0 and ki[0] == s[0]: - p = n - n = n[ki] - t &= ki - k = l - break - inc l - - # TODO: this below could be a better algorithm for what we do above - # but I'm kinda scared of touching it - #n = tree - #i = 0 - #j = 0 - #k = 0 - #t = "" - #p = nil - - #var conflict = false - #while i < key.len: - # k = 0 - # for pk in n.keys: - # if pk[0] == key[i]: - # var l = 0 - # while l < pk.len and i + l < key.len: - # if pk[l] != key[i + l]: - # conflict = true - # break - # inc l - # if not conflict: - # p = n - # n = n.children[k].v - # t &= pk - # i += l - # j = i - # break - # inc k - # inc i - - - # if first node, just add normally - if n == tree: - tree.add(key, value) - else: - i = 0 - var conflict = false - # compare new key with the one we found so far - while i < t.len and i < key.len: - if key[i] == t[i]: - inc i - else: - conflict = true - break - - if conflict: - # conflict somewhere, so: - # * add new non-leaf to parent - # * add old to non-leaf - # * add new to non-leaf - # * remove old from parent - debugecho "conflict: ", i, " ", j, " ", t, " ", key, ": ", key.substr(j, i - 1) - p[key.substr(j, i - 1)] = RadixNode[T](leaf: false) - p.children[^1].v[t.substr(i)] = n - p.children[^1].v[key.substr(i)] = RadixNode[T](leaf: true, value: value) - p.children.del(k) - else: # new is either substr of old or old is substr of new - # new matches a node, so replace - if 
key.len == t.len: - p.children[k].v = RadixNode[T](leaf: true, value: value, children: n.children) - elif key.len > t.len: - # new is longer than the old, so add child to old - debugecho "longer: ", i, " ", j, " ", t, " ", key, ": ", key.substr(i) - n[key.substr(i)] = RadixNode[T](leaf: true, value: value) - else: - assert(i > 0) - # new is shorter than old, so: - # * add new to parent - # * add old to new - # * remove old from parent - debugecho "shorter: ", i, " ", j, " ", t, " ", key, ": ", key.substr(i) - p[key.substr(j, i - 1)] = RadixNode[T](leaf: true, value: value) - p.children[^1].v[t.substr(i)] = n - p.children.del(k) - -func `{}`*[T](tree: StaticRadixTree[T], key: string, at: int = 0): int = - return tree.nodes[at].children.getOrDefault(key, at) - -func `{}`*[T](tree: RadixNode[T], key: string, at: RadixNode[T] = tree): RadixNode[T] = - return tree.getOrDefault(key, at) - -func hasPrefix*[T](tree: StaticRadixTree[T], prefix: string, at: int = 0): bool = - var n = at - var i = 0 - var j = 0 - var s = "" - while i < prefix.len: - s &= prefix[i] - inc i - if s in tree.nodes[n].children: - n = tree.nodes[n].children[s] - j = i - - if j == prefix.len: - return true - - for k in tree.nodes[n].children.keys: - if prefix.len - j < k.len and k[0] == prefix[j]: - i = 1 - inc j - while j < prefix.len: - inc i - inc j - if k[i] != k[j]: - return false - return true - - return false - -func hasPrefix*[T](tree: RadixNode[T], prefix: string, at: RadixNode[T] = tree): bool = - var n = at - var i = 0 - var j = 0 - var s = "" - while i < prefix.len: - s &= prefix[i] - inc i - if s in n: - n = n[s] - j = i - - if j == prefix.len: - return true - - for k in n.keys: - if prefix.len - j < k.len and k[0] == prefix[j]: - i = 1 - inc j - while j < prefix.len: - inc i - inc j - if k[i] != k[j]: - return false - return true - - return false diff --git a/src/style.nim b/src/style.nim deleted file mode 100644 index e5bee647..00000000 --- a/src/style.nim +++ /dev/null @@ -1,74 +0,0 
@@ -import streams -import unicode - -import enums -import cssparser -import twtio - -type - CSS2Properties* = ref object - rawtext*: string - fmttext*: seq[string] - x*: int - y*: int - ex*: int - ey*: int - width*: int - height*: int - hidden*: bool - before*: CSS2Properties - after*: CSS2Properties - margintop*: int - marginbottom*: int - marginleft*: int - marginright*: int - margin*: int - centered*: bool - display*: DisplayType - bold*: bool - italic*: bool - underscore*: bool - islink*: bool - selected*: bool - indent*: int - - CSSRect* = object - x1*: int - y1*: int - x2*: int - y2*: int - - CSSBox* = ref object - display*: DisplayType - x*: int - y*: int - innerEdge*: CSSRect - paddingEdge*: CSSRect - borderEdge*: CSSRect - marginEdge*: CSSRect - parent*: CSSBox - color*: CSSColor - margintop*: int - marginbottom*: int - marginleft*: int - marginright*: int - margin*: int - -proc applyProperties*(box: var CSSBox, props: string) = - var decls = parseCSSListOfDeclarations(newStringStream(props)) - - for item in decls: - if item of CSSDeclaration: - let d = CSSDeclaration(item) - case $d.name - of "color": - if d.value.len > 0 and d.value[0] of CSSToken and - CSSToken(d.value[0]).tokenType == CSS_HASH_TOKEN: - box.color = toColor(CSSToken(d.value[0]).value) - of "margin-top": - if d.value.len > 0 and d.value[0] of CSSToken: - if CSSToken(d.value[0]).tokenType == CSS_PERCENTAGE_TOKEN: - discard - #box.margintop = CSSToken(d.value[0]).nvalue #TODO represent percentages - else: - printc(d) diff --git a/src/termattrs.nim b/src/termattrs.nim deleted file mode 100644 index d49800ae..00000000 --- a/src/termattrs.nim +++ /dev/null @@ -1,11 +0,0 @@ -import terminal - -type - TermAttributes* = object - termWidth*: int - termHeight*: int - -proc getTermAttributes*(): TermAttributes = - let attrs = TermAttributes(termWidth: terminalWidth(), - termHeight: terminalHeight()) - return attrs diff --git a/src/twtio.nim b/src/twtio.nim deleted file mode 100644 index 
20c8c527..00000000 --- a/src/twtio.nim +++ /dev/null @@ -1,263 +0,0 @@ -import terminal -import tables -import unicode -import strutils - -import twtstr -import config -import radixtree - -template print*(s: varargs[string, `$`]) = - for x in s: - stdout.write(x) - -template printesc*(s: string) = - for r in s.runes: - if r.isControlChar(): - stdout.write(('^' & $($r)[0].getControlLetter()) - .ansiFgColor(fgBlue).ansiStyle(styleBright).ansiReset()) - else: - stdout.write($r) - -template eprint*(s: varargs[string, `$`]) = {.cast(noSideEffect).}: - var a = false - for x in s: - if not a: - a = true - else: - stderr.write(' ') - stderr.write(x) - stderr.write('\n') - -proc termGoto*(x: int, y: int) = - setCursorPos(stdout, x, y) - -proc getNormalAction*(s: string): TwtAction = - if normalActionRemap.hasKey(s): - return normalActionRemap[s] - return NO_ACTION - -proc getLinedAction*(s: string): TwtAction = - if linedActionRemap.hasKey(s): - return linedActionRemap[s] - return NO_ACTION - -proc readLine*(prompt: string, current: var string, termwidth: int): bool = - let maxlen = termwidth - prompt.len - let promptwidth = prompt.width() - var news = current.toRunes() - var s = "" - var feedNext = false - var escNext = false - var comp = false - var compi = composeRemap - var compa = 0 - var comps = "" - var cursor = news.len - var shift = 0 - var redraw = true - printesc(prompt) - while true: - if redraw: - var displen = maxlen - 1 - if cursor >= shift: - while news.substr(shift, cursor).width() > maxlen - 1: - shift += 1 - while news.substr(shift, shift + displen).width() > maxlen - 1: - displen -= 1 - - shift = max(0, min(cursor - 1, shift)) - - print('\r') - cursorForward(promptwidth) - let os = $news.substr(shift, shift + displen) - printesc(os) - print(' '.repeat(max(displen - os.width(), 0))) - - print('\r') - cursorForward(promptwidth + news.substr(shift, cursor).width()) - else: - redraw = true - - if not feedNext: - s = "" - else: - feedNext = false - - let c = 
getch() - s &= c - - var action = getLinedAction(s) - if escNext: - action = NO_ACTION - case action - of ACTION_LINED_CANCEL: - return false - of ACTION_LINED_SUBMIT: - current = $news - return true - of ACTION_LINED_BACKSPACE: - if cursor > 0: - news = news.substr(0, cursor - 1) & news.substr(cursor) - dec cursor - else: - redraw = false - of ACTION_LINED_DELETE: - if cursor > 0 and cursor < news.len: - news = news.substr(0, cursor) & news.substr(cursor + 1) - else: - redraw = false - of ACTION_LINED_ESC: - escNext = true - of ACTION_LINED_CLEAR: - news = news.substr(cursor) - cursor = 0 - of ACTION_LINED_KILL: - if cursor > 0: - news = news.substr(0, cursor) - else: - redraw = false - of ACTION_LINED_BACK: - if cursor > 0: - dec cursor - if cursor > shift: - redraw = false - cursorBackward(news[cursor].width()) - else: - redraw = false - of ACTION_LINED_FORWARD: - if cursor < news.len: - inc cursor - if news.substr(shift, cursor).width() < maxlen: - redraw = false - var n = 1 - if news.len > cursor: - n = news[cursor].width() - cursorForward(n) - else: - redraw = false - of ACTION_LINED_PREV_WORD: - let oc = cursor - while cursor > 0: - dec cursor - if news[cursor].breaksWord(): - break - if cursor == oc: - redraw = false - elif cursor > shift: - cursorBackward(news.substr(cursor, oc).width()) - redraw = false - of ACTION_LINED_NEXT_WORD: - let oc = cursor - while cursor < news.len: - inc cursor - if cursor < news.len: - if news[cursor].breaksWord(): - break - if cursor == oc: - redraw = false - else: - let dw = news.substr(oc, cursor).width() - if oc + dw - shift < maxlen: - cursorForward(dw) - redraw = false - of ACTION_LINED_KILL_WORD: - var chars = 0 - - while cursor > chars: - inc chars - if news[cursor - chars].breaksWord(): - break - if chars > 0: - let w = news.substr(cursor - chars, cursor).width() - news = news.substr(0, cursor - chars) & news.substr(cursor) - cursor -= chars - if cursor > shift: - redraw = false - cursorBackward(w) - print(' 
'.repeat(w)) - cursorBackward(w) - else: - redraw = false - of ACTION_LINED_BEGIN: - if cursor > 0: - if shift == 0: - redraw = false - cursorBackward(news.substr(0, cursor).width()) - cursor = 0 - else: - redraw = false - of ACTION_LINED_END: - if cursor < news.len: - if news.substr(shift, news.len).width() < maxlen: - redraw = false - cursorForward(news.substr(shift, news.len).width()) - cursor = news.len - else: - redraw = false - of ACTION_LINED_COMPOSE_TOGGLE: - comp = not comp - compi = composeRemap - compa = 0 - comps = "" - redraw = false - of ACTION_FEED_NEXT: - feedNext = true - redraw = false - elif comp: - comps &= c - let n = composeRemap{comps, compi} - if n != compi: - compi = n - compa += comps.len - comps = "" - if composeRemap.hasPrefix(comps, compi) and n.children.len > 0: - feedNext = true - else: - var cs = "" - if compi.leaf: - cs = compi.value - else: - cs = s.substr(0, compa - 1) - comps = s.substr(compa) - if comps.len > 0 and composeRemap.hasPrefix(comps): - compa = comps.len - compi = composeRemap{comps} - s = comps - comps = "" - feedNext = true - else: - cs &= comps - compa = 0 - compi = composeRemap - comps = "" - - news = news.substr(0, cursor) & cs.toRunes() & news.substr(cursor) - cursor += cs.runeLen() - elif validateUtf8(s) == -1: - var cs = "" - for c in s: - if not c.isControlChar(): - cs &= c - elif escNext: - cs &= c - escNext = false - escNext = false - if cs.len == 0: - redraw = false - continue - - let csr = cs.toRunes() - - if cursor >= news.len and - news.substr(shift, cursor).width() + csr.width() < maxlen - 1: - cursor += csr.len - news &= csr - print(csr) - redraw = false - else: - news = news.substr(0, cursor) & csr & news.substr(cursor) - cursor += csr.len - else: - feedNext = true - redraw = false diff --git a/src/twtstr.nim b/src/twtstr.nim deleted file mode 100644 index aa2cf2c7..00000000 --- a/src/twtstr.nim +++ /dev/null @@ -1,469 +0,0 @@ -import terminal -import strutils -import unicode - -func ansiStyle*(str: 
string, style: Style): seq[string] = - result &= ansiStyleCode(style) - result &= str - -func ansiFgColor*(str: string, color: ForegroundColor): seq[string] = - result &= ansiForegroundColorCode(color) - result &= str - -func ansiReset*(str: string): seq[string] = - result &= str - result &= ansiResetCode - -func ansiStyle*(str: seq[string], style: Style): seq[string] = - return ansiStyleCode(style) & str - -func ansiFgColor*(str: seq[string], color: ForegroundColor): seq[string] = - return ansiForegroundColorCode(color) & str - -func ansiReset*(str: seq[string]): seq[string] = - return str & ansiResetCode - -func maxString*(str: string, max: int): string = - if max < str.runeLen(): - return str.runeSubstr(0, max - 2) & "$" - return str - -func fitValueToSize*(str: string, size: int): string = - if str.runeLen < size: - return str & ' '.repeat(size - str.runeLen) - return str.maxString(size) - -func buttonFmt*(str: string): seq[string] = - return "[".ansiFgColor(fgRed) & str.ansiFgColor(fgRed).ansiReset() & "]".ansiFgColor(fgRed).ansiReset() - -func buttonFmt*(str: seq[string]): seq[string] = - return "[".ansiFgColor(fgRed) & str.ansiFgColor(fgRed).ansiReset() & "]".ansiFgColor(fgRed).ansiReset() - -func buttonRaw*(str: string): string = - return "[" & str & "]" - -func remove*(str: string, c: string): string = - let rem = c.toRunes()[0] - for rune in str.runes: - if rem != rune: - result &= $rune - -func isWhitespace*(c: char): bool = - case c - of ' ', '\n', '\r', '\t', '\f': return true - else: return false - -func isControlChar*(c: char): bool = - case c - of chr(0x00)..chr(0x1F): return true - of chr(0x7F): return true - else: return false - -func isControlChar*(r: Rune): bool = - case r - of Rune(0x00)..Rune(0x1F): return true - of Rune(0x7F): return true - else: return false - -func genControlCharMap*(): string = - for c in low(char)..high(char): - if c >= 'a': - result &= char(int(c) - int('a') + 1) - elif c == '?': - result &= char(127) - else: - result &= 
char(0) - -const controlCharMap = genControlCharMap() - -func getControlChar*(c: char): char = - return controlCharMap[int(c)] - -func getControlLetter*(c: char): char = - if int(c) <= 0x1F: - return char(int(c) + int('A') - 1) - elif c == '\x7F': - return '?' - assert(false) - -func findChar*(str: string, c: char, start: int = 0): int = - var i = start - while i < str.len: - if str[i] == c: - return i - inc i - return -1 - -func findChar*(str: string, c: Rune, start: int = 0): int = - var i = start - var n = i - while i < str.runeLen(): - var r: Rune - fastRuneAt(str, n, r) - if r == c: - return i - i = n - return -1 - -func getLowerChars*(): string = - result = "" - for i in 0..255: - if chr(i) >= 'A' and chr(i) <= 'Z': - result &= chr(i + 32) - else: - result &= chr(i) - -const lowerChars = getLowerChars() - -func tolower*(c: char): char = - return lowerChars[int(c)] - -const breakWord = [ - Rune('\n'), Rune('/'), Rune('\\'), Rune(' '), Rune('&'), Rune('='), - Rune('?'), Rune('.'), Rune(';') -] - -func genHexCharMap(): seq[int] = - for i in 0..255: - case chr(i) - of '0'..'9': result &= i - ord('0') - of 'a'..'f': result &= i - ord('a') + 10 - of 'A'..'F': result &= i - ord('A') + 10 - else: result &= -1 - -func genDecCharMap(): seq[int] = - for i in 0..255: - case chr(i) - of '0'..'9': result &= i - ord('0') - else: result &= -1 - -const hexCharMap = genHexCharMap() -const decCharMap = genDecCharMap() - -func hexValue*(c: char): int = - return hexCharMap[int(c)] - -func decValue*(c: char): int = - return decCharMap[int(c)] - -func isAscii*(r: Rune): bool = - return int(r) <= int(high(char)) - -func hexValue*(r: Rune): int = - if isAscii(r): - return hexValue(char(r)) - return -1 - -func decValue*(r: Rune): int = - if isAscii(r): - return decValue(char(r)) - return -1 - -func toAsciiLower*(s: seq[Rune]): string = - for r in s: - if isAscii(r): - result &= lowerChars[int(r)] - -func breaksWord*(r: Rune): bool = - return r in breakWord - -func isAlphaAscii*(r: 
Rune): bool = - return isAscii(r) and isAlphaAscii(char(r)) - -func isDigitAscii*(r: Rune): bool = - return isAscii(r) and isDigit(char(r)) - -func substr*(s: seq[Rune], i: int, j: int): seq[Rune] = - if s.len == 0: - return @[] - return s[min(high(s), i)..min(high(s), j - 1)] - -func substr*(s: seq[Rune], i: int): seq[Rune] = - if i >= high(s) or s.len == 0: - return @[] - return s[min(high(s), i)..high(s)] - -#Measure length of rune. From https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c - -#auxiliary function for binary search in interval table -func bisearch(ucs: Rune, table: openarray[(int, int)]): bool = - var max = table.high - var min = 0 - var mid: int - - if int(ucs) < table[0][0] or int(ucs) > table[max][1]: - return false - - while max >= min: - mid = (min + max) div 2 - if int(ucs) > table[mid][1]: - min = mid + 1 - elif int(ucs) < table[mid][0]: - max = mid - 1 - else: - return true - return false - -#The following two functions define the column width of an ISO 10646 -#character as follows: -# -# - The null character (U+0000) has a column width of 0. -# -# - Other C0/C1 control characters and DEL will lead to a return value of 2 -# (changed from 0 b/c we normally display control chars like ^H - TODO?). -# -# - Non-spacing and enclosing combining characters (general category code Mn -# or Me in the Unicode database) have a column width of 0. -# -# - SOFT HYPHEN (U+00AD) has a column width of 1. -# -# - Other format characters (general category code Cf in the Unicode -# database) and ZERO WIDTH SPACE (U+200B) have a column width of 0. -# -# - Hangul Jamo medial vowels and final consonants (U+1160-U+11FF) have a -# column width of 0. -# -# - Spacing characters in the East Asian Wide (W) or East Asian Full-width -# (F) category as defined in Unicode Technical Report #11 have a column -# width of 2. -# -# - All remaining characters (including all printable ISO 8859-1 and WGL4 -# characters, Unicode control characters, etc.) have a column width of 1. 
-# -#This implementation assumes that wchar_t characters are encoded -#in ISO 10646. -# - -# sorted list of non-overlapping intervals of non-spacing characters -# generated by "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c" -const combining = [ - ( 0x0300, 0x036F ), ( 0x0483, 0x0486 ), ( 0x0488, 0x0489 ), - ( 0x0591, 0x05BD ), ( 0x05BF, 0x05BF ), ( 0x05C1, 0x05C2 ), - ( 0x05C4, 0x05C5 ), ( 0x05C7, 0x05C7 ), ( 0x0600, 0x0603 ), - ( 0x0610, 0x0615 ), ( 0x064B, 0x065E ), ( 0x0670, 0x0670 ), - ( 0x06D6, 0x06E4 ), ( 0x06E7, 0x06E8 ), ( 0x06EA, 0x06ED ), - ( 0x070F, 0x070F ), ( 0x0711, 0x0711 ), ( 0x0730, 0x074A ), - ( 0x07A6, 0x07B0 ), ( 0x07EB, 0x07F3 ), ( 0x0901, 0x0902 ), - ( 0x093C, 0x093C ), ( 0x0941, 0x0948 ), ( 0x094D, 0x094D ), - ( 0x0951, 0x0954 ), ( 0x0962, 0x0963 ), ( 0x0981, 0x0981 ), - ( 0x09BC, 0x09BC ), ( 0x09C1, 0x09C4 ), ( 0x09CD, 0x09CD ), - ( 0x09E2, 0x09E3 ), ( 0x0A01, 0x0A02 ), ( 0x0A3C, 0x0A3C ), - ( 0x0A41, 0x0A42 ), ( 0x0A47, 0x0A48 ), ( 0x0A4B, 0x0A4D ), - ( 0x0A70, 0x0A71 ), ( 0x0A81, 0x0A82 ), ( 0x0ABC, 0x0ABC ), - ( 0x0AC1, 0x0AC5 ), ( 0x0AC7, 0x0AC8 ), ( 0x0ACD, 0x0ACD ), - ( 0x0AE2, 0x0AE3 ), ( 0x0B01, 0x0B01 ), ( 0x0B3C, 0x0B3C ), - ( 0x0B3F, 0x0B3F ), ( 0x0B41, 0x0B43 ), ( 0x0B4D, 0x0B4D ), - ( 0x0B56, 0x0B56 ), ( 0x0B82, 0x0B82 ), ( 0x0BC0, 0x0BC0 ), - ( 0x0BCD, 0x0BCD ), ( 0x0C3E, 0x0C40 ), ( 0x0C46, 0x0C48 ), - ( 0x0C4A, 0x0C4D ), ( 0x0C55, 0x0C56 ), ( 0x0CBC, 0x0CBC ), - ( 0x0CBF, 0x0CBF ), ( 0x0CC6, 0x0CC6 ), ( 0x0CCC, 0x0CCD ), - ( 0x0CE2, 0x0CE3 ), ( 0x0D41, 0x0D43 ), ( 0x0D4D, 0x0D4D ), - ( 0x0DCA, 0x0DCA ), ( 0x0DD2, 0x0DD4 ), ( 0x0DD6, 0x0DD6 ), - ( 0x0E31, 0x0E31 ), ( 0x0E34, 0x0E3A ), ( 0x0E47, 0x0E4E ), - ( 0x0EB1, 0x0EB1 ), ( 0x0EB4, 0x0EB9 ), ( 0x0EBB, 0x0EBC ), - ( 0x0EC8, 0x0ECD ), ( 0x0F18, 0x0F19 ), ( 0x0F35, 0x0F35 ), - ( 0x0F37, 0x0F37 ), ( 0x0F39, 0x0F39 ), ( 0x0F71, 0x0F7E ), - ( 0x0F80, 0x0F84 ), ( 0x0F86, 0x0F87 ), ( 0x0F90, 0x0F97 ), - ( 0x0F99, 0x0FBC ), ( 0x0FC6, 0x0FC6 ), ( 0x102D, 0x1030 ), - ( 
0x1032, 0x1032 ), ( 0x1036, 0x1037 ), ( 0x1039, 0x1039 ), - ( 0x1058, 0x1059 ), ( 0x1160, 0x11FF ), ( 0x135F, 0x135F ), - ( 0x1712, 0x1714 ), ( 0x1732, 0x1734 ), ( 0x1752, 0x1753 ), - ( 0x1772, 0x1773 ), ( 0x17B4, 0x17B5 ), ( 0x17B7, 0x17BD ), - ( 0x17C6, 0x17C6 ), ( 0x17C9, 0x17D3 ), ( 0x17DD, 0x17DD ), - ( 0x180B, 0x180D ), ( 0x18A9, 0x18A9 ), ( 0x1920, 0x1922 ), - ( 0x1927, 0x1928 ), ( 0x1932, 0x1932 ), ( 0x1939, 0x193B ), - ( 0x1A17, 0x1A18 ), ( 0x1B00, 0x1B03 ), ( 0x1B34, 0x1B34 ), - ( 0x1B36, 0x1B3A ), ( 0x1B3C, 0x1B3C ), ( 0x1B42, 0x1B42 ), - ( 0x1B6B, 0x1B73 ), ( 0x1DC0, 0x1DCA ), ( 0x1DFE, 0x1DFF ), - ( 0x200B, 0x200F ), ( 0x202A, 0x202E ), ( 0x2060, 0x2063 ), - ( 0x206A, 0x206F ), ( 0x20D0, 0x20EF ), ( 0x302A, 0x302F ), - ( 0x3099, 0x309A ), ( 0xA806, 0xA806 ), ( 0xA80B, 0xA80B ), - ( 0xA825, 0xA826 ), ( 0xFB1E, 0xFB1E ), ( 0xFE00, 0xFE0F ), - ( 0xFE20, 0xFE23 ), ( 0xFEFF, 0xFEFF ), ( 0xFFF9, 0xFFFB ), - ( 0x10A01, 0x10A03 ), ( 0x10A05, 0x10A06 ), ( 0x10A0C, 0x10A0F ), - ( 0x10A38, 0x10A3A ), ( 0x10A3F, 0x10A3F ), ( 0x1D167, 0x1D169 ), - ( 0x1D173, 0x1D182 ), ( 0x1D185, 0x1D18B ), ( 0x1D1AA, 0x1D1AD ), - ( 0x1D242, 0x1D244 ), ( 0xE0001, 0xE0001 ), ( 0xE0020, 0xE007F ), - ( 0xE0100, 0xE01EF ) -] - -func is_dwidth(r: Rune): bool = - let ucs = int(r) - return (ucs >= 0x1100 and - (ucs <= 0x115f or # Hangul Jamo init. consonants - ucs == 0x2329 or ucs == 0x232a or - (ucs >= 0x2e80 and ucs <= 0xa4cf and - ucs != 0x303f) or # CJK ... 
Yi - (ucs >= 0xac00 and ucs <= 0xd7a3) or # Hangul Syllables - (ucs >= 0xf900 and ucs <= 0xfaff) or # CJK Compatibility Ideographs - (ucs >= 0xfe10 and ucs <= 0xfe19) or # Vertical forms - (ucs >= 0xfe30 and ucs <= 0xfe6f) or # CJK Compatibility Forms - (ucs >= 0xff00 and ucs <= 0xff60) or # Fullwidth Forms - (ucs >= 0xffe0 and ucs <= 0xffe6) or - (ucs >= 0x20000 and ucs <= 0x2fffd) or - (ucs >= 0x30000 and ucs <= 0x3fffd))) - -func makewidthtable(): array[0..0x10FFFF, byte] = - for r in low(char)..high(char): - if r.isControlChar(): - result[int(r)] = 2 - else: - result[int(r)] = 1 - - var i = 0 - var next_combining = combining[i] - for ucs in 256..0x10FFFF: - if ucs >= next_combining[0]: - if ucs <= next_combining[1]: - result[ucs] = 0 - continue - elif i + 1 < combining.len: - inc i - next_combining = combining[i] - - if Rune(ucs).is_dwidth(): - result[ucs] = 2 - else: - result[ucs] = 1 - - for range in combining: - for r in range[0]..range[1]: - result[r] = 0 - - -# lowmem: use slow binary search etc method -when defined(lowmem): - func width*(r: Rune): int = - # binary search in table of non-spacing characters - if bisearch(r, combining): - return 0 - - if r.isControlChar(): - return 2 - - # if we arrive here, ucs is not a combining or C0/C1 control character - - if r.is_dwidth(): - return 2 - return 1 - - func width*(r: Rune): int = - return int(width_table[int(r)]) -# small: store lookup table in memory on startup -elif defined(small): - let width_table = makewidthtable() - func width*(r: Rune): int = - {.cast(noSideEffect).}: - return int(width_table[int(r)]) -# release: store lookup table in executable -else: - const width_table = makewidthtable() - func width*(r: Rune): int = - return int(width_table[int(r)]) - -func width*(s: string): int = - for r in s.runes(): - result += width(r) - -func width*(s: seq[Rune]): int = - for r in s: - result += width(r) - -# sorted list of non-overlapping intervals of East Asian Ambiguous -# characters, generated by 
"uniset +WIDTH-A -cat=Me -cat=Mn -cat=Cf c" - -const ambiguous = [ - ( 0x00A1, 0x00A1 ), ( 0x00A4, 0x00A4 ), ( 0x00A7, 0x00A8 ), - ( 0x00AA, 0x00AA ), ( 0x00AE, 0x00AE ), ( 0x00B0, 0x00B4 ), - ( 0x00B6, 0x00BA ), ( 0x00BC, 0x00BF ), ( 0x00C6, 0x00C6 ), - ( 0x00D0, 0x00D0 ), ( 0x00D7, 0x00D8 ), ( 0x00DE, 0x00E1 ), - ( 0x00E6, 0x00E6 ), ( 0x00E8, 0x00EA ), ( 0x00EC, 0x00ED ), - ( 0x00F0, 0x00F0 ), ( 0x00F2, 0x00F3 ), ( 0x00F7, 0x00FA ), - ( 0x00FC, 0x00FC ), ( 0x00FE, 0x00FE ), ( 0x0101, 0x0101 ), - ( 0x0111, 0x0111 ), ( 0x0113, 0x0113 ), ( 0x011B, 0x011B ), - ( 0x0126, 0x0127 ), ( 0x012B, 0x012B ), ( 0x0131, 0x0133 ), - ( 0x0138, 0x0138 ), ( 0x013F, 0x0142 ), ( 0x0144, 0x0144 ), - ( 0x0148, 0x014B ), ( 0x014D, 0x014D ), ( 0x0152, 0x0153 ), - ( 0x0166, 0x0167 ), ( 0x016B, 0x016B ), ( 0x01CE, 0x01CE ), - ( 0x01D0, 0x01D0 ), ( 0x01D2, 0x01D2 ), ( 0x01D4, 0x01D4 ), - ( 0x01D6, 0x01D6 ), ( 0x01D8, 0x01D8 ), ( 0x01DA, 0x01DA ), - ( 0x01DC, 0x01DC ), ( 0x0251, 0x0251 ), ( 0x0261, 0x0261 ), - ( 0x02C4, 0x02C4 ), ( 0x02C7, 0x02C7 ), ( 0x02C9, 0x02CB ), - ( 0x02CD, 0x02CD ), ( 0x02D0, 0x02D0 ), ( 0x02D8, 0x02DB ), - ( 0x02DD, 0x02DD ), ( 0x02DF, 0x02DF ), ( 0x0391, 0x03A1 ), - ( 0x03A3, 0x03A9 ), ( 0x03B1, 0x03C1 ), ( 0x03C3, 0x03C9 ), - ( 0x0401, 0x0401 ), ( 0x0410, 0x044F ), ( 0x0451, 0x0451 ), - ( 0x2010, 0x2010 ), ( 0x2013, 0x2016 ), ( 0x2018, 0x2019 ), - ( 0x201C, 0x201D ), ( 0x2020, 0x2022 ), ( 0x2024, 0x2027 ), - ( 0x2030, 0x2030 ), ( 0x2032, 0x2033 ), ( 0x2035, 0x2035 ), - ( 0x203B, 0x203B ), ( 0x203E, 0x203E ), ( 0x2074, 0x2074 ), - ( 0x207F, 0x207F ), ( 0x2081, 0x2084 ), ( 0x20AC, 0x20AC ), - ( 0x2103, 0x2103 ), ( 0x2105, 0x2105 ), ( 0x2109, 0x2109 ), - ( 0x2113, 0x2113 ), ( 0x2116, 0x2116 ), ( 0x2121, 0x2122 ), - ( 0x2126, 0x2126 ), ( 0x212B, 0x212B ), ( 0x2153, 0x2154 ), - ( 0x215B, 0x215E ), ( 0x2160, 0x216B ), ( 0x2170, 0x2179 ), - ( 0x2190, 0x2199 ), ( 0x21B8, 0x21B9 ), ( 0x21D2, 0x21D2 ), - ( 0x21D4, 0x21D4 ), ( 0x21E7, 0x21E7 ), ( 0x2200, 0x2200 ), - ( 
0x2202, 0x2203 ), ( 0x2207, 0x2208 ), ( 0x220B, 0x220B ), - ( 0x220F, 0x220F ), ( 0x2211, 0x2211 ), ( 0x2215, 0x2215 ), - ( 0x221A, 0x221A ), ( 0x221D, 0x2220 ), ( 0x2223, 0x2223 ), - ( 0x2225, 0x2225 ), ( 0x2227, 0x222C ), ( 0x222E, 0x222E ), - ( 0x2234, 0x2237 ), ( 0x223C, 0x223D ), ( 0x2248, 0x2248 ), - ( 0x224C, 0x224C ), ( 0x2252, 0x2252 ), ( 0x2260, 0x2261 ), - ( 0x2264, 0x2267 ), ( 0x226A, 0x226B ), ( 0x226E, 0x226F ), - ( 0x2282, 0x2283 ), ( 0x2286, 0x2287 ), ( 0x2295, 0x2295 ), - ( 0x2299, 0x2299 ), ( 0x22A5, 0x22A5 ), ( 0x22BF, 0x22BF ), - ( 0x2312, 0x2312 ), ( 0x2460, 0x24E9 ), ( 0x24EB, 0x254B ), - ( 0x2550, 0x2573 ), ( 0x2580, 0x258F ), ( 0x2592, 0x2595 ), - ( 0x25A0, 0x25A1 ), ( 0x25A3, 0x25A9 ), ( 0x25B2, 0x25B3 ), - ( 0x25B6, 0x25B7 ), ( 0x25BC, 0x25BD ), ( 0x25C0, 0x25C1 ), - ( 0x25C6, 0x25C8 ), ( 0x25CB, 0x25CB ), ( 0x25CE, 0x25D1 ), - ( 0x25E2, 0x25E5 ), ( 0x25EF, 0x25EF ), ( 0x2605, 0x2606 ), - ( 0x2609, 0x2609 ), ( 0x260E, 0x260F ), ( 0x2614, 0x2615 ), - ( 0x261C, 0x261C ), ( 0x261E, 0x261E ), ( 0x2640, 0x2640 ), - ( 0x2642, 0x2642 ), ( 0x2660, 0x2661 ), ( 0x2663, 0x2665 ), - ( 0x2667, 0x266A ), ( 0x266C, 0x266D ), ( 0x266F, 0x266F ), - ( 0x273D, 0x273D ), ( 0x2776, 0x277F ), ( 0xE000, 0xF8FF ), - ( 0xFFFD, 0xFFFD ), ( 0xF0000, 0xFFFFD ), ( 0x100000, 0x10FFFD ) -] - -# -# The following functions are the same as mk_wcwidth() and -# mk_wcswidth(), except that spacing characters in the East Asian -# Ambiguous (A) category as defined in Unicode Technical Report #11 -# have a column width of 2. This variant might be useful for users of -# CJK legacy encodings who want to migrate to UCS without changing -# the traditional terminal character-width behaviour. It is not -# otherwise recommended for general use. 
-# -# note: seconded, this should only be used if some option was changed (TODO: -# make such an option available) - -func mk_wcwidth_cjk(r: Rune): int = - # binary search in table of non-spacing characters - if bisearch(r, ambiguous): - return 2; - - return r.width(); - -func mk_wcswidth_cjk(s: string): int = - for r in s.runes: - result += mk_wcwidth_cjk(r) - return result - -func skipBlanks*(buf: string, at: int): int = - result = at - while result < buf.len and buf[result].isWhitespace(): - inc result - -iterator split*(s: seq[Rune], sep: Rune): seq[Rune] = - var i = 0 - var prev = 0 - while i < s.len: - if s[i] == sep: - yield s.substr(prev, i) - prev = i - inc i - - if prev < i: - yield s.substr(prev, i) diff --git a/src/types/enums.nim b/src/types/enums.nim new file mode 100644 index 00000000..21b27ab6 --- /dev/null +++ b/src/types/enums.nim @@ -0,0 +1,101 @@ +import tables + +type + NodeType* = enum + UNKNOWN_NODE = 0, + ELEMENT_NODE = 1, + ATTRIBUTE_NODE = 2, + TEXT_NODE = 3, + CDATA_SECTION_NODE = 4, + ENTITY_REFERENCE_NODE = 5, + ENTITY_NODE = 6 + PROCESSING_INSTRUCTION_NODE = 7, + COMMENT_NODE = 8, + DOCUMENT_NODE = 9, + DOCUMENT_TYPE_NODE = 10, + DOCUMENT_FRAGMENT_NODE = 11, + NOTATION_NODE = 12 + + DisplayType* = enum + DISPLAY_INLINE, DISPLAY_BLOCK, DISPLAY_LIST_ITEM, DISPLAY_TABLE_COLUMN, + DISPLAY_INLINE_BLOCK, DISPLAY_NONE + + InputType* = enum + INPUT_UNKNOWN, INPUT_BUTTON, INPUT_CHECKBOX, INPUT_COLOR, INPUT_DATE, + INPUT_DATETIME_LOCAL, INPUT_EMAIL, INPUT_FILE, INPUT_HIDDEN, INPUT_IMAGE, + INPUT_MONTH, INPUT_NUMBER, INPUT_PASSWORD, INPUT_RADIO, INPUT_RANGE, + INPUT_RESET, INPUT_SEARCH, INPUT_SUBMIT, INPUT_TEL, INPUT_TEXT, INPUT_TIME, + INPUT_URL, INPUT_WEEK + + WhitespaceType* = enum + WHITESPACE_UNKNOWN, WHITESPACE_NORMAL, WHITESPACE_NOWRAP, WHITESPACE_PRE, + WHITESPACE_PRE_LINE, WHITESPACE_PRE_WRAP, WHITESPACE_INITIAL, + WHITESPACE_INHERIT + + TagType* = enum + TAG_UNKNOWN, TAG_HTML, TAG_BASE, TAG_HEAD, TAG_LINK, TAG_META, TAG_STYLE, + 
TAG_TITLE, TAG_BODY, TAG_ADDRESS, TAG_ARTICLE, TAG_ASIDE, TAG_FOOTER, + TAG_HEADER, TAG_H1, TAG_H2, TAG_H3, TAG_H4, TAG_H5, TAG_H6, TAG_HGROUP, + TAG_MAIN, TAG_NAV, TAG_SECTION, TAG_BLOCKQUOTE, TAG_DD, TAG_DIV, TAG_DL, + TAG_DT, TAG_FIGCAPTION, TAG_FIGURE, TAG_HR, TAG_LI, TAG_OL, TAG_P, TAG_PRE, + TAG_UL, TAG_A, TAG_ABBR, TAG_B, TAG_BDI, TAG_BDO, TAG_BR, TAG_CITE, + TAG_CODE, TAG_DATA, TAG_DFN, TAG_EM, TAG_I, TAG_KBD, TAG_MARK, TAG_Q, + TAG_RB, TAG_RP, TAG_RT, TAG_RTC, TAG_RUBY, TAG_S, TAG_SAMP, TAG_SMALL, + TAG_SPAN, TAG_STRONG, TAG_SUB, TAG_SUP, TAG_TIME, TAG_U, TAG_VAR, TAG_WBR, + TAG_AREA, TAG_AUDIO, TAG_IMG, TAG_MAP, TAG_TRACK, TAG_VIDEO, + TAG_IFRAME, TAG_OBJECT, TAG_PARAM, TAG_PICTURE, TAG_PORTAL, TAG_SOURCE, + TAG_CANVAS, TAG_NOSCRIPT, TAG_SCRIPT, TAG_DEL, TAG_INS, TAG_CAPTION, + TAG_COL, TAG_COLGROUP, TAG_TABLE, TAG_TBODY, TAG_TD, TAG_TFOOT, TAG_TH, + TAG_THEAD, TAG_TR, TAG_BUTTON, TAG_DATALIST, TAG_FIELDSET, TAG_FORM, + TAG_INPUT, TAG_LABEL, TAG_LEGEND, TAG_METER, TAG_OPTGROUP, TAG_OPTION, + TAG_OUTPUT, TAG_PROGRESS, TAG_SELECT, TAG_TEXTAREA, TAG_DETAILS, + TAG_DIALOG, TAG_MENU, TAG_SUMMARY, TAG_BLINK, TAG_CENTER, TAG_CONTENT, + TAG_DIR, TAG_FONT, TAG_FRAME, TAG_NOFRAMES, TAG_FRAMESET, TAG_STRIKE, TAG_TT + + CSSTokenType* = enum + CSS_NO_TOKEN, CSS_IDENT_TOKEN, CSS_FUNCTION_TOKEN, CSS_AT_KEYWORD_TOKEN, + CSS_HASH_TOKEN, CSS_STRING_TOKEN, CSS_BAD_STRING_TOKEN, CSS_URL_TOKEN, + CSS_BAD_URL_TOKEN, CSS_DELIM_TOKEN, CSS_NUMBER_TOKEN, CSS_PERCENTAGE_TOKEN, + CSS_DIMENSION_TOKEN, CSS_WHITESPACE_TOKEN, CSS_CDO_TOKEN, CSS_CDC_TOKEN, + CSS_COLON_TOKEN, CSS_SEMICOLON_TOKEN, CSS_COMMA_TOKEN, CSS_RBRACKET_TOKEN, + CSS_LBRACKET_TOKEN, CSS_LPAREN_TOKEN, CSS_RPAREN_TOKEN, CSS_LBRACE_TOKEN, + CSS_RBRACE_TOKEN + + CSSUnit* = enum + CM_UNIT, MM_UNIT, IN_UNIT, PX_UNIT, PT_UNIT, PC_UNIT, + EM_UNIT, EX_UNIT, CH_UNIT, REM_UNIT, VW_UNIT, VH_UNIT, VMIN_UNIT, VMAX_UNIT, + PERC_UNIT + + CSSPosition* = enum + STATIC_POSITION, RELATIVE_POSITION, ABSOLUTE_POSITION, FIXED_POSITION, + 
INHERIT_POSITION + +const DisplayInlineTags* = { + TAG_A, TAG_ABBR, TAG_B, TAG_BDO, TAG_BR, TAG_BUTTON, TAG_CITE, TAG_CODE, + TAG_DEL, TAG_DFN, TAG_EM, TAG_FONT, TAG_I, TAG_IMG, TAG_INS, TAG_INPUT, + TAG_IFRAME, TAG_KBD, TAG_LABEL, TAG_MAP, TAG_OBJECT, TAG_Q, TAG_SAMP, + TAG_SCRIPT, TAG_SELECT, TAG_SMALL, TAG_SPAN, TAG_STRONG, TAG_SUB, TAG_SUP, + TAG_TEXTAREA, TAG_TT, TAG_VAR, TAG_FONT, TAG_IFRAME, TAG_U, TAG_S, TAG_STRIKE, + TAG_FRAME, TAG_IMG, TAG_INPUT +} + +const DisplayNoneTags* = { + TAG_AREA, TAG_BASE, TAG_SOURCE, TAG_TRACK, TAG_LINK, TAG_META, TAG_PARAM, TAG_WBR +} + +const DisplayInlineBlockTags* = { + TAG_IMG +} + +const DisplayTableColumnTags* = { + TAG_COL +} + +const SelfClosingTagTypes* = { + TAG_LI, TAG_P +} + +const VoidTagTypes* = { + TAG_AREA, TAG_BASE, TAG_BR, TAG_COL, TAG_FRAME, TAG_HR, TAG_IMG, TAG_INPUT, + TAG_SOURCE, TAG_TRACK, TAG_LINK, TAG_META, TAG_PARAM, TAG_WBR, TAG_HR +} diff --git a/src/types/tagtypes.nim b/src/types/tagtypes.nim new file mode 100644 index 00000000..34111570 --- /dev/null +++ b/src/types/tagtypes.nim @@ -0,0 +1,30 @@ +import tables +import enums +import strutils + +func getTagTypeMap(): Table[string, TagType] = + for i in low(TagType) .. high(TagType): + let enumname = $TagType(i) + let tagname = enumname.split('_')[1..^1].join("_").tolower() + result[tagname] = TagType(i) + +func getInputTypeMap(): Table[string, InputType] = + for i in low(InputType) .. 
high(InputType): + let enumname = $InputType(i) + let tagname = enumname.split('_')[1..^1].join("_").tolower() + result[tagname] = InputType(i) + +const tagTypeMap = getTagTypeMap() +const inputTypeMap = getInputTypeMap() + +func tagType*(s: string): TagType = + if tagTypeMap.hasKey(s): + return tagTypeMap[s] + else: + return TAG_UNKNOWN + +func inputType*(s: string): InputType = + if inputTypeMap.hasKey(s): + return inputTypeMap[s] + else: + return INPUT_UNKNOWN diff --git a/src/utils/radixtree.nim b/src/utils/radixtree.nim new file mode 100644 index 00000000..0601eeba --- /dev/null +++ b/src/utils/radixtree.nim @@ -0,0 +1,309 @@ +# Radix tree implementation. It isn't that much faster than a hash table, +# however it *is* faster. Use StaticRadixTree for saving trees in the +# executable and RadixNode otherwise (which needs less bounds checking). + +import json +import tables + +type + RadixPair[T] = tuple[k: string, v: RadixNode[T]] + + RadixNode*[T] = ref object + children*: seq[RadixPair[T]] + case leaf*: bool + of true: value*: T + of false: discard + + StaticRadixPair = tuple[k: string, v: int] + + StaticRadixNode[T] = object + children*: seq[StaticRadixPair] + case leaf*: bool + of true: value*: T + of false: discard + + StaticRadixTree*[T] = object + nodes*: seq[StaticRadixNode[T]] + +func newStaticRadixTree*[T](): StaticRadixTree[T] = + result.nodes.add(StaticRadixNode[T](leaf: false)) + +func newRadixTree*[T](): RadixNode[T] = + new(result) + +func toRadixTree*[T](table: Table[string, T]): RadixNode[T] = + result = newRadixTree[T]() + for k, v in table: + result[k] = v + +# getOrDefault: we have to compare the entire string but if it doesn't match +# exactly we can just return default. 
# Look up the edge labelled exactly `k` among `pairseq` and return the node
# index it points to. Only the first edge whose first character matches is
# examined; if that edge is not exactly `k`, or no edge matches (including an
# empty `k`), `default` is returned.
func getOrDefault(pairseq: seq[StaticRadixPair], k: string, default: int): int =
  if k.len == 0:
    # nothing to compare against; avoids indexing k[0] on an empty key
    return default
  for pair in pairseq:
    if pair.k.len > 0 and pair.k[0] == k[0]:
      if pair.k == k:
        return pair.v
      return default
  return default

# Same lookup as above, over the children of a RadixNode. Returns the child
# reached through the edge labelled exactly `k`, or `default`.
func getOrDefault[T](node: RadixNode[T], k: string, default: RadixNode[T]): RadixNode[T] =
  if k.len == 0:
    return default
  for pair in node.children:
    if pair.k.len > 0 and pair.k[0] == k[0]:
      if pair.k == k:
        return pair.v
      return default
  return default

# Yields the edge labels of a static node's children, in storage order.
iterator keys(pairseq: seq[StaticRadixPair]): string =
  var i = 0
  while i < pairseq.len:
    yield pairseq[i].k
    inc i

# Yields the edge labels of `node`'s children, in storage order.
iterator keys*[T](node: RadixNode[T]): string =
  var i = 0
  while i < node.children.len:
    yield node.children[i].k
    inc i

# True if `pairseq` has an edge labelled exactly `k`. As in getOrDefault, only
# the first edge sharing k's first character is examined.
func contains(pairseq: seq[StaticRadixPair], k: string): bool =
  if k.len == 0:
    return false
  for pair in pairseq:
    if pair.k.len > 0 and pair.k[0] == k[0]:
      return pair.k == k
  return false

# True if `node` has a child edge labelled exactly `k`.
func contains[T](node: RadixNode[T], k: string): bool =
  if k.len == 0:
    return false
  for pair in node.children:
    if pair.k.len > 0 and pair.k[0] == k[0]:
      return pair.k == k
  return false

# Static insert
#
# Locals used by the descent loop:
#   n - index of the node reached so far (0 is the root)
#   p - index of the node n was reached from
#   k - position of the followed edge inside tree.nodes[p].children
#   i - number of characters of `key` consumed
#   j - value of i after the last edge that was matched in full
#   t - concatenation of the labels of all edges followed
#   conflict - set when an edge label and `key` differ at some character
proc `[]=`*[T](tree: var StaticRadixTree[T], key: string, value: T) =
  var n = 0
  var p = 0
  var i = 0
  var j = 0
  var k = 0
  var t = ""
  # find last matching node
  var conflict = false
  while i < key.len:
    let m = i # i at the start of this step, to detect "no edge followed"
    var o = 0 # index of the child currently inspected
    for pk in tree.nodes[n].children.keys:
      if pk[0] == key[i]:
        var l = 0
        while l < pk.len and i + l < key.len:
          if pk[l] != key[i + l]:
            conflict = true
            break
          inc l
        p = n
        k = o
        n = tree.nodes[n].children[k].v
        t &= pk
        i += l
        if not conflict and pk.len == l:
          j = i
        break
      inc o
    if i == m:
      break
    if conflict:
      break

  # if first node, just add normally
  if n == 0:
    tree.nodes.add(StaticRadixNode[T](leaf: true, value: value))
    tree.nodes[n].children.add((k: key, v: int(tree.nodes.len - 1)))
  elif conflict:
    # conflict somewhere, so:
    # * add new non-leaf to parent
    # * add old to non-leaf
    # * add new to non-leaf
    # * remove old from parent
    tree.nodes[p].children.add((k: key.substr(j, i - 1), v: int(tree.nodes.len)))
    tree.nodes.add(StaticRadixNode[T](leaf: false))
    tree.nodes[^1].children.add((k: t.substr(i), v: n))
    tree.nodes[^1].children.add((k: key.substr(i), v: int(tree.nodes.len)))
    tree.nodes.add(StaticRadixNode[T](leaf: true, value: value))
    tree.nodes[p].children.del(k)
  elif key.len == t.len:
    # new matches a node, so replace
    tree.nodes[n] = StaticRadixNode[T](leaf: true, value: value, children: tree.nodes[n].children)
  elif i == j:
    # new is longer than the old, so add child to old
    tree.nodes[n].children.add((k: key.substr(i), v: int(tree.nodes.len)))
    tree.nodes.add(StaticRadixNode[T](leaf: true, value: value))
  else:
    # new is shorter than old, so:
    # * add new to parent
    # * add old to new
    # * remove old from parent
    tree.nodes[p].children.add((k: key.substr(j, i - 1), v: int(tree.nodes.len)))
    tree.nodes.add(StaticRadixNode[T](leaf: true, value: value))
    tree.nodes[^1].children.add((k: key.substr(i), v: n))
    tree.nodes[p].children.del(k)

# O(1) add procedures for insert

# Appends a new leaf child holding `v` under edge `k`.
proc add[T](node: RadixNode[T], k: string, v: T) =
  node.children.add((k, RadixNode[T](leaf: true, value: v)))

# Appends a new, empty non-leaf child under edge `k`.
proc add[T](node: RadixNode[T], k: string) =
  node.children.add((k, RadixNode[T](leaf: false)))

# Appends the existing node `v` as a child under edge `k`.
proc add[T](node: RadixNode[T], k: string, v: RadixNode[T]) =
  node.children.add((k, v))

# Non-static insert
#
# Mirrors the static insert, with node references in place of indices:
# n is the node reached, p the node it was reached from, k the position of
# the followed edge in p.children, i the number of characters of `key`
# consumed, j the value of i after the last fully matched edge and t the
# concatenation of the labels followed.
proc `[]=`*[T](tree: RadixNode[T], key: string, value: T) =
  var n = tree
  var p: RadixNode[T] = nil
  var i = 0
  var j = 0
  var k = 0
  var t = ""

  # find last matching node
  var conflict = false
  while i < key.len:
    let m = i # i at the start of this step, to detect "no edge followed"
    var o = 0 # index of the child currently inspected
    for pk in n.keys:
      if pk[0] == key[i]:
        var l = 0
        while l < pk.len and i + l < key.len:
          if pk[l] != key[i + l]:
            conflict = true
            #t = key.substr(0, i + l - 1) & pk.substr(l)
            break
          inc l
        p = n
        k = o
        n = n.children[k].v
        t &= pk
        i += l
        if not conflict and pk.len == l:
          j = i
        #  t = key.substr(0, i - 1)
        #elif not conflict and pk.len > l:
        #  t = key & pk.substr(l)
        break
      inc o
    if i == m:
      break
    if conflict:
      break

  if n == tree:
    # first node, just add normally
    tree.add(key, value)
  elif conflict:
    # conflict somewhere, so:
    # * add new non-leaf to parent
    # * add old to non-leaf
    # * add new to non-leaf
    # * remove old from parent
    p.add(key.substr(j, i - 1))
    p.children[^1].v.add(t.substr(i), n)
    p.children[^1].v.add(key.substr(i), value)
    p.children.del(k)
  elif key.len == t.len:
    # new matches a node, so replace
    p.children[k].v = RadixNode[T](leaf: true, value: value, children: n.children)
  elif key.len > t.len:
    # new is longer than the old, so add child to old
    n.add(key.substr(i), value)
  else:
    # new is shorter than old, so:
    # * add new to parent
    # * add old to new
    # * remove old from parent
    p.add(key.substr(j, i - 1), value)
    p.children[^1].v.add(t.substr(i), n)
    p.children.del(k)

# Single-step lookup: index of the node reached from node `at` through the
# edge labelled exactly `key`, or `at` itself if there is no such edge.
func `{}`*[T](tree: StaticRadixTree[T], key: string, at: int = 0): int =
  return tree.nodes[at].children.getOrDefault(key, at)

# Single-step lookup: child of `node` reached through the edge labelled
# exactly `key`, or `node` itself if there is no such edge.
func `{}`*[T](node: RadixNode[T], key: string): RadixNode[T] =
  return node.getOrDefault(key, node)

# True if, starting from node index `at`, `prefix` can be followed along the
# tree's edges until it is used up. An edge may be longer than the remainder
# of `prefix`.
func hasPrefix*[T](tree: StaticRadixTree[T], prefix: string, at: int = 0): bool =
  var n = at
  var i = 0

  while i < prefix.len:
    let m = i
    var j = 0
    for pk in tree.nodes[n].children.keys:
      if pk[0] == prefix[i]:
        var l = 0
        while l < pk.len and i + l < prefix.len:
          if pk[l] != prefix[i + l]:
            return false
          inc l
        n = tree.nodes[n].children[j].v
        i += l
        break
      inc j
    if i == m:
      # no child edge starts with prefix[i]
      return false

  return true

# Same as above for the non-static tree, starting from node `at`.
func hasPrefix*[T](tree: RadixNode[T], prefix: string, at: RadixNode[T] = tree): bool =
  var n = at
  var i = 0

  while i < prefix.len:
    let m = i
    var j = 0
    for pk in n.keys:
      if pk[0] == prefix[i]:
        var l = 0
        while l < pk.len and i + l < prefix.len:
          if pk[l] != prefix[i + l]:
            return false
          inc l
        n = n.children[j].v
        i += l
        break
      inc j
    if i == m:
      # no child edge starts with prefix[i]
      return false

  return true

# ---- src/utils/termattrs.nim (new file) ----

import terminal

type
  TermAttributes* = object
    termWidth*: int
    termHeight*: int

# Returns the terminal size as reported by terminalWidth()/terminalHeight().
proc getTermAttributes*(): TermAttributes =
  let attrs = TermAttributes(termWidth: terminalWidth(),
                             termHeight: terminalHeight())
  return attrs

# ---- src/utils/twtstr.nim (new file) ----

import terminal
import strutils
import unicode

# The ansi* helpers build a sequence of string fragments: escape codes are
# kept as separate elements next to the text they apply to.

# @[style code, str]
func ansiStyle*(str: string, style: Style): seq[string] =
  result &= ansiStyleCode(style)
  result &= str

# @[foreground color code, str]
func ansiFgColor*(str: string, color: ForegroundColor): seq[string] =
  result &= ansiForegroundColorCode(color)
  result &= str

# @[str, reset code]
func ansiReset*(str: string): seq[string] =
  result &= str
  result &= ansiResetCode

# Prepends the style code to an existing fragment sequence.
func ansiStyle*(str: seq[string], style: Style): seq[string] =
  return ansiStyleCode(style) & str

# Prepends the foreground color code to an existing fragment sequence.
func ansiFgColor*(str: seq[string], color: ForegroundColor): seq[string] =
  return ansiForegroundColorCode(color) & str

# Appends the reset code to an existing fragment sequence.
func ansiReset*(str: seq[string]): seq[string] =
  return str & ansiResetCode

# If `str` has more than `max` runes, keeps its first max - 2 runes and
# appends "$"; otherwise returns `str` unchanged.
func maxString*(str: string, max: int): string =
  if max < str.runeLen():
    return str.runeSubstr(0, max - 2) & "$"
  return str

# Pads `str` with spaces up to `size` runes, or shortens it with maxString if
# it is not shorter than `size`.
func fitValueToSize*(str: string, size: int): string =
  if str.runeLen < size:
    return str & ' '.repeat(size - str.runeLen)
  return str.maxString(size)

# Wraps `str` in red "[" "]" brackets, as escape-code/text fragments.
func buttonFmt*(str: string): seq[string] =
  return "[".ansiFgColor(fgRed) & str.ansiFgColor(fgRed).ansiReset() & "]".ansiFgColor(fgRed).ansiReset()

func buttonFmt*(str: seq[string]): seq[string] =
  return "[".ansiFgColor(fgRed) & str.ansiFgColor(fgRed).ansiReset() & "]".ansiFgColor(fgRed).ansiReset()

# Wraps `str` in plain brackets, without escape codes.
func buttonRaw*(str: string): string =
  return "[" & str & "]"

# Returns `str` without any occurrence of the first rune of `c`.
# `c` must not be empty (its first rune is indexed unconditionally).
func remove*(str: string, c: string): string =
  let rem = c.toRunes()[0]
  for rune in str.runes:
    if rem != rune:
      result &= $rune

func isWhitespace*(c: char): bool =
  case c
  of ' ', '\n', '\r', '\t', '\f': return true
  else: return false

# True for 0x00..0x1F and 0x7F.
func isControlChar*(c: char): bool =
  case c
  of chr(0x00)..chr(0x1F): return true
  of chr(0x7F): return true
  else: return false

func isControlChar*(r: Rune): bool =
  case r
  of Rune(0x00)..Rune(0x1F): return true
  of Rune(0x7F): return true
  else: return false

# Builds a 256-entry table indexed by char: every c >= 'a' maps to
# c - 'a' + 1, '?' maps to 127 (DEL) and everything else to 0.
func genControlCharMap*(): string =
  for c in low(char)..high(char):
    if c >= 'a':
      result &= char(int(c) - int('a') + 1)
    elif c == '?':
      result &= char(127)
    else:
      result &= char(0)

const controlCharMap = genControlCharMap()

# Table lookup in controlCharMap, e.g. 'a' -> '\x01'.
func getControlChar*(c: char): char =
  return controlCharMap[int(c)]

# Inverse direction for display: maps c <= 0x1F to c + 'A' - 1 and DEL to
# '?'. Any other input trips the assertion.
func getControlLetter*(c: char): char =
  if int(c) <= 0x1F:
    return char(int(c) + int('A') - 1)
  elif c == '\x7F':
    return '?'
  assert(false)

# Byte index of the first occurrence of `c` at or after `start`, or -1.
func findChar*(str: string, c: char, start: int = 0): int =
  var i = start
  while i < str.len:
    if str[i] == c:
      return i
    inc i
  return -1

# Byte index of the first rune equal to `c`, decoding from byte offset
# `start`, or -1.
func findChar*(str: string, c: Rune, start: int = 0): int =
  var i = start
  var n = i
  # i and n are byte offsets, so the bound is the byte length. Comparing
  # against runeLen() ended the scan early on strings with multibyte runes
  # and recounted the runes on every iteration.
  while i < str.len:
    var r: Rune
    fastRuneAt(str, n, r)
    if r == c:
      return i
    i = n
  return -1

# Builds a 256-entry table indexed by char value: 'A'..'Z' map to 'a'..'z',
# every other char maps to itself.
func getLowerChars*(): string =
  result = ""
  for i in 0..255:
    if chr(i) >= 'A' and chr(i) <= 'Z':
      result &= chr(i + 32)
    else:
      result &= chr(i)

const lowerChars = getLowerChars()

func tolower*(c: char): char =
  return lowerChars[int(c)]

const breakWord = [
  Rune('\n'), Rune('/'), Rune('\\'), Rune(' '), Rune('&'), Rune('='),
  Rune('?'), Rune('.'), Rune(';')
]

# 256-entry table: value of a hexadecimal digit, -1 for any other char.
func genHexCharMap(): seq[int] =
  for i in 0..255:
    case chr(i)
    of '0'..'9': result &= i - ord('0')
    of 'a'..'f': result &= i - ord('a') + 10
    of 'A'..'F': result &= i - ord('A') + 10
    else: result &= -1

# 256-entry table: value of a decimal digit, -1 for any other char.
func genDecCharMap(): seq[int] =
  for i in 0..255:
    case chr(i)
    of '0'..'9': result &= i - ord('0')
    else: result &= -1

const hexCharMap = genHexCharMap()
const decCharMap = genDecCharMap()

func hexValue*(c: char): int =
  return hexCharMap[int(c)]

func decValue*(c: char): int =
  return decCharMap[int(c)]

# True if the rune fits in a char, i.e. its code point is <= high(char).
func isAscii*(r: Rune): bool =
  return int(r) <= int(high(char))

func hexValue*(r: Rune): int =
  if isAscii(r):
    return hexValue(char(r))
  return -1

func decValue*(r: Rune): int =
  if isAscii(r):
    return decValue(char(r))
  return -1

# Lowercases the runes accepted by isAscii; all other runes are dropped.
func toAsciiLower*(s: seq[Rune]): string =
  for r in s:
    if isAscii(r):
      result &= lowerChars[int(r)]

func breaksWord*(r: Rune): bool =
  return r in breakWord

func isAlphaAscii*(r: Rune): bool =
  return isAscii(r) and isAlphaAscii(char(r))

func isDigitAscii*(r: Rune): bool =
  return isAscii(r) and isDigit(char(r))

# Runes from index `i` up to, but not including, index `j`; both bounds are
# clamped to the last valid index.
func substr*(s: seq[Rune], i: int, j: int): seq[Rune] =
  if s.len == 0:
    return @[]
  return s[min(high(s), i)..min(high(s), j - 1)]

# Runes from index `i` to the end; empty if `i` is past the last index.
func substr*(s: seq[Rune], i: int): seq[Rune] =
  # `i == high(s)` is a valid start and yields the last rune; the previous
  # `i >= high(s)` test returned an empty sequence for it.
  if i > high(s) or s.len == 0:
    return @[]
  return s[min(high(s), i)..high(s)]

# Index of the first non-whitespace char at or after `at` (buf.len if none).
func skipBlanks*(buf: string, at: int): int =
  result = at
  while result < buf.len and buf[result].isWhitespace():
    inc result

# Yields the pieces of `s` delimited by `sep`. `prev` is set to the index of
# the separator itself, so every piece after the first starts with `sep`.
iterator split*(s: seq[Rune], sep: Rune): seq[Rune] =
  var i = 0
  var prev = 0
  while i < s.len:
    if s[i] == sep:
      yield s.substr(prev, i)
      prev = i
    inc i

  if prev < i:
    yield s.substr(prev, i)

# Measure length of runes. From https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
#
# The following two functions define the column width of an ISO 10646
# character as follows:
#
#   - The null character (U+0000) has a column width of 0.
#
#   - Other C0/C1 control characters and DEL will lead to a return value of 2
#     (changed from 0 b/c we normally display control chars like ^H - TODO?).
#
#   - Non-spacing and enclosing combining characters (general category code Mn
#     or Me in the Unicode database) have a column width of 0.
#
#   - SOFT HYPHEN (U+00AD) has a column width of 1.
#
#   - Other format characters (general category code Cf in the Unicode
#     database) and ZERO WIDTH SPACE (U+200B) have a column width of 0.
#
#   - Hangul Jamo medial vowels and final consonants (U+1160-U+11FF) have a
#     column width of 0.
#
#   - Spacing characters in the East Asian Wide (W) or East Asian Full-width
#     (F) category as defined in Unicode Technical Report #11 have a column
#     width of 2.
#
#   - All remaining characters (including all printable ISO 8859-1 and WGL4
#     characters, Unicode control characters, etc.) have a column width of 1.
#

# sorted list of non-overlapping intervals of non-spacing characters generated
# by "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c"
const combining = [
  ( 0x0300, 0x036F ), ( 0x0483, 0x0486 ), ( 0x0488, 0x0489 ),
  ( 0x0591, 0x05BD ), ( 0x05BF, 0x05BF ), ( 0x05C1, 0x05C2 ),
  ( 0x05C4, 0x05C5 ), ( 0x05C7, 0x05C7 ), ( 0x0600, 0x0603 ),
  ( 0x0610, 0x0615 ), ( 0x064B, 0x065E ), ( 0x0670, 0x0670 ),
  ( 0x06D6, 0x06E4 ), ( 0x06E7, 0x06E8 ), ( 0x06EA, 0x06ED ),
  ( 0x070F, 0x070F ), ( 0x0711, 0x0711 ), ( 0x0730, 0x074A ),
  ( 0x07A6, 0x07B0 ), ( 0x07EB, 0x07F3 ), ( 0x0901, 0x0902 ),
  ( 0x093C, 0x093C ), ( 0x0941, 0x0948 ), ( 0x094D, 0x094D ),
  ( 0x0951, 0x0954 ), ( 0x0962, 0x0963 ), ( 0x0981, 0x0981 ),
  ( 0x09BC, 0x09BC ), ( 0x09C1, 0x09C4 ), ( 0x09CD, 0x09CD ),
  ( 0x09E2, 0x09E3 ), ( 0x0A01, 0x0A02 ), ( 0x0A3C, 0x0A3C ),
  ( 0x0A41, 0x0A42 ), ( 0x0A47, 0x0A48 ), ( 0x0A4B, 0x0A4D ),
  ( 0x0A70, 0x0A71 ), ( 0x0A81, 0x0A82 ), ( 0x0ABC, 0x0ABC ),
  ( 0x0AC1, 0x0AC5 ), ( 0x0AC7, 0x0AC8 ), ( 0x0ACD, 0x0ACD ),
  ( 0x0AE2, 0x0AE3 ), ( 0x0B01, 0x0B01 ), ( 0x0B3C, 0x0B3C ),
  ( 0x0B3F, 0x0B3F ), ( 0x0B41, 0x0B43 ), ( 0x0B4D, 0x0B4D ),
  ( 0x0B56, 0x0B56 ), ( 0x0B82, 0x0B82 ), ( 0x0BC0, 0x0BC0 ),
  ( 0x0BCD, 0x0BCD ), ( 0x0C3E, 0x0C40 ), ( 0x0C46, 0x0C48 ),
  ( 0x0C4A, 0x0C4D ), ( 0x0C55, 0x0C56 ), ( 0x0CBC, 0x0CBC ),
  ( 0x0CBF, 0x0CBF ), ( 0x0CC6, 0x0CC6 ), ( 0x0CCC, 0x0CCD ),
  ( 0x0CE2, 0x0CE3 ), ( 0x0D41, 0x0D43 ), ( 0x0D4D, 0x0D4D ),
  ( 0x0DCA, 0x0DCA ), ( 0x0DD2, 0x0DD4 ), ( 0x0DD6, 0x0DD6 ),
  ( 0x0E31, 0x0E31 ), ( 0x0E34, 0x0E3A ), ( 0x0E47, 0x0E4E ),
  ( 0x0EB1, 0x0EB1 ), ( 0x0EB4, 0x0EB9 ), ( 0x0EBB, 0x0EBC ),
  ( 0x0EC8, 0x0ECD ), ( 0x0F18, 0x0F19 ), ( 0x0F35, 0x0F35 ),
  ( 0x0F37, 0x0F37 ), ( 0x0F39, 0x0F39 ), ( 0x0F71, 0x0F7E ),
  ( 0x0F80, 0x0F84 ), ( 0x0F86, 0x0F87 ), ( 0x0F90, 0x0F97 ),
  ( 0x0F99, 0x0FBC ), ( 0x0FC6, 0x0FC6 ), ( 0x102D, 0x1030 ),
  ( 0x1032, 0x1032 ), ( 0x1036, 0x1037 ), ( 0x1039, 0x1039 ),
  ( 0x1058, 0x1059 ), ( 0x1160, 0x11FF ), ( 0x135F, 0x135F ),
  ( 0x1712, 0x1714 ), ( 0x1732, 0x1734 ), ( 0x1752, 0x1753 ),
  ( 0x1772, 0x1773 ), ( 0x17B4, 0x17B5 ), ( 0x17B7, 0x17BD ),
  ( 0x17C6, 0x17C6 ), ( 0x17C9, 0x17D3 ), ( 0x17DD, 0x17DD ),
  ( 0x180B, 0x180D ), ( 0x18A9, 0x18A9 ), ( 0x1920, 0x1922 ),
  ( 0x1927, 0x1928 ), ( 0x1932, 0x1932 ), ( 0x1939, 0x193B ),
  ( 0x1A17, 0x1A18 ), ( 0x1B00, 0x1B03 ), ( 0x1B34, 0x1B34 ),
  ( 0x1B36, 0x1B3A ), ( 0x1B3C, 0x1B3C ), ( 0x1B42, 0x1B42 ),
  ( 0x1B6B, 0x1B73 ), ( 0x1DC0, 0x1DCA ), ( 0x1DFE, 0x1DFF ),
  ( 0x200B, 0x200F ), ( 0x202A, 0x202E ), ( 0x2060, 0x2063 ),
  ( 0x206A, 0x206F ), ( 0x20D0, 0x20EF ), ( 0x302A, 0x302F ),
  ( 0x3099, 0x309A ), ( 0xA806, 0xA806 ), ( 0xA80B, 0xA80B ),
  ( 0xA825, 0xA826 ), ( 0xFB1E, 0xFB1E ), ( 0xFE00, 0xFE0F ),
  ( 0xFE20, 0xFE23 ), ( 0xFEFF, 0xFEFF ), ( 0xFFF9, 0xFFFB ),
  ( 0x10A01, 0x10A03 ), ( 0x10A05, 0x10A06 ), ( 0x10A0C, 0x10A0F ),
  ( 0x10A38, 0x10A3A ), ( 0x10A3F, 0x10A3F ), ( 0x1D167, 0x1D169 ),
  ( 0x1D173, 0x1D182 ), ( 0x1D185, 0x1D18B ), ( 0x1D1AA, 0x1D1AD ),
  ( 0x1D242, 0x1D244 ), ( 0xE0001, 0xE0001 ), ( 0xE0020, 0xE007F ),
  ( 0xE0100, 0xE01EF )
]

# True if `r` lies in one of the ranges below, which makewidthtable gives a
# column width of 2.
func is_dwidth(r: Rune): bool =
  let ucs = int(r)
  return (ucs >= 0x1100 and
     (ucs <= 0x115f or                    # Hangul Jamo init. consonants
      ucs == 0x2329 or ucs == 0x232a or
      (ucs >= 0x2e80 and ucs <= 0xa4cf and
       ucs != 0x303f) or                  # CJK ... Yi
      (ucs >= 0xac00 and ucs <= 0xd7a3) or # Hangul Syllables
      (ucs >= 0xf900 and ucs <= 0xfaff) or # CJK Compatibility Ideographs
      (ucs >= 0xfe10 and ucs <= 0xfe19) or # Vertical forms
      (ucs >= 0xfe30 and ucs <= 0xfe6f) or # CJK Compatibility Forms
      (ucs >= 0xff00 and ucs <= 0xff60) or # Fullwidth Forms
      (ucs >= 0xffe0 and ucs <= 0xffe6) or
      (ucs >= 0x20000 and ucs <= 0x2fffd) or
      (ucs >= 0x30000 and ucs <= 0x3fffd)))

# Builds the per-code-point column width table for U+0000..U+10FFFF:
# * chars 0..255: 2 if isControlChar, otherwise 1
# * code points inside a `combining` interval: 0
# * code points accepted by is_dwidth: 2
# * everything else: 1
func makewidthtable(): array[0..0x10FFFF, byte] =
  for r in low(char)..high(char):
    if r.isControlChar():
      result[int(r)] = 2
    else:
      result[int(r)] = 1

  # `combining` is sorted, so it is walked in step with ucs instead of being
  # searched for every code point.
  var i = 0
  var next_combining = combining[i]
  for ucs in 256..0x10FFFF:
    if ucs >= next_combining[0]:
      if ucs <= next_combining[1]:
        result[ucs] = 0
        continue
      elif i + 1 < combining.len:
        # ucs is past the current interval: move on to the next one
        inc i
        next_combining = combining[i]

    if Rune(ucs).is_dwidth():
      result[ucs] = 2
    else:
      result[ucs] = 1

when defined(small):
  # compute lookup table on startup
  let width_table = makewidthtable()
else:
  # store lookup table in executable
  const width_table = makewidthtable()

{.push boundChecks:off.}
# Column width of a single rune, looked up in width_table.
func width*(r: Rune): int =
  {.cast(noSideEffect).}:
    let ucs = int(r)
    # Bounds checks are disabled here, so a value outside the table (e.g. a
    # rune decoded from malformed UTF-8) must not be used as an index. Such
    # values are treated like "all remaining characters": width 1.
    if ucs < 0 or ucs > high(width_table):
      return 1
    return int(width_table[ucs])

# Sum of the widths of all runes in `s`.
func width*(s: string): int =
  for r in s.runes():
    result += width(r)

func width*(s: seq[Rune]): int =
  for r in s:
    result += width(r)

# Sum of the widths of s[min ..< max]; `max` is clamped to s.len.
func width*(s: seq[Rune], min: int, max: int): int =
  var i = min
  var mi = min(max, s.len)
  while i < mi:
    result += width(s[i])
    inc i

# Sum of the widths of s[min ..< s.len].
func width*(s: seq[Rune], min: int): int =
  var i = min
  while i < s.len:
    result += width(s[i])
    inc i

# sorted list of non-overlapping intervals of East Asian Ambiguous characters,
# generated by "uniset +WIDTH-A -cat=Me -cat=Mn -cat=Cf c"

const ambiguous = [
  ( 0x00A1, 0x00A1 ), ( 0x00A4, 0x00A4 ), ( 0x00A7, 0x00A8 ),
  ( 0x00AA, 0x00AA ), ( 0x00AE, 0x00AE ), ( 0x00B0, 0x00B4 ),
  ( 0x00B6, 0x00BA ), ( 0x00BC, 0x00BF ), ( 0x00C6, 0x00C6 ),
  ( 0x00D0, 0x00D0 ), ( 0x00D7, 0x00D8 ), ( 0x00DE, 0x00E1 ),
  ( 0x00E6, 0x00E6 ), ( 0x00E8, 0x00EA ), ( 0x00EC, 0x00ED ),
  ( 0x00F0, 0x00F0 ), ( 0x00F2, 0x00F3 ), ( 0x00F7, 0x00FA ),
  ( 0x00FC, 0x00FC ), ( 0x00FE, 0x00FE ), ( 0x0101, 0x0101 ),
  ( 0x0111, 0x0111 ), ( 0x0113, 0x0113 ), ( 0x011B, 0x011B ),
  ( 0x0126, 0x0127 ), ( 0x012B, 0x012B ), ( 0x0131, 0x0133 ),
  ( 0x0138, 0x0138 ), ( 0x013F, 0x0142 ), ( 0x0144, 0x0144 ),
  ( 0x0148, 0x014B ), ( 0x014D, 0x014D ), ( 0x0152, 0x0153 ),
  ( 0x0166, 0x0167 ), ( 0x016B, 0x016B ), ( 0x01CE, 0x01CE ),
  ( 0x01D0, 0x01D0 ), ( 0x01D2, 0x01D2 ), ( 0x01D4, 0x01D4 ),
  ( 0x01D6, 0x01D6 ), ( 0x01D8, 0x01D8 ), ( 0x01DA, 0x01DA ),
  ( 0x01DC, 0x01DC ), ( 0x0251, 0x0251 ), ( 0x0261, 0x0261 ),
  ( 0x02C4, 0x02C4 ), ( 0x02C7, 0x02C7 ), ( 0x02C9, 0x02CB ),
  ( 0x02CD, 0x02CD ), ( 0x02D0, 0x02D0 ), ( 0x02D8, 0x02DB ),
  ( 0x02DD, 0x02DD ), ( 0x02DF, 0x02DF ), ( 0x0391, 0x03A1 ),
  ( 0x03A3, 0x03A9 ), ( 0x03B1, 0x03C1 ), ( 0x03C3, 0x03C9 ),
  ( 0x0401, 0x0401 ), ( 0x0410, 0x044F ), ( 0x0451, 0x0451 ),
  ( 0x2010, 0x2010 ), ( 0x2013, 0x2016 ), ( 0x2018, 0x2019 ),
  ( 0x201C, 0x201D ), ( 0x2020, 0x2022 ), ( 0x2024, 0x2027 ),
  ( 0x2030, 0x2030 ), ( 0x2032, 0x2033 ), ( 0x2035, 0x2035 ),
  ( 0x203B, 0x203B ), ( 0x203E, 0x203E ), ( 0x2074, 0x2074 ),
  ( 0x207F, 0x207F ), ( 0x2081, 0x2084 ), ( 0x20AC, 0x20AC ),
  ( 0x2103, 0x2103 ), ( 0x2105, 0x2105 ), ( 0x2109, 0x2109 ),
  ( 0x2113, 0x2113 ), ( 0x2116, 0x2116 ), ( 0x2121, 0x2122 ),
  ( 0x2126, 0x2126 ), ( 0x212B, 0x212B ), ( 0x2153, 0x2154 ),
  ( 0x215B, 0x215E ), ( 0x2160, 0x216B ), ( 0x2170, 0x2179 ),
  ( 0x2190, 0x2199 ), ( 0x21B8, 0x21B9 ), ( 0x21D2, 0x21D2 ),
  ( 0x21D4, 0x21D4 ), ( 0x21E7, 0x21E7 ), ( 0x2200, 0x2200 ),
  ( 0x2202, 0x2203 ), ( 0x2207, 0x2208 ), ( 0x220B, 0x220B ),
  ( 0x220F, 0x220F ), ( 0x2211, 0x2211 ), ( 0x2215, 0x2215 ),
  ( 0x221A, 0x221A ), ( 0x221D, 0x2220 ), ( 0x2223, 0x2223 ),
  ( 0x2225, 0x2225 ), ( 0x2227, 0x222C ), ( 0x222E, 0x222E ),
  ( 0x2234, 0x2237 ), ( 0x223C, 0x223D ), ( 0x2248, 0x2248 ),
  ( 0x224C, 0x224C ), ( 0x2252, 0x2252 ), ( 0x2260, 0x2261 ),
  ( 0x2264, 0x2267 ), ( 0x226A, 0x226B ), ( 0x226E, 0x226F ),
  ( 0x2282, 0x2283 ), ( 0x2286, 0x2287 ), ( 0x2295, 0x2295 ),
  ( 0x2299, 0x2299 ), ( 0x22A5, 0x22A5 ), ( 0x22BF, 0x22BF ),
  ( 0x2312, 0x2312 ), ( 0x2460, 0x24E9 ), ( 0x24EB, 0x254B ),
  ( 0x2550, 0x2573 ), ( 0x2580, 0x258F ), ( 0x2592, 0x2595 ),
  ( 0x25A0, 0x25A1 ), ( 0x25A3, 0x25A9 ), ( 0x25B2, 0x25B3 ),
  ( 0x25B6, 0x25B7 ), ( 0x25BC, 0x25BD ), ( 0x25C0, 0x25C1 ),
  ( 0x25C6, 0x25C8 ), ( 0x25CB, 0x25CB ), ( 0x25CE, 0x25D1 ),
  ( 0x25E2, 0x25E5 ), ( 0x25EF, 0x25EF ), ( 0x2605, 0x2606 ),
  ( 0x2609, 0x2609 ), ( 0x260E, 0x260F ), ( 0x2614, 0x2615 ),
  ( 0x261C, 0x261C ), ( 0x261E, 0x261E ), ( 0x2640, 0x2640 ),
  ( 0x2642, 0x2642 ), ( 0x2660, 0x2661 ), ( 0x2663, 0x2665 ),
  ( 0x2667, 0x266A ), ( 0x266C, 0x266D ), ( 0x266F, 0x266F ),
  ( 0x273D, 0x273D ), ( 0x2776, 0x277F ), ( 0xE000, 0xF8FF ),
  ( 0xFFFD, 0xFFFD ), ( 0xF0000, 0xFFFFD ), ( 0x100000, 0x10FFFD )
]

#
# The following functions are the same as mk_wcwidth() and mk_wcswidth(),
# except that spacing characters in the East Asian Ambiguous (A) category as
# defined in Unicode Technical Report #11 have a column width of 2. This
# variant might be useful for users of CJK legacy encodings who want to migrate
# to UCS without changing the traditional terminal character-width behaviour.
# It is not otherwise recommended for general use.
#
# TODO: currently these are unused, the user should be able to toggle them

# auxiliary function for binary search in interval table
#
# `table` must be sorted and made of non-overlapping (first, last) intervals.
# Returns true if `ucs` lies inside one of them (bounds inclusive).
func bisearch(ucs: Rune, table: openarray[(int, int)]): bool =
  if table.len == 0:
    # nothing can match; also avoids reading table[0] of an empty table
    return false

  let cp = int(ucs)
  var lo = 0
  var hi = table.high

  # quick rejection of values outside the range covered by the table
  if cp < table[0][0] or cp > table[hi][1]:
    return false

  while hi >= lo:
    let mid = (lo + hi) div 2
    if cp > table[mid][1]:
      lo = mid + 1
    elif cp < table[mid][0]:
      hi = mid - 1
    else:
      return true
  return false


# Column width of `r` where East Asian Ambiguous characters count as 2
# columns; every other rune gets its regular width.
func mk_wcwidth_cjk(r: Rune): int =
  # binary search in table of East Asian Ambiguous characters
  if bisearch(r, ambiguous):
    return 2

  return r.width()

# Sum of mk_wcwidth_cjk over all runes of `s`.
func mk_wcswidth_cjk(s: string): int =
  for r in s.runes:
    result += mk_wcwidth_cjk(r)