diff options
author | bptato <nincsnevem662@gmail.com> | 2021-03-17 12:20:05 +0100 |
---|---|---|
committer | bptato <nincsnevem662@gmail.com> | 2021-03-17 12:20:05 +0100 |
commit | 94a10242dca6181ef8f15a37e7083069ead09559 (patch) | |
tree | ccdb28c82535aa7cda25f2bfe26cbfa451eaecab | |
parent | 97f19da347b27a4d12f54784fa8bcbf304aa4fea (diff) | |
download | chawan-94a10242dca6181ef8f15a37e7083069ead09559.tar.gz |
...
-rw-r--r-- | Makefile | 17 | ||||
-rw-r--r-- | res/config (renamed from src/config) | 47 | ||||
-rw-r--r-- | res/default.css | 32 | ||||
-rw-r--r-- | res/entity.json (renamed from src/entity.json) | 0 | ||||
-rw-r--r-- | src/a.nim | 1 | ||||
-rw-r--r-- | src/config.nim | 26 | ||||
-rw-r--r-- | src/cssparser.nim | 833 | ||||
-rw-r--r-- | src/default.css | 20 | ||||
-rw-r--r-- | src/dom.nim | 27 | ||||
-rwxr-xr-x | src/entity | bin | 85288 -> 0 bytes | |||
-rw-r--r-- | src/entity.nim | 35 | ||||
-rw-r--r-- | src/enums.nim | 13 | ||||
-rw-r--r-- | src/htmlparser.nim (renamed from src/parser.nim) | 98 | ||||
-rw-r--r-- | src/main.nim | 7 | ||||
-rw-r--r-- | src/radixtree.nim | 367 | ||||
-rw-r--r-- | src/style.nim | 58 | ||||
-rw-r--r-- | src/twtio.nim | 157 | ||||
-rw-r--r-- | src/twtstr.nim | 146 |
18 files changed, 1590 insertions, 294 deletions
diff --git a/Makefile b/Makefile index 569cb2b5..3d42c18d 100644 --- a/Makefile +++ b/Makefile @@ -1,9 +1,16 @@ +NIMC = nim compile +FLAGS = -d:ssl -o:twt +FILES = src/main.nim + debug: - nim compile -d:ssl -o:twt src/main.nim + $(NIMC) $(FLAGS) -d:small $(FILES) release: - nim compile -d:release -d:ssl -o:twt src/main.nim -release_opt: - nim compile -d:danger -d:ssl -o:twt src/main.nim + $(NIMC) $(FLAGS) -d:release $(FILES) +danger: + $(NIMC) $(FLAGS) -d:danger $(FILES) +small: + $(NIMC) $(FLAGS) -d:release -d:small $(FILES) +lowmem: + $(NIMC) $(FLAGS) -d:release -d:lowmem $(FILES) clean: rm ./twt -all: debug release release_opt diff --git a/src/config b/res/config index 99397288..48ec0d32 100644 --- a/src/config +++ b/res/config @@ -54,6 +54,8 @@ lemap C-f LINED_FORWARD lemap C-u LINED_CLEAR lemap C-k LINED_KILL lemap C-w LINED_KILL_WORD +lemap C-a LINED_BEGIN +lemap C-e LINED_END lemap C-v LINED_ESC lemap C-g LINED_COMPOSE_TOGGLE @@ -136,6 +138,11 @@ comp fu ふ comp he へ comp ho ほ +comp fa ふぁ +comp fi ふぃ +comp fe ふぇ +comp fo ふぉ + comp ma ま comp mi み comp mu む @@ -236,6 +243,18 @@ comp pyo ぴょ comp kwa くゎ comp gwa ぐゎ +comp ti ってぃ +comp tti ってぃ + +comp di てぃ +comp ddi ってぃ + +comp she しぇ +comp sshe っしぇ + +comp je じぇ +comp jje っじぇ + #katakana comp A ア comp I イ @@ -249,11 +268,11 @@ comp LU ゥ comp LE ェ comp LO ォ -comp KA か -comp KI き -comp KU く -comp KE け -comp KO こ +comp KA カ +comp KI キ +comp KU ク +comp KE ケ +comp KO コ comp KKA ッカ comp KKI ッキ @@ -297,6 +316,11 @@ comp FU フ comp HE ヘ comp HO ホ +comp FA ファ +comp FI フィ +comp FE フェ +comp FO フォ + comp MA マ comp MI ミ comp MU ム @@ -349,7 +373,7 @@ comp PE ペ comp PO ポ comp N ン -comp xc ー +comp X ー comp KYA キャ comp KYU キュ @@ -397,3 +421,14 @@ comp PYO ピョ comp KWA クヮ comp GWA グヮ + +comp TI ティ +comp TTI ッティ +comp DI ディ +comp DDI ッディ + +comp SHE シェ +comp SSHE ッシェ + +comp JE ジェ +comp JJE ッジェ diff --git a/res/default.css b/res/default.css new file mode 100644 index 00000000..d1d6801d --- /dev/null +++ b/res/default.css @@ -0,0 +1,32 @@ +area, base, source, track, link, meta, param, wbr, head, style, script { + display: none; +} + +address, blockquote, center, del, dir, div, dl, fieldset, form, h1, h2, h3, h4, +h5, h6, hr, ins, menu, noframes, noscript, ol, p, pre, table, ul, center, dir, +menu, noframes, body { + display: block +} + +br { + display: block; + content: ' ' +} + +a, abbr, b, bdo, button, cite, code, del, dfn, em, font, i, img, ins, +input, iframe, kbd, label, map, object, q, samp, select, small, span, strong, +sub, sup, textarea, tt, var, font, iframe, u, s, strike, frame, img, input { + display: inline +} + +col { + display: table-column +} + +img { + display: inline-block +} + +li { + display: list-item +} diff --git a/src/entity.json b/res/entity.json index 557170b4..557170b4 100644 --- a/src/entity.json +++ b/res/entity.json diff --git a/src/a.nim b/src/a.nim deleted file mode 100644 index da4e4903..00000000 --- a/src/a.nim +++ /dev/null @@ -1 +0,0 @@ -echo int(high(char)) diff --git a/src/config.nim b/src/config.nim index 6d185a1d..d4713123 100644 --- a/src/config.nim +++ b/src/config.nim @@ -29,11 +29,11 @@ type ACTION_LINED_CLEAR, ACTION_LINED_KILL, ACTION_LINED_KILL_WORD, ACTION_LINED_BACK, ACTION_LINED_FORWARD, ACTION_LINED_PREV_WORD, ACTION_LINED_NEXT_WORD, - ACTION_LINED_COMPOSE_TOGGLE, ACTION_LINED_COMPOSE_ON, ACTION_LINED_COMPOSE_OFF - ACTION_LINED_ESC + ACTION_LINED_BEGIN, ACTION_LINED_END, + ACTION_LINED_COMPOSE_TOGGLE, ACTION_LINED_ESC ActionMap = Table[string, TwtAction] - ComposeMap = RadixTree[string] + ComposeMap = RadixNode[string] var normalActionRemap*: ActionMap var linedActionRemap*: ActionMap @@ -91,7 +91,7 @@ func constructActionTable*(origTable: ActionMap): ActionMap = return newTable proc parseConfigLine(line: string, nmap: var ActionMap, lemap: var ActionMap, - compose: var ComposeMap) = + compose: var Table[string, string]) = if line.len == 0 or line[0] == '#': return let cmd = line.split(' ') @@ -103,11 +103,11 @@ proc parseConfigLine(line: string, nmap: var ActionMap, lemap: var ActionMap, elif cmd[0] == "comp": compose[getRealKey(cmd[1])] = cmd[2] -proc staticReadKeymap(): (ActionMap, ActionMap, ComposeMap) = - let config = staticRead"config" +proc staticReadKeymap(): (ActionMap, ActionMap, Table[string, string]) = + let config = staticRead"../res/config" var nmap: ActionMap var lemap: ActionMap - var compose = newRadixTree[string]() + var compose: Table[string, string] for line in config.split('\n'): parseConfigLine(line, nmap, lemap, compose) @@ -119,14 +119,21 @@ const (normalActionMap, linedActionMap, composeMap) = staticReadKeymap() normalActionRemap = normalActionMap linedActionRemap = linedActionMap -composeRemap = composeMap +composeRemap = composeMap.toRadixTree() +proc traverseRemap[T](m: RadixNode[T], s: string) = + echo s + for k in m.keys: + assert(m{k, m} != m, s & " " & k) + m{k, m}.traverseRemap(s & k) + +composeRemap.traverseRemap("") proc readConfig*(filename: string): bool = var f: File let status = f.open(filename, fmRead) var nmap: ActionMap var lemap: ActionMap - var compose = newRadixTree[string]() + var compose: Table[string, string] if status: var line: TaintedString while f.readLine(line): @@ -134,6 +141,7 @@ proc readConfig*(filename: string): bool = normalActionRemap = constructActionTable(normalActionMap) linedActionRemap = constructActionTable(linedActionMap) + composeRemap = compose.toRadixTree() return true else: return false diff --git a/src/cssparser.nim b/src/cssparser.nim new file mode 100644 index 00000000..ce5b5037 --- /dev/null +++ b/src/cssparser.nim @@ -0,0 +1,833 @@ +# CSS tokenizer and parser. The tokenizer is a mess, and may or may not work +# correctly. The parser should work, though the outputted object model is +# questionable at best. + +import unicode +import streams +import math +import options + +import twtstr +import twtio +import enums + +type + CSSTokenizerState = object + at: int + stream: Stream + buf: seq[Rune] + + CSSParseState = object + tokens: seq[CSSParsedItem] + at: int + top_level: bool + + tflaga = enum + TFLAGA_UNRESTRICTED, TFLAGA_ID + tflagb = enum + TFLAGB_INTEGER, TFLAGB_NUMBER + + CSSParsedItem* = ref object of RootObj + CSSComponentValue* = ref object of CSSParsedItem + + CSSToken* = ref object of CSSComponentValue + case tokenType*: CSSTokenType + of CSS_IDENT_TOKEN, CSS_FUNCTION_TOKEN, CSS_AT_KEYWORD_TOKEN, + CSS_HASH_TOKEN, CSS_STRING_TOKEN, CSS_URL_TOKEN: + value*: seq[Rune] + tflaga*: tflaga + of CSS_DELIM_TOKEN: + rvalue*: Rune + of CSS_NUMBER_TOKEN, CSS_PERCENTAGE_TOKEN, CSS_DIMENSION_TOKEN: + nvalue*: float64 + tflagb*: tflagb + unit*: seq[Rune] + else: discard + + CSSRule* = ref object of CSSParsedItem + prelude*: seq[CSSComponentValue] + oblock*: CSSSimpleBlock + + CSSAtRule* = ref object of CSSRule + name*: seq[Rune] + + CSSQualifiedRule* = ref object of CSSRule + + CSSDeclaration* = ref object of CSSComponentValue + name*: seq[Rune] + value*: seq[CSSComponentValue] + important*: bool + + CSSFunction* = ref object of CSSComponentValue + name*: seq[Rune] + value*: seq[CSSComponentValue] + + CSSSimpleBlock* = ref object of CSSComponentValue + token*: CSSToken + value*: seq[CSSComponentValue] + + CSSStylesheet* = object + value*: seq[CSSRule] + + SyntaxError = object of ValueError + + CSSColor* = tuple[r: uint8, g: uint8, b: uint8, a: uint8] + +func `==`(a: CSSParsedItem, b: CSSTokenType): bool = + return a of CSSToken and CSSToken(a).tokenType == b + +func toNumber(s: seq[Rune]): float64 = + var sign = 1 + var t = 1 + var d = 0 + var integer: float64 = 0 + var f: float64 = 0 + var e: float64 = 0 + + var i = 0 + if i < s.len and s[i] == Rune('-'): + sign = -1 + inc i + elif i < s.len and s[i] == Rune('+'): + inc i + + while i < s.len and isDigitAscii(s[i]): + integer *= 10 + integer += float64(decValue(s[i])) + inc i + + if i < s.len and s[i] == Rune('.'): + inc i + while i < s.len and isDigitAscii(s[i]): + f *= 10 + f += float64(decValue(s[i])) + inc i + inc d + + if i < s.len and (s[i] == Rune('e') or s[i] == Rune('E')): + inc i + if i < s.len and s[i] == Rune('-'): + t = -1 + inc i + elif i < s.len and s[i] == Rune('+'): + inc i + + while i < s.len and isDigitAscii(s[i]): + e *= 10 + e += float64(decValue(s[i])) + inc i + + return float64(sign) * (integer + f * pow(10, float64(-d))) * pow(10, (float64(t) * e)) + +func toColor*(s: seq[Rune]): CSSColor = + if s.len == 3: + for r in s: + if hexValue(r) == -1: + return + let r = hexValue(s[0]) * 0x10 + hexValue(s[0]) + let g = hexValue(s[1]) * 0x10 + hexValue(s[1]) + let b = hexValue(s[2]) * 0x10 + hexValue(s[2]) + + result.r = uint8(r) + result.g = uint8(g) + result.b = uint8(b) + result.a = 0 + elif s.len == 6: + for r in s: + if hexValue(r) == -1: + return + let r = hexValue(s[0]) * 0x10 + hexValue(s[1]) + let g = hexValue(s[2]) * 0x10 + hexValue(s[3]) + let b = hexValue(s[4]) * 0x10 + hexValue(s[5]) + result.r = uint8(r) + result.g = uint8(g) + result.b = uint8(b) + result.a = 0 + +func isNameStartCodePoint*(r: Rune): bool = + return not isAscii(r) or r == Rune('_') or isAlphaAscii(r) + +func isNameCodePoint*(r: Rune): bool = + return isNameStartCodePoint(r) or isDigitAscii(r) or r == Rune('-') + +proc consume(state: var CSSTokenizerState): Rune = + result = state.buf[state.at] + inc state.at + +proc reconsume(state: var CSSTokenizerState) = + dec state.at + +func peek(state: CSSTokenizerState, i: int): Rune = + return state.buf[state.at + i] + +proc has(state: var CSSTokenizerState, i: int): bool = + if state.at + i >= state.buf.len and not state.stream.atEnd(): + state.buf &= state.stream.readLine().toRunes() & Rune('\n') + return state.at + i < state.buf.len + +func curr(state: CSSTokenizerState): Rune = + return state.buf[state.at] + +proc isValidEscape*(state: var CSSTokenizerState): bool = + return state.has(1) and state.curr() == Rune('\\') and state.peek(1) != Rune('\n') + +proc has(state: var CSSTokenizerState): bool = + if state.at >= state.buf.len and not state.stream.atEnd(): + state.buf &= state.stream.readLine().toRunes() & Rune('\n') + return state.at < state.buf.len + +proc startsWithIdentifier*(state: var CSSTokenizerState): bool = + if not state.has(): + return false + + if isNameStartCodePoint(state.curr()): + return true + if state.curr() == Rune('-'): + if state.has(1) and state.peek(1).isNameStartCodePoint(): + return true + if state.isValidEscape(): + return true + return false + elif state.curr() == Rune('\\'): + return state.isValidEscape() + + return false + +proc startsWithNumber*(state: var CSSTokenizerState): bool = + if state.has(): + case state.curr() + of Rune('+'), Rune('-'): + if state.has(1): + if isDigitAscii(state.peek(1)): + return true + elif state.peek(1) == Rune('.'): + if state.has(2) and isDigitAscii(state.peek(2)): + return true + of Rune('.'): + if isDigitAscii(state.peek(1)): + return true + elif isDigitAscii(state.curr()): + return true + else: + return false + return false + +proc consumeEscape(state: var CSSTokenizerState): Rune = + let r = state.consume() + var num = hexValue(r) + if num != -1: + var i = 0 + while state.has() and i <= 5: + let r = state.consume() + if hexValue(r) == -1: + state.reconsume() + break + num *= 0x10 + num += hexValue(r) + inc i + if num == 0 or num > 0x10FFFF or num in {0xD800..0xDFFF}: + return Rune(0xFFFD) + else: + return Rune(num) + else: + return r + +proc consumeString(state: var CSSTokenizerState): CSSToken = + var s: seq[Rune] + state.reconsume() + let ending = state.consume() + + while state.has(): + let r = state.consume() + case r + of Rune('\n'): + return CSSToken(tokenType: CSS_BAD_STRING_TOKEN) + of Rune('\\'): + s &= consumeEscape(state) + elif r == ending: + break + else: + s &= r + return CSSToken(tokenType: CSS_STRING_TOKEN, value: s) + +proc consumeName(state: var CSSTokenizerState): seq[Rune] = + while state.has(): + let r = state.consume() + if state.isValidEscape(): + result &= state.consumeEscape() + elif isNameCodePoint(r): + result &= r + else: + state.reconsume() + return result + +proc consumeNumberSign(state: var CSSTokenizerState): CSSToken = + if state.has(): + let r = state.consume() + if isNameCodePoint(r) or state.isValidEscape(): + result = CSSToken(tokenType: CSS_HASH_TOKEN) + if state.startsWithIdentifier(): + result.tflaga = TFLAGA_ID + + state.reconsume() + result.value = consumeName(state) + else: + let r = state.consume() + result = CSSToken(tokenType: CSS_DELIM_TOKEN, rvalue: r) + +proc consumeNumber(state: var CSSTokenizerState): tuple[t: tflagb, val: float64] = + var t = TFLAGB_INTEGER + var repr: seq[Rune] + if state.has(): + if state.curr() == Rune('+') or state.curr() == Rune('-'): + repr &= state.consume() + + while state.has() and isDigitAscii(state.curr()): + repr &= state.consume() + + if state.has(1): + if state.curr() == Rune('.') and isDigitAscii(state.peek(1)): + repr &= state.consume() + repr &= state.consume() + t = TFLAGB_NUMBER + while state.has() and isDigitAscii(state.curr()): + repr &= state.consume() + + if state.has(1): + if state.curr() == Rune('E') or state.curr() == Rune('e'): + var j = 2 + if state.peek(1) == Rune('-') or state.peek(1) == Rune('+'): + inc j + if state.has(j) and isDigitAscii(state.peek(j)): + while j > 0: + repr &= state.consume() + dec j + + while state.has() and isDigitAscii(state.curr()): + repr &= state.consume() + + let val = toNumber(repr) + return (t, val) + +proc consumeNumericToken(state: var CSSTokenizerState): CSSToken = + let num = state.consumeNumber() + if state.startsWithIdentifier(): + result = CSSToken(tokenType: CSS_DIMENSION_TOKEN, nvalue: num.val, tflagb: num.t) + result.unit = state.consumeName() + elif state.has() and state.curr() == Rune('%'): + discard state.consume() + result = CSSToken(tokenType: CSS_PERCENTAGE_TOKEN, nvalue: num.val) + else: + result = CSSToken(tokenType: CSS_NUMBER_TOKEN, nvalue: num.val, tflagb: num.t) + +proc consumeBadURL(state: var CSSTokenizerState) = + while state.has(1): + let r = state.consume() + case r + of Rune(')'): + return + elif state.isValidEscape(): + discard state.consumeEscape() + else: discard + +proc consumeURL(state: var CSSTokenizerState): CSSToken = + result = CSSToken(tokenType: CSS_URL_TOKEN) + while state.has(1) and state.peek(1).isWhitespace(): + discard state.consume() + + while state.has(1): + let r = state.consume() + case r + of Rune(')'): + return result + of Rune('"'), Rune('\''), Rune('('): + state.consumeBadURL() + return CSSToken(tokenType: CSS_BAD_URL_TOKEN) + of Rune('\\'): + state.reconsume() + if state.isValidEscape(): + result.value &= state.consumeEscape() + else: + state.consumeBadURL() + return CSSToken(tokenType: CSS_BAD_URL_TOKEN) + elif r.isWhitespace(): + while state.has(1) and state.peek(1).isWhitespace(): + discard state.consume() + else: + result.value &= r + +proc consumeIdentLikeToken(state: var CSSTokenizerState): CSSToken = + let s = state.consumeName() + if s.toAsciiLower() == "url" and state.has() and state.curr() == Rune('('): + discard state.consume() + while state.has(1) and state.curr().isWhitespace() and state.peek(1).isWhitespace(): + discard state.consume() + if state.curr() == Rune('\'') or state.curr() == Rune('"') or state.curr().isWhitespace(): + return CSSToken(tokenType: CSS_FUNCTION_TOKEN, value: s) + else: + return state.consumeURL() + elif state.has() and state.curr() == Rune('('): + discard state.consume() + return CSSToken(tokenType: CSS_FUNCTION_TOKEN, value: s) + + return CSSToken(tokenType: CSS_IDENT_TOKEN, value: s) + +proc consumeComments(state: var CSSTokenizerState) = + if state.has(2) and state.peek(1) == Rune('/') and state.peek(2) == Rune('*'): + discard state.consume() + discard state.consume() + while state.has(2) and not (state.peek(1) == Rune('*') and state.peek(2) == Rune('/')): + discard state.consume() + + if state.has(2): + discard state.consume() + if state.has(1): + discard state.consume() + +proc consumeToken(state: var CSSTokenizerState): CSSToken = + state.consumeComments() + let r = state.consume() + case r + of Rune('\n'), Rune('\t'), Rune(' '), Rune('\f'), Rune('\r'): + while state.has() and state.curr().isWhitespace(): + discard state.consume() + return CSSToken(tokenType: CSS_WHITESPACE_TOKEN) + of Rune('"'), Rune('\''): + return consumeString(state) + of Rune('#'): + return consumeNumberSign(state) + of Rune('('): + return CSSToken(tokenType: CSS_LPAREN_TOKEN) + of Rune(')'): + return CSSToken(tokenType: CSS_RPAREN_TOKEN) + of Rune('['): + return CSSToken(tokenType: CSS_LBRACKET_TOKEN) + of Rune(']'): + return CSSToken(tokenType: CSS_RBRACKET_TOKEN) + of Rune('{'): + return CSSToken(tokenType: CSS_LBRACE_TOKEN) + of Rune('}'): + return CSSToken(tokenType: CSS_RBRACE_TOKEN) + of Rune(','): + return CSSToken(tokenType: CSS_COMMA_TOKEN) + of Rune(':'): + return CSSToken(tokenType: CSS_COLON_TOKEN) + of Rune(';'): + return CSSToken(tokenType: CSS_SEMICOLON_TOKEN) + of Rune('+'): + if state.startsWithNumber(): + state.reconsume() + return state.consumeNumericToken() + else: + return CSSToken(tokenType: CSS_DELIM_TOKEN, rvalue: r) + of Rune('-'): + if state.startsWithNumber(): + state.reconsume() + return state.consumeNumericToken() + else: + if state.has(2) and state.peek(1) == Rune('-') and state.peek(2) == Rune('>'): + discard state.consume() + discard state.consume() + return CSSToken(tokenType: CSS_CDC_TOKEN) + elif state.startsWithIdentifier(): + state.reconsume() + result = state.consumeIdentLikeToken() + eprint result.value + else: + return CSSToken(tokenType: CSS_DELIM_TOKEN, rvalue: r) + of Rune('.'): + if state.startsWithNumber(): + state.reconsume() + return state.consumeNumericToken() + else: + return CSSToken(tokenType: CSS_DELIM_TOKEN, rvalue: r) + of Rune('<'): + if state.has(3) and state.peek(1) == Rune('!') and state.peek(2) == Rune('-') and state.peek(3) == Rune('-'): + discard state.consume() + discard state.consume() + discard state.consume() + return CSSToken(tokenType: CSS_CDO_TOKEN) + else: + return CSSToken(tokenType: CSS_DELIM_TOKEN, rvalue: r) + of Rune('@'): + if state.startsWithIdentifier(): + let name = state.consumeName() + return CSSToken(tokenType: CSS_AT_KEYWORD_TOKEN, value: name) + else: + return CSSToken(tokenType: CSS_DELIM_TOKEN, rvalue: r) + elif isDigitAscii(r): + state.reconsume() + return state.consumeNumericToken() + elif isNameStartCodePoint(r): + state.reconsume() + return state.consumeIdentLikeToken() + else: + return CSSToken(tokenType: CSS_DELIM_TOKEN, rvalue: r) + +proc tokenizeCSS*(inputStream: Stream): seq[CSSParsedItem] = + var state: CSSTokenizerState + state.stream = inputStream + state.buf = state.stream.readLine().toRunes() + while state.has(): + result.add(state.consumeToken()) + eprint "consume token", CSSToken(result[^1]).tokenType + + inputStream.close() + +proc consume(state: var CSSParseState): CSSParsedItem = + result = state.tokens[state.at] + inc state.at + +proc reconsume(state: var CSSParseState) = + dec state.at + +func has(state: CSSParseState, i: int): bool = + return state.at + i < state.tokens.len + +func curr(state: CSSParseState): CSSParsedItem = + return state.tokens[state.at] + +func has(state: CSSParseState): bool = + return state.at < state.tokens.len + +proc consumeSimpleBlock(state: var CSSParseState): CSSSimpleBlock = + state.reconsume() + let t = CSSToken(state.consume()) + var ending: CSSTokenType + case t.tokenType + of CSS_LBRACE_TOKEN: ending = CSS_RBRACE_TOKEN + of CSS_LPAREN_TOKEN: ending = CSS_RPAREN_TOKEN + of CSS_LBRACKET_TOKEN: ending = CSS_RBRACKET_TOKEN + else: raise newException(Exception, "Parse error!") + + result = CSSSimpleBlock(token: t) + while state.at < state.tokens.len: + let t = state.consume() + if t == ending: + return result + else: + if t == CSS_LBRACE_TOKEN or t == CSS_LBRACKET_TOKEN or t == CSS_LPAREN_TOKEN: + result.value.add(state.consumeSimpleBlock()) + else: + result.value.add(CSSComponentValue(t)) + return result + +proc consumeComponentValue(state: var CSSParseState): CSSComponentValue = + let t = state.consume() + if t == CSS_LBRACE_TOKEN or t == CSS_LBRACKET_TOKEN or t == CSS_LPAREN_TOKEN: + return state.consumeSimpleBlock() + return CSSComponentValue(t) + +proc consumeQualifiedRule(state: var CSSParseState): Option[CSSQualifiedRule] = + var r = CSSQualifiedRule() + while state.has(): + let t = state.consume() + if t of CSSSimpleBlock: + r.oblock = state.consumeSimpleBlock() + return some(r) + elif t == CSS_LBRACE_TOKEN: + r.oblock = state.consumeSimpleBlock() + return some(r) + else: + state.reconsume() + r.prelude.add(state.consumeComponentValue()) + return none(CSSQualifiedRule) + + +proc consumeAtRule(state: var CSSParseState): CSSAtRule = + let t = CSSToken(state.consume()) + result = CSSAtRule(name: t.value) + + while state.at < state.tokens.len: + let t = state.consume() + if t of CSSSimpleBlock: + result.oblock = state.consumeSimpleBlock() + elif t == CSS_SEMICOLON_TOKEN: + return result + elif t == CSS_LBRACE_TOKEN: + result.oblock = state.consumeSimpleBlock() + return result + else: + state.reconsume() + result.prelude.add(state.consumeComponentValue()) + +proc consumeDeclaration(state: var CSSParseState): Option[CSSDeclaration] = + let t = CSSToken(state.consume()) + var decl = CSSDeclaration(name: t.value) + while state.has() and state.curr() == CSS_WHITESPACE_TOKEN: + discard state.consume() + if not state.has() or state.curr() != CSS_COLON_TOKEN: + return none(CSSDeclaration) + discard state.consume() + eprint state.tokens.len + while state.has() and state.curr() == CSS_WHITESPACE_TOKEN: + eprint "ok...", CSSToken(state.curr()).tokenType + discard state.consume() + + while state.has(): + eprint "ok..." + decl.value.add(state.consumeComponentValue()) + eprint "helloo?", decl.value.len + + var i = decl.value.len - 1 + var j = 2 + var k = 0 + var l = 0 + while i >= 0 and j > 0: + if decl.value[i] != CSS_WHITESPACE_TOKEN: + dec j + if decl.value[i] == CSS_IDENT_TOKEN and k == 0: + if CSSToken(decl.value[i]).value.toAsciiLower() == "important": + inc k + l = i + elif k == 1 and decl.value[i] == CSS_DELIM_TOKEN: + if CSSToken(decl.value[i]).rvalue == Rune('!'): + decl.important = true + decl.value.del(l) + decl.value.del(i) + break + dec i + + while decl.value.len > 0 and decl.value[^1] == CSS_WHITESPACE_TOKEN: + decl.value.del(decl.value.len - 1) + return some(decl) + +#> Note: Despite the name, this actually parses a mixed list of declarations +#> and at-rules, as CSS 2.1 does for @page. Unexpected at-rules (which could be +#> all of them, in a given context) are invalid and should be ignored by the +#> consumer. +#Wow this is ugly. +proc consumeListOfDeclarations(state: var CSSParseState): seq[CSSParsedItem] = + while state.has(): + let t = state.consume() + if t == CSS_wHITESPACE_TOKEN or t == CSS_SEMICOLON_TOKEN: + continue + elif t == CSS_AT_KEYWORD_TOKEN: + state.reconsume() + result.add(state.consumeAtRule()) + elif t == CSS_IDENT_TOKEN: + var tempList: seq[CSSParsedItem] + tempList.add(CSSToken(t)) + while state.has() and state.curr() != CSS_SEMICOLON_TOKEN: + tempList.add(state.consumeComponentValue()) + + var tempState = CSSParseState(at: 0, tokens: tempList) + let decl = tempState.consumeDeclaration() + if decl.isSome: + result.add(decl.get) + else: + state.reconsume() + if state.curr() != CSS_SEMICOLON_TOKEN: + discard state.consumeComponentValue() + +proc consumeListOfRules(state: var CSSParseState): seq[CSSRule] = + while state.at < state.tokens.len: + let t = state.consume() + if t == CSS_WHITESPACE_TOKEN: + continue + elif t == CSS_CDO_TOKEN or t == CSS_CDC_TOKEN: + if state.top_level: + continue + else: + state.reconsume() + let q = state.consumeQualifiedRule() + if q.isSome: + result.add(q.get) + elif t == CSS_AT_KEYWORD_TOKEN: + state.reconsume() + result.add(state.consumeAtRule()) + else: + state.reconsume() + let q = state.consumeQualifiedRule() + if q.isSome: + result.add(q.get) + +proc consumeFunction(state: var CSSParseState): CSSFunction = + while state.at < state.tokens.len: + let t = state.consume() + if t == CSS_RPAREN_TOKEN: + return result + else: + state.reconsume() + result.value.add(state.consumeComponentValue()) + +proc parseStylesheet(state: var CSSParseState): CSSStylesheet = + state.top_level = true + result.value.add(state.consumeListOfRules()) + +proc parseStylesheet(inputStream: Stream): CSSStylesheet = + var state = CSSParseState() + state.tokens = tokenizeCSS(inputStream) + return state.parseStylesheet() + +proc parseListOfRules(state: var CSSParseState): seq[CSSRule] = + return state.consumeListOfRules() + +proc parseListOfRules(inputStream: Stream): seq[CSSRule] = + var state = CSSParseState() + state.tokens = tokenizeCSS(inputStream) + return state.parseListOfRules() + +proc parseRule(state: var CSSParseState): CSSRule = + while state.has() and state.curr() == CSS_WHITESPACE_TOKEN: + discard state.consume() + if not state.has(): + raise newException(SyntaxError, "EOF reached!") + + if state.curr() == CSS_AT_KEYWORD_TOKEN: + result = state.consumeAtRule() + else: + let q = state.consumeQualifiedRule() + if q.isSome: + result = q.get + else: + raise newException(SyntaxError, "No qualified rule found!") + + while state.has() and state.curr() == CSS_WHITESPACE_TOKEN: + discard state.consume() + if state.has(): + raise newException(SyntaxError, "EOF not reached!") + +proc parseRule(inputStream: Stream): CSSRule = + var state = CSSParseState() + state.tokens = tokenizeCSS(inputStream) + return state.parseRule() + +proc parseDeclaration(state: var CSSParseState): CSSDeclaration = + while state.has() and state.curr() == CSS_WHITESPACE_TOKEN: + discard state.consume() + + if not state.has() or state.curr() != CSS_IDENT_TOKEN: + raise newException(SyntaxError, "No ident token found!") + + let d = state.consumeDeclaration() + if d.isSome: + return d.get + + raise newException(SyntaxError, "No declaration found!") + +proc parseCSSDeclaration*(inputStream: Stream): CSSDeclaration = + var state = CSSParseState() + state.tokens = tokenizeCSS(inputStream) + return state.parseDeclaration() + +proc parseListOfDeclarations(state: var CSSParseState): seq[CSSParsedItem] = + return state.consumeListOfDeclarations() + +proc parseCSSListOfDeclarations*(inputStream: Stream): seq[CSSParsedItem] = + var state = CSSParseState() + state.tokens = tokenizeCSS(inputStream) + return state.parseListOfDeclarations() + +proc parseComponentValue(state: var CSSParseState): CSSComponentValue = + while state.has() and state.curr() == CSS_WHITESPACE_TOKEN: + discard state.consume() + if not state.has(): + raise newException(SyntaxError, "EOF reached!") + + result = state.consumeComponentValue() + + while state.has() and state.curr() == CSS_WHITESPACE_TOKEN: + discard state.consume() + if state.has(): + raise newException(SyntaxError, "EOF not reached!") + +proc parseCSSComponentValue*(inputStream: Stream): CSSComponentValue = + var state = CSSParseState() + state.tokens = tokenizeCSS(inputStream) + return state.parseComponentValue() + +proc parseListOfComponentValues(state: var CSSParseState): seq[CSSComponentValue] = + while state.has(): + result.add(state.consumeComponentValue()) + +proc parseCSSListOfComponentValues*(inputStream: Stream): seq[CSSComponentValue] = + var state = CSSParseState() + state.tokens = tokenizeCSS(inputStream) + return state.parseListOfComponentValues() + +proc parseCommaSeparatedListOfComponentValues(state: var CSSParseState): seq[CSSComponentValue] = + while state.has(1): + let cvl = state.consumeComponentValue() + if cvl != CSS_COMMA_TOKEN: + result.add(state.consumeComponentValue()) + +proc parseCommaSeparatedListOfComponentValues(inputStream: Stream): seq[CSSComponentValue] = + var state = CSSParseState() + state.tokens = tokenizeCSS(inputStream) + return state.parseCommaSeparatedListOfComponentValues() + +proc printc*(c: CSSComponentValue) = + if c of CSSToken: + case CSSToken(c).tokenType: + of CSS_FUNCTION_TOKEN, CSS_AT_KEYWORD_TOKEN, CSS_URL_TOKEN: + eprint CSSToken(c).tokenType, CSSToken(c).value + of CSS_HASH_TOKEN: + stderr.write('#' & $CSSToken(c).value) + of CSS_IDENT_TOKEN: + stderr.write(CSSToken(c).value) + of CSS_STRING_TOKEN: + stderr.write("\"" & $CSSToken(c).value & "\"") + of CSS_DELIM_TOKEN: + stderr.write(CSSToken(c).rvalue) + of CSS_DIMENSION_TOKEN: + eprint CSSToken(c).tokenType, CSSToken(c).nvalue, "unit", CSSToken(c).unit, CSSToken(c).tflagb + of CSS_NUMBER_TOKEN: + stderr.write($CSSToken(c).nvalue & $CSSToken(c).unit) + of CSS_PERCENTAGE_TOKEN: + stderr.write($CSSToken(c).nvalue & "%") + of CSS_COLON_TOKEN: + stderr.write(":") + of CSS_WHITESPACE_TOKEN: + stderr.write(" ") + of CSS_SEMICOLON_TOKEN: + stderr.write(";\n") + of CSS_COMMA_TOKEN: + stderr.write(",") + else: + eprint CSSToken(c).tokenType + elif c of CSSDeclaration: + stderr.write(CSSDeclaration(c).name) + stderr.write(": ") + for s in CSSDeclaration(c).value: + printc(s) + stderr.write(";\n") + elif c of CSSFunction: + eprint "FUNCTION", CSSFunction(c).name + for s in CSSFunction(c).value: + printc(s) + elif c of CSSSimpleBlock: + case CSSSimpleBlock(c).token.tokenType + of CSS_LBRACE_TOKEN: eprint "{" + of CSS_LPAREN_TOKEN: stderr.write("(") + of CSS_LBRACKET_TOKEN: stderr.write("[") + else: discard + for s in CSSSimpleBlock(c).value: + printc(s) + case CSSSimpleBlock(c).token.tokenType + of CSS_LBRACE_TOKEN: eprint "}" + of CSS_LPAREN_TOKEN: stderr.write(")") + of CSS_LBRACKET_TOKEN: stderr.write("]") + else: discard + + +proc parseCSS*(inputStream: Stream) = + let ss = inputStream.parseStylesheet() + for v in ss.value: + if v of CSSAtRule: + eprint CSSAtRule(v).name + else: + for c in CSSQualifiedRule(v).prelude: + printc(c) + case v.oblock.token.tokenType + of CSS_LBRACE_TOKEN: eprint "\n{" + of CSS_LPAREN_TOKEN: eprint "(" + of CSS_LBRACKET_TOKEN: eprint "[" + else: discard + for s in v.oblock.value: + printc(s) + case v.oblock.token.tokenType + of CSS_LBRACE_TOKEN: eprint "\n}" + of CSS_LPAREN_TOKEN: eprint ")" + of CSS_LBRACKET_TOKEN: eprint "]" + else: discard diff --git a/src/default.css b/src/default.css deleted file mode 100644 index db11f036..00000000 --- a/src/default.css +++ /dev/null @@ -1,20 +0,0 @@ -head { - display: none -} - -a, abbr, b, bdo, br, button, cite, code, -del, dfn, em, font, i, img, ins, input, -iframe, kbd, label, map, object, q, samp, -script, select, small, span, strong, sub, sup, -textarea, tt, var, font, iframe, u, s, strike, -frame, img, input { - display: block -} - -li { - display: list-item -} - -br::before { - content: "\\" -} diff --git a/src/dom.nim b/src/dom.nim index 9585456a..c9dc367e 100644 --- a/src/dom.nim +++ b/src/dom.nim @@ -48,6 +48,8 @@ type Document* = ref DocumentObj DocumentObj = object of NodeObj location*: Uri + id_elements*: Table[string, Element] + class_elements*: Table[string, seq[Element]] CharacterData* = ref CharacterDataObj CharacterDataObj = object of NodeObj @@ -329,3 +331,28 @@ func getAttrValue*(element: Element, s: string): string = if attr != nil: return attr.value return "" + + +#type +# SelectorType = enum +# TYPE_SELECTOR, ID_SELECTOR, ATTR_SELECTOR, CLASS_SELECTOR, CHILD_SELECTOR, +# UNIVERSAL_SELECTOR +# +# Selector = object +# t: SelectorType +# s0: string +# s1: string +# +#proc querySelector*(document: Document, q: string): seq[Element] = +# #let ss = newStringStream(q) +# #let cvals = parseCSSListOfComponentValues(ss) +# #var selectors: seq[Selector] +# return +# +# #for cval in cvals: +# # if cval of CSSToken: +# # case CSSToken(cval).tokenType +# # of CSS_DELIM_TOKEN: +# # if cval.rvalue == Rune('*'): +# # selectors.add(Selector(t)) +# # printc(cval) diff --git a/src/entity b/src/entity deleted file mode 100755 index 05f6705f..00000000 --- a/src/entity +++ /dev/null Binary files differdiff --git a/src/entity.nim b/src/entity.nim index 3ebe9df0..dcac258e 100644 --- a/src/entity.nim +++ b/src/entity.nim @@ -1,18 +1,29 @@ import radixtree import json -import tables -import strutils -import unicode -import twtstr -proc genEntityMap(): RadixTree[string] = - let entity = staticRead"entity.json" - let entityJson = parseJson(entity) - var entityMap = newRadixTree[string]() +when defined(small): + proc genEntityMap(data: seq[tuple[a: string, b: string]]): StaticRadixTree[string] = + result = newStaticRadixTree[string]() + for pair in data: + result[pair.a] = pair.b - for k, v in entityJson: - entityMap[k.substr(1)] = v{"characters"}.getStr() + proc genEntityHashMap(): seq[tuple[a: string, b: string]] = + let entity = staticRead"../res/entity.json" + let entityJson = parseJson(entity) - return entityMap + for k, v in entityJson: + result.add((k.substr(1), v{"characters"}.getStr())) + const entityHashMap = genEntityHashMap() + let entityMap* = genEntityMap(entityHashMap) #TODO: use refs here +else: + import tables + proc genEntityMap(): StaticRadixTree[string] = + let entity = staticRead"../res/entity.json" + let entityJson = parseJson(entity) + var entityMap = newStaticRadixTree[string]() -const entityMap* = genEntityMap() + for k, v in entityJson: + entityMap[k.substr(1)] = v{"characters"}.getStr() + + return entityMap + const entityMap* = genEntityMap() diff --git a/src/enums.nim b/src/enums.nim index a0606332..62e96e4c 100644 --- a/src/enums.nim +++ b/src/enums.nim @@ -61,9 +61,9 @@ type CSS_HASH_TOKEN, CSS_STRING_TOKEN, CSS_BAD_STRING_TOKEN, CSS_URL_TOKEN, CSS_BAD_URL_TOKEN, CSS_DELIM_TOKEN, CSS_NUMBER_TOKEN, CSS_PERCENTAGE_TOKEN, CSS_DIMENSION_TOKEN, CSS_WHITESPACE_TOKEN, CSS_CDO_TOKEN, CSS_CDC_TOKEN, - CSS_COLON_TOKEN, CSS_SEMICOLON_TOKEN, CSS_COMMA_TOKEN, CSS_OBRACKET_TOKEN, - CSS_CBRACKET_TOKEN, CSS_OPAREN_TOKEN, CSS_CPAREN_TOKEN, CSS_OBRACE_TOKEN, - CSS_CBRACE_TOKEN + CSS_COLON_TOKEN, CSS_SEMICOLON_TOKEN, CSS_COMMA_TOKEN, CSS_RBRACKET_TOKEN, + CSS_LBRACKET_TOKEN, CSS_LPAREN_TOKEN, CSS_RPAREN_TOKEN, CSS_LBRACE_TOKEN, + CSS_RBRACE_TOKEN const DisplayInlineTags* = { TAG_A, TAG_ABBR, TAG_B, TAG_BDO, TAG_BR, TAG_BUTTON, TAG_CITE, TAG_CODE, @@ -86,13 +86,6 @@ const DisplayTableColumnTags* = { TAG_COL } -const DisplayBlockTags* = { - TAG_ADDRESS, TAG_BLOCKQUOTE, TAG_CENTER, TAG_DEL, TAG_DIR, TAG_DIV, TAG_DL, - TAG_FIELDSET, TAG_FORM, TAG_H1, TAG_H2, TAG_H3, TAG_H4, TAG_H5, TAG_H6, - TAG_HR, TAG_INS, TAG_MENU, TAG_NOFRAMES, TAG_NOSCRIPT, TAG_OL, TAG_P, TAG_PRE, - TAG_TABLE, TAG_UL, TAG_CENTER, TAG_DIR, TAG_MENU, TAG_NOFRAMES, TAG_BODY, -} - const SelfClosingTagTypes* = { TAG_LI, TAG_P } diff --git a/src/parser.nim b/src/htmlparser.nim index 724d2b30..86065ef7 100644 --- a/src/parser.nim +++ b/src/htmlparser.nim @@ -9,7 +9,6 @@ import enums import twtstr import dom import radixtree -import style import entity type @@ -27,9 +26,6 @@ type parentNode: Node textNode: Text - CSSParseState = object - in_comment: bool - #func newHtmlElement(tagType: TagType, parentNode: Node): HtmlElement = # case tagType # of TAG_INPUT: result = new(HtmlInputElement) @@ -159,7 +155,7 @@ proc getescapecmd(buf: string, at: var int): string = if not entityMap.hasPrefix(s, n): break let pn = n - n = entityMap[s, n] + n = entityMap{s, n} if n != pn: s = "" inc i @@ -320,6 +316,12 @@ proc processDocumentStartElement(state: var HTMLParseState, element: Element, ta else: discard processDocumentStartNode(state, element) + if element.ownerDocument != nil: + for c in element.classList: + element.ownerDocument.id_elements[c] = element + if not (c in element.ownerDocument.class_elements): + element.ownerDocument.class_elements[c] = newSeq[Element]() + element.ownerDocument.class_elements[c].add(element) if element.tagType in VoidTagTypes: processDocumentEndNode(state) @@ -467,89 +469,3 @@ proc parseHtml*(inputStream: Stream): Document = inputStream.close() return document - -proc consumeCSSString(state: var CSSParseState, line: seq[Rune], at: var int): CSSToken = - var s: seq[Rune] - let ending = line[at] - inc at - if at >= line.len: - return CSSToken(tokenType: CSS_STRING_TOKEN, value: s) - - while line[at] != ending: - inc at - - if at >= line.len: - return CSSToken(tokenType: CSS_STRING_TOKEN, value: s) - - s &= line[at] - - case line[at] - of Rune('\n'): - return CSSToken(tokenType: CSS_BAD_STRING_TOKEN) - of Rune('\\'): - inc at - if at > line.len: - break - var num = hexValue(line[at]) - if num != -1: - let ca = at - inc at - while at < line.len and hexValue(line[at]) != -1 and ca - at <= 5: - num *= 0x10 - num += hexValue(line[at]) - inc at - if num == 0 or num > 0x10FFFF or num in {0xD800..0xDFFF}: - s &= Rune(0xFFFD) - else: - s &= Rune(num) - else: discard - -proc consumeCSSNumberSign(state: var CSSParseState, line: seq[Rune], at: var int): CSSToken = - inc at - if at < line.len: - if isNameCodePoint(line[at]): - discard - else: - discard - - -proc consumeCSSToken(state: var CSSParseState, line: seq[Rune], at: var int): CSSToken = - case line[at] - of Rune('\n'), Rune('\t'), Rune(' '): - while at < line.len and line[at].isWhitespace(): - inc at - return CSSToken(tokenType: CSS_WHITESPACE_TOKEN) - of Rune('"'): - return consumeCSSString(state, line, at) - of Rune('#'): - return consumeCSSNumberSign(state, line, at) - else: inc at - -proc tokenizeCSS*(inputStream: Stream): seq[CSSToken] = - var line: seq[Rune] - var state: CSSParseState - while not inputStream.atEnd(): - line = inputStream.readLine().toRunes() - var cline: seq[Rune] = @[] - var lfc = false - for r in line: - case r - of Rune('\r'), Rune('\f'), Rune('\n'): - lfc = true - of Rune(0), Rune(0xD800)..Rune(0xDFFF): - cline &= Rune(0xFFFD) - else: - if lfc: - cline &= Rune('\n') - cline &= r - - cline &= Rune('\n') - var lat = 0 - while lat < cline.len: - result.add(consumeCSSToken(state, cline, lat)) - - inputStream.close() - -proc parseCSS*(inputStream: Stream) = - for t in tokenizeCSS(inputStream): - eprint t.tokenType diff --git a/src/main.nim b/src/main.nim index bc212ef0..95f1f3cd 100644 --- a/src/main.nim +++ b/src/main.nim @@ -8,7 +8,9 @@ import termattrs import buffer import twtio import config -import parser +import htmlparser +import dom +import style let clientInstance = newHttpClient() proc loadRemotePage*(url: string): string = @@ -44,6 +46,9 @@ proc main*() = let uri = parseUri(paramStr(1)) buffers.add(buffer) buffer.document = parseHtml(getPageUri(uri)) + #discard buffer.document.querySelector("#hi.a[title=\"test\"]") + var box = CSSBox() + applyProperties(box, "color: #090; line-height: 1.2") buffer.setLocation(uri) buffer.renderHtml() var lastUri = uri diff --git a/src/radixtree.nim b/src/radixtree.nim index b04e1c22..0d9db3ab 100644 --- a/src/radixtree.nim +++ b/src/radixtree.nim @@ -1,25 +1,198 @@ # Radix tree implementation, with some caveats: # * insertion takes forever, so try to insert only during compile-time # * it isn't that much faster than a hash table, even when used for e.g. parsing +# +# Update: now it also has a version using references. Should be somewhat faster +# at the cost of having to initialize it every time the program is started. -import tables import strutils import json +import tables type - RadixNode[T] = object - children*: Table[string, int] + RadixPair[T] = tuple[k: string, v: RadixNode[T]] + + RadixNode*[T] = ref object + children*: seq[RadixPair[T]] case leaf*: bool of true: value*: T of false: discard - RadixTree*[T] = object - nodes*: seq[RadixNode[T]] + StaticRadixPair = tuple[k: string, v: int] + StaticRadixPairSeq = seq[StaticRadixPair] + + StaticRadixNode[T] = object + children*: StaticRadixPairSeq + case leaf*: bool + of true: value*: T + of false: discard + + StaticRadixTree*[T] = object + nodes*: seq[StaticRadixNode[T]] + +func newStaticRadixTree*[T](): StaticRadixTree[T] = + result.nodes.add(StaticRadixNode[T](leaf: false)) + +func newRadixTree*[T](): RadixNode[T] = + new(result) + +func toRadixTree*[T](table: Table[string, T]): RadixNode[T] = + result = newRadixTree[T]() + for k, v in table: + result[k] = v + +# PairSeq Insert: theoretically this should only be called when there's no +# conflicts... TODO: so we should be able to just compare the first char? +# probably a bad idea... +proc `[]=`(pairseq: var StaticRadixPairSeq, k: string, v: int) = + var i = 0 + while i < pairseq.len: + if pairseq[i].k == k: + pairseq[i].v = v + return + inc i + + pairseq.add((k: k, v: v)) + +proc `[]=`[T](node: RadixNode[T], k: string, n: RadixNode[T]) = + var i = 0 + assert(k.len > 0) + while i < node.children.len: + if node.children[i].k == k: + node.children[i].v = n + return + inc i + + node.children.add((k: k, v: n)) -func newRadixTree*[T](): RadixTree[T] = - result.nodes.add(RadixNode[T](leaf: false)) +# PairSeq Lookup: since we're sure k is in pairseq, return the first match. +func `[]`(pairseq: StaticRadixPairSeq, k: string): int = + var i = 0 + while i < pairseq.len: + if pairseq[i].k[0] == k[0]: + return pairseq[i].v + inc i + + return -1 -proc `[]=`*[T](tree: var RadixTree[T], key: string, value: T) = +func `[]`[T](node: RadixNode[T], k: string): RadixNode[T] = + var i = 0 + while i < node.children.len: + if node.children[i].k[0] == k[0]: + return node.children[i].v + inc i + + return nil + +# getOrDefault: we have to compare the entire string but if it doesn't match +# exactly we can just return default. +func getOrDefault(pairseq: StaticRadixPairSeq, k: string, default: int): int = + var i = 0 + while i < pairseq.len: + if pairseq[i].k[0] == k[0]: + if k.len != pairseq[i].k.len: + return default + var j = 1 + while j < k.len: + if pairseq[i].k[j] != k[j]: + return default + inc j + return pairseq[i].v + inc i + return default + +func getOrDefault[T](node: RadixNode[T], k: string, default: RadixNode[T]): RadixNode[T] = + var i = 0 + while i < node.children.len: + if node.children[i].k[0] == k[0]: + if k.len != node.children[i].k.len: + debugecho "defa: ", k, " ", node.children[i].k + return default + var j = 1 + while j < k.len: + if node.children[i].k[j] != k[j]: + return default + inc j + return node.children[i].v + inc i + return default + +func getOrDefault[T](node: RadixNode[T], k: string, default: int): int = + var i = 0 + while i < node.children.len: + if node.children[i].k[0] == k[0]: + if k.len != node.children[i].k.len: + return default + var j = 1 + while j < k.len: + if node.children[i].k[j] != k[j]: + return default + inc j + return i + inc i + return default + +iterator keys(pairseq: StaticRadixPairSeq): string = + var i = 0 + while i < pairseq.len: + yield pairseq[i].k + inc i + +iterator keys*[T](node: RadixNode[T]): string = + var i = 0 + while i < node.children.len: + yield node.children[i].k + inc i + +# AKA `in`. +func contains(pairseq: StaticRadixPairSeq, k: string): bool = + var i = 0 + while i < pairseq.len: + if pairseq[i].k[0] == k[0]: + if k.len != pairseq[i].k.len: + return false + var j = 1 + while j < k.len: + if pairseq[i].k[j] != k[j]: + return false + inc j + return true + inc i + return false + +func contains[T](node: RadixNode[T], k: string): bool = + var i = 0 + while i < node.children.len: + if node.children[i].k[0] == k[0]: + if k.len != node.children[i].k.len: + return false + var j = 1 + while j < k.len: + if node.children[i].k[j] != k[j]: + return false + inc j + return true + inc i + return false + +# Delete proc: again we should be able to check for first char only... TODO? +proc del(pairseq: var StaticRadixPairSeq, k: string) = + var i = 0 + while i < pairseq.len: + if pairseq[i].k == k: + pairseq.del(i) + return + inc i + +proc add[T](node: RadixNode[T], k: string, v: T) = + node.children.add((k, RadixNode[T](leaf: true, value: v))) + +proc add[T](node: RadixNode[T], k: string) = + node.children.add((k, RadixNode[T](leaf: false))) + +# Insert: this is ugly and I'm not quite sure about what it does at all. Oh +# well. +proc `[]=`*[T](tree: var StaticRadixTree[T], key: string, value: T) = var n = 0 var p = 0 var i = 0 @@ -40,7 +213,7 @@ proc `[]=`*[T](tree: var RadixTree[T], key: string, value: T) = s = "" for k in tree.nodes[n].children.keys: - if s.len > 0 and k.startsWith(s[0]): + if s.len > 0 and k[0] == s[0]: p = n n = tree.nodes[n].children[k] t &= k @@ -49,7 +222,7 @@ proc `[]=`*[T](tree: var RadixTree[T], key: string, value: T) = # if first node, just add normally if n == 0: - tree.nodes.add(RadixNode[T](leaf: true, value: value)) + tree.nodes.add(StaticRadixNode[T](leaf: true, value: value)) tree.nodes[n].children[key] = int(tree.nodes.len - 1) else: i = 0 @@ -71,35 +244,146 @@ proc `[]=`*[T](tree: var RadixTree[T], key: string, value: T) = assert(i != 0) tree.nodes[p].children[key.substr(j, i - 1)] = int(tree.nodes.len) - tree.nodes.add(RadixNode[T](leaf: false)) + tree.nodes.add(StaticRadixNode[T](leaf: false)) tree.nodes[^1].children[t.substr(i)] = n tree.nodes[^1].children[key.substr(i)] = int(tree.nodes.len) - tree.nodes.add(RadixNode[T](leaf: true, value: value)) + tree.nodes.add(StaticRadixNode[T](leaf: true, value: value)) tree.nodes[p].children.del(nodeKey) else: # new is either substr of old or old is substr of new # new matches a node, so replace if key.len == t.len: let children = tree.nodes[n].children - tree.nodes[n] = RadixNode[T](leaf: true, value: value) + tree.nodes[n] = StaticRadixNode[T](leaf: true, value: value) tree.nodes[n].children = children elif i == j: # new is longer than the old, so add child to old tree.nodes[n].children[key.substr(i)] = int(tree.nodes.len) - tree.nodes.add(RadixNode[T](leaf: true, value: value)) + tree.nodes.add(StaticRadixNode[T](leaf: true, value: value)) elif i > 0: # new is shorter than old, so: # * add new to parent # * add old to new # * remove old from parent tree.nodes[p].children[key.substr(j, i - 1)] = int(tree.nodes.len) - tree.nodes.add(RadixNode[T](leaf: true, value: value)) + tree.nodes.add(StaticRadixNode[T](leaf: true, value: value)) tree.nodes[^1].children[t.substr(i)] = n tree.nodes[p].children.del(nodeKey) -func `[]`*[T](tree: RadixTree[T], key: string, at: int = 0): int = +# Non-static insert, for extra fun - and code duplication :( +proc `[]=`*[T](tree: RadixNode[T], key: string, value: T) = + var n = tree + var p: RadixNode[T] = nil + var i = 0 + var j = 0 + var k = 0 + var s = "" + var t = "" + var l = 0 + # find last matching node + while i < key.len: + s &= key[i] + inc i + let pk = n.getOrDefault(s, -1) + if pk != -1: + k = pk + p = n + n = n.children[k].v + t &= s + j = i + s = "" + + l = 0 + for ki in n.keys: + if s.len > 0 and ki[0] == s[0]: + p = n + n = n[ki] + t &= ki + k = l + break + inc l + + # TODO: this below could be a better algorithm for what we do above + # but I'm kinda scared of touching it + #n = tree + #i = 0 + #j = 0 + #k = 0 + #t = "" + #p = nil + + #var conflict = false + #while i < key.len: + # k = 0 + # for pk in n.keys: + # if pk[0] == key[i]: + # var l = 0 + # while l < pk.len and i + l < key.len: + # if pk[l] != key[i + l]: + # conflict = true + # break + # inc l + # if not conflict: + # p = n + # n = n.children[k].v + # t &= pk + # i += l + # j = i + # break + # inc k + # inc i + + + # if first node, just add normally + if n == tree: + tree.add(key, value) + else: + i = 0 + var conflict = false + # compare new key with the one we found so far + while i < t.len and i < key.len: + if key[i] == t[i]: + inc i + else: + conflict = true + break + + if conflict: + # conflict somewhere, so: + # * add new non-leaf to parent + # * add old to non-leaf + # * add new to non-leaf + # * remove old from parent + debugecho "conflict: ", i, " ", j, " ", t, " ", key, ": ", key.substr(j, i - 1) + p[key.substr(j, i - 1)] = RadixNode[T](leaf: false) + p.children[^1].v[t.substr(i)] = n + p.children[^1].v[key.substr(i)] = RadixNode[T](leaf: true, value: value) + p.children.del(k) + else: # new is either substr of old or old is substr of new + # new matches a node, so replace + if key.len == t.len: + p.children[k].v = RadixNode[T](leaf: true, value: value, children: n.children) + elif key.len > t.len: + # new is longer than the old, so add child to old + debugecho "longer: ", i, " ", j, " ", t, " ", key, ": ", key.substr(i) + n[key.substr(i)] = RadixNode[T](leaf: true, value: value) + else: + assert(i > 0) + # new is shorter than old, so: + # * add new to parent + # * add old to new + # * remove old from parent + debugecho "shorter: ", i, " ", j, " ", t, " ", key, ": ", key.substr(i) + p[key.substr(j, i - 1)] = RadixNode[T](leaf: true, value: value) + p.children[^1].v[t.substr(i)] = n + p.children.del(k) + +func `{}`*[T](tree: StaticRadixTree[T], key: string, at: int = 0): int = return tree.nodes[at].children.getOrDefault(key, at) -func hasPrefix*[T](tree: RadixTree[T], prefix: string, at: int = 0): bool = +func `{}`*[T](tree: RadixNode[T], key: string, at: RadixNode[T] = tree): RadixNode[T] = + return tree.getOrDefault(key, at) + +func hasPrefix*[T](tree: StaticRadixTree[T], prefix: string, at: int = 0): bool = var n = at var i = 0 var j = 0 @@ -127,25 +411,30 @@ func hasPrefix*[T](tree: RadixTree[T], prefix: string, at: int = 0): bool = return false -#tests -#var tree = newRadixTree[string]() -#tree.insert("hb", "abc") -#tree.insert("hi", "second") -#tree.insert("hia", "second") -#tree.insert("hia", "third") -#tree.insert("hiahhhooo", "two point fifth") -#tree.insert("hiahhho", "two point sixth") -#assert(tree.hasPrefix("h")) -#assert(tree.hasPrefix("hi")) -#assert(not tree.hasPrefix("hio")) -#assert(tree.hasPrefix("hiah")) -#assert(tree.hasPrefix("hiahhho")) -#assert(tree.hasPrefix("hiahhhooo")) -#assert(tree.lookup("hi", "error") != "error") -#assert(tree.lookup("hb", "error") != "error") -#assert(tree.lookup("hio", "error") == "error") -#assert(tree.lookup("hia", "error") != "error") -#assert(tree.lookup("hiahhhooo", "error") != "error") -#assert(tree.lookup("hiahhho", "error") != "error") -#assert(tree.lookup("hiahhhoo", "error") == "error") -#assert(tree.lookup("h", "error") == "error") +func hasPrefix*[T](tree: RadixNode[T], prefix: string, at: RadixNode[T] = tree): bool = + var n = at + var i = 0 + var j = 0 + var s = "" + while i < prefix.len: + s &= prefix[i] + inc i + if s in n: + n = n[s] + j = i + + if j == prefix.len: + return true + + for k in n.keys: + if prefix.len - j < k.len and k[0] == prefix[j]: + i = 1 + inc j + while j < prefix.len: + inc i + inc j + if k[i] != k[j]: + return false + return true + + return false diff --git a/src/style.nim b/src/style.nim index 7de1e767..e5bee647 100644 --- a/src/style.nim +++ b/src/style.nim @@ -1,6 +1,10 @@ -import enums +import streams import unicode +import enums +import cssparser +import twtio + type CSS2Properties* = ref object rawtext*: string @@ -28,15 +32,43 @@ type selected*: bool indent*: int - CSSToken* = object - case tokenType*: CSSTokenType - of CSS_IDENT_TOKEN, CSS_FUNCTION_TOKEN, CSS_AT_KEYWORD_TOKEN, - CSS_HASH_TOKEN, CSS_STRING_TOKEN, CSS_URL_TOKEN: - value*: seq[Rune] - tflaga*: bool #id / unrestricted - of CSS_DELIM_TOKEN: - rvalue*: Rune - of CSS_NUMBER_TOKEN, CSS_PERCENTAGE_TOKEN, CSS_DIMENSION_TOKEN: - ivalue*: int - tflagb*: bool #integer / number - else: discard + CSSRect* = object + x1*: int + y1*: int + x2*: int + y2*: int + + CSSBox* = ref object + display*: DisplayType + x*: int + y*: int + innerEdge*: CSSRect + paddingEdge*: CSSRect + borderEdge*: CSSRect + marginEdge*: CSSRect + parent*: CSSBox + color*: CSSColor + margintop*: int + marginbottom*: int + marginleft*: int + marginright*: int + margin*: int + +proc applyProperties*(box: var CSSBox, props: string) = + var decls = parseCSSListOfDeclarations(newStringStream(props)) + + for item in decls: + if item of CSSDeclaration: + let d = CSSDeclaration(item) + case $d.name + of "color": + if d.value.len > 0 and d.value[0] of CSSToken and + CSSToken(d.value[0]).tokenType == CSS_HASH_TOKEN: + box.color = toColor(CSSToken(d.value[0]).value) + of "margin-top": + if d.value.len > 0 and d.value[0] of CSSToken: + if CSSToken(d.value[0]).tokenType == CSS_PERCENTAGE_TOKEN: + discard + #box.margintop = CSSToken(d.value[0]).nvalue #TODO represent percentages + else: + printc(d) diff --git a/src/twtio.nim b/src/twtio.nim index 0fdbed7c..20c8c527 100644 --- a/src/twtio.nim +++ b/src/twtio.nim @@ -1,6 +1,7 @@ import terminal import tables import unicode +import strutils import twtstr import config @@ -42,37 +43,50 @@ proc getLinedAction*(s: string): TwtAction = return NO_ACTION proc readLine*(prompt: string, current: var string, termwidth: int): bool = - var news = current.toRunes() let maxlen = termwidth - prompt.len + let promptwidth = prompt.width() + var news = current.toRunes() var s = "" var feedNext = false var escNext = false var comp = false - var compi = 0 + var compi = composeRemap var compa = 0 var comps = "" var cursor = news.len var shift = 0 + var redraw = true + printesc(prompt) while true: - let rl = news.len - if cursor < shift: - shift = max(cursor - 1, 0) - else: - while news.substr(shift, shift + cursor).width() > maxlen - 1: - shift += news[^1].width() + if redraw: + var displen = maxlen - 1 + if cursor >= shift: + while news.substr(shift, cursor).width() > maxlen - 1: + shift += 1 + while news.substr(shift, shift + displen).width() > maxlen - 1: + displen -= 1 - eraseLine() - printesc(prompt & $news.substr(shift, shift + maxlen - 1)) + shift = max(0, min(cursor - 1, shift)) - print('\r') - cursorForward(prompt.len + news.substr(shift, cursor).width()) + print('\r') + cursorForward(promptwidth) + let os = $news.substr(shift, shift + displen) + printesc(os) + print(' '.repeat(max(displen - os.width(), 0))) + + print('\r') + cursorForward(promptwidth + news.substr(shift, cursor).width()) + else: + redraw = true if not feedNext: s = "" else: feedNext = false + let c = getch() s &= c + var action = getLinedAction(s) if escNext: action = NO_ACTION @@ -86,83 +100,140 @@ proc readLine*(prompt: string, current: var string, termwidth: int): bool = if cursor > 0: news = news.substr(0, cursor - 1) & news.substr(cursor) dec cursor + else: + redraw = false of ACTION_LINED_DELETE: - if cursor > 0 and cursor < rl: + if cursor > 0 and cursor < news.len: news = news.substr(0, cursor) & news.substr(cursor + 1) + else: + redraw = false of ACTION_LINED_ESC: escNext = true of ACTION_LINED_CLEAR: news = news.substr(cursor) cursor = 0 of ACTION_LINED_KILL: - news = news.substr(0, cursor) + if cursor > 0: + news = news.substr(0, cursor) + else: + redraw = false of ACTION_LINED_BACK: if cursor > 0: dec cursor + if cursor > shift: + redraw = false + cursorBackward(news[cursor].width()) + else: + redraw = false of ACTION_LINED_FORWARD: - if cursor < rl: + if cursor < news.len: inc cursor + if news.substr(shift, cursor).width() < maxlen: + redraw = false + var n = 1 + if news.len > cursor: + n = news[cursor].width() + cursorForward(n) + else: + redraw = false of ACTION_LINED_PREV_WORD: + let oc = cursor while cursor > 0: dec cursor if news[cursor].breaksWord(): break + if cursor == oc: + redraw = false + elif cursor > shift: + cursorBackward(news.substr(cursor, oc).width()) + redraw = false of ACTION_LINED_NEXT_WORD: - while cursor < rl: + let oc = cursor + while cursor < news.len: inc cursor - if cursor < rl: + if cursor < news.len: if news[cursor].breaksWord(): break + if cursor == oc: + redraw = false + else: + let dw = news.substr(oc, cursor).width() + if oc + dw - shift < maxlen: + cursorForward(dw) + redraw = false of ACTION_LINED_KILL_WORD: var chars = 0 + while cursor > chars: inc chars if news[cursor - chars].breaksWord(): break if chars > 0: + let w = news.substr(cursor - chars, cursor).width() news = news.substr(0, cursor - chars) & news.substr(cursor) cursor -= chars - of ACTION_LINED_COMPOSE_ON: - comp = true - compi = 0 - compa = 0 - comps = "" - of ACTION_LINED_COMPOSE_OFF: - comp = false - compi = 0 - compa = 0 - comps = "" + if cursor > shift: + redraw = false + cursorBackward(w) + print(' '.repeat(w)) + cursorBackward(w) + else: + redraw = false + of ACTION_LINED_BEGIN: + if cursor > 0: + if shift == 0: + redraw = false + cursorBackward(news.substr(0, cursor).width()) + cursor = 0 + else: + redraw = false + of ACTION_LINED_END: + if cursor < news.len: + if news.substr(shift, news.len).width() < maxlen: + redraw = false + cursorForward(news.substr(shift, news.len).width()) + cursor = news.len + else: + redraw = false of ACTION_LINED_COMPOSE_TOGGLE: comp = not comp - compi = 0 + compi = composeRemap compa = 0 comps = "" + redraw = false of ACTION_FEED_NEXT: feedNext = true + redraw = false elif comp: comps &= c - let n = composeRemap[comps, compi] + let n = composeRemap{comps, compi} if n != compi: compi = n compa += comps.len comps = "" - if composeRemap.hasPrefix(comps, compi) and composeRemap.nodes[n].children.len > 0: + if composeRemap.hasPrefix(comps, compi) and n.children.len > 0: feedNext = true else: var cs = "" - if composeRemap.nodes[compi].leaf: - cs = composeRemap.nodes[compi].value + if compi.leaf: + cs = compi.value else: cs = s.substr(0, compa - 1) comps = s.substr(compa) - if not composeRemap.hasPrefix(comps, 0): + if comps.len > 0 and composeRemap.hasPrefix(comps): + compa = comps.len + compi = composeRemap{comps} + s = comps + comps = "" + feedNext = true + else: cs &= comps + compa = 0 + compi = composeRemap comps = "" news = news.substr(0, cursor) & cs.toRunes() & news.substr(cursor) cursor += cs.runeLen() - compi = 0 - compa = 0 elif validateUtf8(s) == -1: var cs = "" for c in s: @@ -173,8 +244,20 @@ proc readLine*(prompt: string, current: var string, termwidth: int): bool = escNext = false escNext = false if cs.len == 0: + redraw = false continue - news = news.substr(0, cursor) & cs.toRunes() & news.substr(cursor) - cursor += cs.runeLen() + + let csr = cs.toRunes() + + if cursor >= news.len and + news.substr(shift, cursor).width() + csr.width() < maxlen - 1: + cursor += csr.len + news &= csr + print(csr) + redraw = false + else: + news = news.substr(0, cursor) & csr & news.substr(cursor) + cursor += csr.len else: feedNext = true + redraw = false diff --git a/src/twtstr.nim b/src/twtstr.nim index 52db36cf..aa2cf2c7 100644 --- a/src/twtstr.nim +++ b/src/twtstr.nim @@ -159,6 +159,11 @@ func decValue*(r: Rune): int = return decValue(char(r)) return -1 +func toAsciiLower*(s: seq[Rune]): string = + for r in s: + if isAscii(r): + result &= lowerChars[int(r)] + func breaksWord*(r: Rune): bool = return r in breakWord @@ -168,12 +173,6 @@ func isAlphaAscii*(r: Rune): bool = func isDigitAscii*(r: Rune): bool = return isAscii(r) and isDigit(char(r)) -func isNameStartCodePoint*(r: Rune): bool = - return not isAscii(r) or r == Rune('_') or isAlphaAscii(r) - -func isNameCodePoint*(r: Rune): bool = - return isNameStartCodePoint(r) or isDigitAscii(r) or r == Rune('-') - func substr*(s: seq[Rune], i: int, j: int): seq[Rune] = if s.len == 0: return @[] @@ -210,33 +209,33 @@ func bisearch(ucs: Rune, table: openarray[(int, int)]): bool = # # - The null character (U+0000) has a column width of 0. # -# - Other C0/C1 control characters and DEL will lead to a return -# value of 2. +# - Other C0/C1 control characters and DEL will lead to a return value of 2 +# (changed from 0 b/c we normally display control chars like ^H - TODO?). # -# - Non-spacing and enclosing combining characters (general -# category code Mn or Me in the Unicode database) have a -# column width of 0. +# - Non-spacing and enclosing combining characters (general category code Mn +# or Me in the Unicode database) have a column width of 0. # # - SOFT HYPHEN (U+00AD) has a column width of 1. # # - Other format characters (general category code Cf in the Unicode # database) and ZERO WIDTH SPACE (U+200B) have a column width of 0. # -# - Hangul Jamo medial vowels and final consonants (U+1160-U+11FF) -# have a column width of 0. +# - Hangul Jamo medial vowels and final consonants (U+1160-U+11FF) have a +# column width of 0. # -# - Spacing characters in the East Asian Wide (W) or East Asian -# Full-width (F) category as defined in Unicode Technical -# Report #11 have a column width of 2. +# - Spacing characters in the East Asian Wide (W) or East Asian Full-width +# (F) category as defined in Unicode Technical Report #11 have a column +# width of 2. # -# - All remaining characters (including all printable -# ISO 8859-1 and WGL4 characters, Unicode control characters, -# etc.) have a column width of 1. +# - All remaining characters (including all printable ISO 8859-1 and WGL4 +# characters, Unicode control characters, etc.) have a column width of 1. # #This implementation assumes that wchar_t characters are encoded #in ISO 10646. # +# sorted list of non-overlapping intervals of non-spacing characters +# generated by "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c" const combining = [ ( 0x0300, 0x036F ), ( 0x0483, 0x0486 ), ( 0x0488, 0x0489 ), ( 0x0591, 0x05BD ), ( 0x05BF, 0x05BF ), ( 0x05C1, 0x05C2 ), @@ -288,21 +287,9 @@ const combining = [ ( 0xE0100, 0xE01EF ) ] -func width*(r: Rune): int = +func is_dwidth(r: Rune): bool = let ucs = int(r) - # sorted list of non-overlapping intervals of non-spacing characters - # generated by "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c" - - # binary search in table of non-spacing characters - if bisearch(r, combining): - return 0 - - if r.isControlChar(): - return 2 - - # if we arrive here, ucs is not a combining or C0/C1 control character - - if (ucs >= 0x1100 and + return (ucs >= 0x1100 and (ucs <= 0x115f or # Hangul Jamo init. consonants ucs == 0x2329 or ucs == 0x232a or (ucs >= 0x2e80 and ucs <= 0xa4cf and @@ -314,9 +301,65 @@ func width*(r: Rune): int = (ucs >= 0xff00 and ucs <= 0xff60) or # Fullwidth Forms (ucs >= 0xffe0 and ucs <= 0xffe6) or (ucs >= 0x20000 and ucs <= 0x2fffd) or - (ucs >= 0x30000 and ucs <= 0x3fffd))): - return 2 - return 1 + (ucs >= 0x30000 and ucs <= 0x3fffd))) + +func makewidthtable(): array[0..0x10FFFF, byte] = + for r in low(char)..high(char): + if r.isControlChar(): + result[int(r)] = 2 + else: + result[int(r)] = 1 + + var i = 0 + var next_combining = combining[i] + for ucs in 256..0x10FFFF: + if ucs >= next_combining[0]: + if ucs <= next_combining[1]: + result[ucs] = 0 + continue + elif i + 1 < combining.len: + inc i + next_combining = combining[i] + + if Rune(ucs).is_dwidth(): + result[ucs] = 2 + else: + result[ucs] = 1 + + for range in combining: + for r in range[0]..range[1]: + result[r] = 0 + + +# lowmem: use slow binary search etc method +when defined(lowmem): + func width*(r: Rune): int = + # binary search in table of non-spacing characters + if bisearch(r, combining): + return 0 + + if r.isControlChar(): + return 2 + + # if we arrive here, ucs is not a combining or C0/C1 control character + + if r.is_dwidth(): + return 2 + return 1 + + func width*(r: Rune): int = + return int(width_table[int(r)]) +# small: store lookup table in memory on startup +elif defined(small): + let width_table = makewidthtable() + func width*(r: Rune): int = + {.cast(noSideEffect).}: + return int(width_table[int(r)]) +# release: store lookup table in executable +else: + const width_table = makewidthtable() + func width*(r: Rune): int = + return int(width_table[int(r)]) func width*(s: string): int = for r in s.runes(): @@ -326,15 +369,9 @@ func width*(s: seq[Rune]): int = for r in s: result += width(r) -# -# The following functions are the same as mk_wcwidth() and -# mk_wcswidth(), except that spacing characters in the East Asian -# Ambiguous (A) category as defined in Unicode Technical Report #11 -# have a column width of 2. This variant might be useful for users of -# CJK legacy encodings who want to migrate to UCS without changing -# the traditional terminal character-width behaviour. It is not -# otherwise recommended for general use. -# +# sorted list of non-overlapping intervals of East Asian Ambiguous +# characters, generated by "uniset +WIDTH-A -cat=Me -cat=Mn -cat=Cf c" + const ambiguous = [ ( 0x00A1, 0x00A1 ), ( 0x00A4, 0x00A4 ), ( 0x00A7, 0x00A8 ), ( 0x00AA, 0x00AA ), ( 0x00AE, 0x00AE ), ( 0x00B0, 0x00B4 ), @@ -390,15 +427,24 @@ const ambiguous = [ ( 0xFFFD, 0xFFFD ), ( 0xF0000, 0xFFFFD ), ( 0x100000, 0x10FFFD ) ] -func mk_wcwidth_cjk(ucs: Rune): int = - # sorted list of non-overlapping intervals of East Asian Ambiguous - # characters, generated by "uniset +WIDTH-A -cat=Me -cat=Mn -cat=Cf c" +# +# The following functions are the same as mk_wcwidth() and +# mk_wcswidth(), except that spacing characters in the East Asian +# Ambiguous (A) category as defined in Unicode Technical Report #11 +# have a column width of 2. This variant might be useful for users of +# CJK legacy encodings who want to migrate to UCS without changing +# the traditional terminal character-width behaviour. It is not +# otherwise recommended for general use. +# +# note: seconded, this should only be used if some option was changed (TODO: +# make such an option available) +func mk_wcwidth_cjk(r: Rune): int = # binary search in table of non-spacing characters - if bisearch(ucs, ambiguous): + if bisearch(r, ambiguous): return 2; - return width(ucs); + return r.width(); func mk_wcswidth_cjk(s: string): int = for r in s.runes: |