diff options
author | bptato <nincsnevem662@gmail.com> | 2024-05-10 14:56:28 +0200 |
---|---|---|
committer | bptato <nincsnevem662@gmail.com> | 2024-05-10 15:07:24 +0200 |
commit | 99c6d7cd15a29ffba54836f26151847176a8569c (patch) | |
tree | b9cc9308ba1fd7d845c186f441b72524c0ae453d /src | |
parent | 2453c63b0b12baa9bd78c0a114b58f1c3833e967 (diff) | |
download | chawan-99c6d7cd15a29ffba54836f26151847176a8569c.tar.gz |
luwrap: use separate context (+ various cleanups)
Use a LUContext to only load required CharRanges once per pager. Also, add kana & hangul vi word break categories for convenience.
Diffstat (limited to 'src')
-rw-r--r-- | src/bindings/libunicode.nim | 2 | ||||
-rw-r--r-- | src/css/values.nim | 90 | ||||
-rw-r--r-- | src/local/container.nim | 98 | ||||
-rw-r--r-- | src/local/lineedit.nim | 11 | ||||
-rw-r--r-- | src/local/pager.nim | 10 | ||||
-rw-r--r-- | src/utils/luwrap.nim | 78 | ||||
-rw-r--r-- | src/utils/strwidth.nim | 45 | ||||
-rw-r--r-- | src/utils/twtstr.nim | 171 | ||||
-rw-r--r-- | src/utils/wordbreak.nim | 44 |
9 files changed, 293 insertions, 256 deletions
diff --git a/src/bindings/libunicode.nim b/src/bindings/libunicode.nim index 13a36da4..18d146fe 100644 --- a/src/bindings/libunicode.nim +++ b/src/bindings/libunicode.nim @@ -25,7 +25,7 @@ proc unicode_normalize*(pdst: ptr ptr uint32; src: ptr uint32; src_len: cint; realloc_func: DynBufReallocFunc): cint proc unicode_script*(cr: ptr CharRange; script_name: cstring; is_ext: cint): - cint {.importc, header: "quickjs/libunicode.h".} + cint proc unicode_prop*(cr: ptr CharRange; prop_name: cstring): cint proc unicode_general_category*(cr: ptr CharRange; gc_name: cstring): cint diff --git a/src/css/values.nim b/src/css/values.nim index b696f0e4..431b0718 100644 --- a/src/css/values.nim +++ b/src/css/values.nim @@ -655,6 +655,96 @@ func numToFixed(n: int; map: openArray[Rune]): string = return $n return $map[n] +func numberAdditive(i: int; range: HSlice[int, int]; + symbols: openArray[(int, string)]): string = + if i notin range: + return $i + var n = i + var at = 0 + while n > 0: + if n >= symbols[at][0]: + n -= symbols[at][0] + result &= symbols[at][1] + continue + inc at + return result + +const romanNumbers = [ + (1000, "M"), (900, "CM"), (500, "D"), (400, "CD"), (100, "C"), (90, "XC"), + (50, "L"), (40, "XL"), (10, "X"), (9, "IX"), (5, "V"), (4, "IV"), (1, "I") +] + +const romanNumbersLower = block: + var res: seq[(int, string)] + for (n, s) in romanNumbers: + res.add((n, s.toLowerAscii())) + res + +func romanNumber(i: int): string = + return numberAdditive(i, 1..3999, romanNumbers) + +func romanNumberLower(i: int): string = + return numberAdditive(i, 1..3999, romanNumbersLower) + +func japaneseNumber(i: int): string = + if i == 0: + return "〇" + var n = i + if i < 0: + result &= "マイナス" + n *= -1 + let o = n + var ss: seq[string] = @[] + var d = 0 + while n > 0: + let m = n mod 10 + if m != 0: + case d + of 1: ss.add("十") + of 2: ss.add("百") + of 3: ss.add("千") + of 4: + ss.add("万") + ss.add("一") + of 5: + ss.add("万") + ss.add("十") + of 6: + ss.add("万") + ss.add("百") + of 7: + ss.add("万") + ss.add("千") + ss.add("一") + of 8: + ss.add("億") + ss.add("一") + of 9: + ss.add("億") + ss.add("十") + else: discard + case m + of 0: + inc d + n = n div 10 + of 1: + if o == n: + ss.add("一") + of 2: ss.add("二") + of 3: ss.add("三") + of 4: ss.add("四") + of 5: ss.add("五") + of 6: ss.add("六") + of 7: ss.add("七") + of 8: ss.add("八") + of 9: ss.add("九") + else: discard + n -= m + n = ss.len - 1 + while n >= 0: + result &= ss[n] + dec n + func listMarker*(t: CSSListStyleType; i: int): string = case t of ListStyleTypeNone: return "" diff --git a/src/local/container.nim b/src/local/container.nim index 985e45ef..502b1de1 100644 --- a/src/local/container.nim +++ b/src/local/container.nim @@ -155,15 +155,16 @@ type mainConfig*: Config flags*: set[ContainerFlag] images*: seq[PosBitmap] + luctx: LUContext jsDestructor(Highlight) jsDestructor(Container) proc newContainer*(config: BufferConfig; loaderConfig: LoaderClientConfig; - url: URL; request: Request; attrs: WindowAttributes; title: string; - redirectDepth: int; flags: set[ContainerFlag]; contentType: Option[string]; - charsetStack: seq[Charset]; cacheId: int; cacheFile: string; - mainConfig: Config): Container = + url: URL; request: Request; luctx: LUContext; attrs: WindowAttributes; + title: string; redirectDepth: int; flags: set[ContainerFlag]; + contentType: Option[string]; charsetStack: seq[Charset]; cacheId: int; + cacheFile: string; mainConfig: Config): Container = return Container( url: url, request: request, @@ -182,7 +183,8 @@ proc newContainer*(config: BufferConfig; loaderConfig: LoaderClientConfig; cacheFile: cacheFile, process: -1, mainConfig: mainConfig, - flags: flags + flags: flags, + luctx: luctx ) func location(container: Container): URL {.jsfget.} = @@ -597,7 +599,7 @@ proc cursorLineTextStart(container: Container) {.jsfunc.} = if container.numLines == 0: return var x = 0 for r in container.currentLine.runes: - if not r.isWhiteSpaceLU(): + if not container.luctx.isWhiteSpaceLU(r): break x += r.twidth(x) if x == 0: @@ -705,7 +707,25 @@ proc cursorLineBegin(container: Container) {.jsfunc.} = proc cursorLineEnd(container: Container) {.jsfunc.} = container.setCursorX(container.currentLineWidth() - 1) -type BreakFunc = proc(r: Rune): BreakCategory {.nimcall.} +type BreakFunc = proc(ctx: LUContext; r: Rune): BreakCategory {.nimcall.} + +proc skipSpace(container: Container; b, x: var int; breakFunc: BreakFunc) = + while b < container.currentLine.len: + var r: Rune + let pb = b + fastRuneAt(container.currentLine, b, r) + if container.luctx.breakFunc(r) != bcSpace: + b = pb + break + x += r.twidth(x) + +proc skipSpaceRev(container: Container; b, x: var int; breakFunc: BreakFunc) = + while b >= 0: + let (r, o) = lastRune(container.currentLine, b) + if container.luctx.breakFunc(r) != bcSpace: + break + b -= o + x -= r.twidth(x) proc cursorNextWord(container: Container; breakFunc: BreakFunc) = if container.numLines == 0: return @@ -714,7 +734,7 @@ proc cursorNextWord(container: Container; breakFunc: BreakFunc) = var x = container.cursorx # meow let currentCat = if b < container.currentLine.len: - container.currentLine.runeAt(b).breakFunc() + container.luctx.breakFunc(container.currentLine.runeAt(b)) else: bcSpace if currentCat != bcSpace: @@ -722,20 +742,11 @@ proc cursorNextWord(container: Container; breakFunc: BreakFunc) = while b < container.currentLine.len: let pb = b fastRuneAt(container.currentLine, b, r) - if r.breakFunc() != currentCat: + if container.luctx.breakFunc(r) != currentCat: b = pb break x += r.twidth(x) - - # skip space - while b < container.currentLine.len: - let pb = b - fastRuneAt(container.currentLine, b, r) - if r.breakFunc() != bcSpace: - b = pb - break - x += r.twidth(x) - + container.skipSpace(b, x, breakFunc) if b < container.currentLine.len: container.setCursorX(x) else: @@ -761,28 +772,20 @@ proc cursorPrevWord(container: Container; breakFunc: BreakFunc) = if container.currentLine.len > 0: b = min(b, container.currentLine.len - 1) let currentCat = if b >= 0: - container.currentLine.runeAt(b).breakFunc() + container.luctx.breakFunc(container.currentLine.runeAt(b)) else: bcSpace if currentCat != bcSpace: # not in space, skip chars that have the same category while b >= 0: let (r, o) = lastRune(container.currentLine, b) - if r.breakFunc() != currentCat: + if container.luctx.breakFunc(r) != currentCat: break b -= o x -= r.twidth(x) - - # skip space - while b >= 0: - let (r, o) = lastRune(container.currentLine, b) - if r.breakFunc() != bcSpace: - break - b -= o - x -= r.twidth(x) + container.skipSpaceRev(b, x, breakFunc) else: b = -1 - if b >= 0: container.setCursorX(x) else: @@ -811,35 +814,25 @@ proc cursorWordEnd(container: Container; breakFunc: BreakFunc) = if b < container.currentLine.len: let pb = b fastRuneAt(container.currentLine, b, r) - if r.breakFunc() == bcSpace: + if container.luctx.breakFunc(r) == bcSpace: b = pb else: px = x x += r.twidth(x) - - # skip space - while b < container.currentLine.len: - let pb = b - fastRuneAt(container.currentLine, b, r) - if r.breakFunc() != bcSpace: - b = pb - break - x += r.twidth(x) - + container.skipSpace(b, x, breakFunc) # move to the last char in the current category let ob = b if b < container.currentLine.len: - let currentCat = container.currentLine.runeAt(b).breakFunc() + let currentCat = container.luctx.breakFunc(container.currentLine.runeAt(b)) while b < container.currentLine.len: let pb = b fastRuneAt(container.currentLine, b, r) - if r.breakFunc() != currentCat: + if container.luctx.breakFunc(r) != currentCat: b = pb break px = x x += r.twidth(x) x = px - if b < container.currentLine.len or ob != b: container.setCursorX(x) else: @@ -869,27 +862,19 @@ proc cursorWordBegin(container: Container; breakFunc: BreakFunc) = if b >= 0: let (r, o) = lastRune(container.currentLine, b) # if not in space, move to the left by one - if r.breakFunc() != bcSpace: + if container.luctx.breakFunc(r) != bcSpace: b -= o px = x x -= r.twidth(x) - - # skip space - while b >= 0: - let (r, o) = lastRune(container.currentLine, b) - if r.breakFunc() != bcSpace: - break - b -= o - x -= r.twidth(x) - + container.skipSpaceRev(b, x, breakFunc) # move to the first char in the current category ob = b if b >= 0: let (r, _) = lastRune(container.currentLine, b) - let currentCat = r.breakFunc() + let currentCat = container.luctx.breakFunc(r) while b >= 0: let (r, o) = lastRune(container.currentLine, b) - if r.breakFunc() != currentCat: + if container.luctx.breakFunc(r) != currentCat: break b -= o px = x @@ -898,7 +883,6 @@ proc cursorWordBegin(container: Container; breakFunc: BreakFunc) = else: b = -1 ob = -1 - if b >= 0 or ob != b: container.setCursorX(x) else: diff --git a/src/local/lineedit.nim b/src/local/lineedit.nim index e2b89f89..04507de7 100644 --- a/src/local/lineedit.nim +++ b/src/local/lineedit.nim @@ -6,6 +6,7 @@ import js/javascript import types/cell import types/opt import types/winattrs +import utils/luwrap import utils/strwidth import utils/twtstr import utils/wordbreak @@ -210,13 +211,14 @@ proc forward(edit: LineEdit) {.jsfunc.} = proc prevWord(edit: LineEdit) {.jsfunc.} = if edit.cursori == 0: return + let ctx = LUContext() let (r, len) = edit.news.lastRune(edit.cursori - 1) - if r.breaksWord(): + if ctx.breaksWord(r): edit.cursori -= len edit.cursorx -= r.width() while edit.cursori > 0: let (r, len) = edit.news.lastRune(edit.cursori - 1) - if r.breaksWord(): + if ctx.breaksWord(r): break edit.cursori -= len edit.cursorx -= r.width() @@ -226,17 +228,18 @@ proc prevWord(edit: LineEdit) {.jsfunc.} = proc nextWord(edit: LineEdit) {.jsfunc.} = if edit.cursori >= edit.news.len: return + let ctx = LUContext() let oc = edit.cursori var r: Rune fastRuneAt(edit.news, edit.cursori, r) - if r.breaksWord(): + if ctx.breaksWord(r): edit.cursorx += r.width() else: edit.cursori = oc while edit.cursori < edit.news.len: let pc = edit.cursori fastRuneAt(edit.news, edit.cursori, r) - if r.breaksWord(): + if ctx.breaksWord(r): edit.cursori = pc break edit.cursorx += r.width() diff --git a/src/local/pager.nim b/src/local/pager.nim index af9855bc..8b25e075 100644 --- a/src/local/pager.nim +++ b/src/local/pager.nim @@ -21,12 +21,12 @@ import io/socketstream import io/stdio import io/tempfile import io/urlfilter -import js/jserror import js/fromjs import js/javascript +import js/jserror +import js/jsregex import js/jstypes import js/jsutils -import js/jsregex import js/tojs import loader/connecterror import loader/headers @@ -44,6 +44,7 @@ import types/cookie import types/opt import types/url import types/winattrs +import utils/luwrap import utils/mimeguess import utils/strwidth import utils/twtstr @@ -134,6 +135,7 @@ type statusgrid*: FixedGrid term*: Terminal unreg*: seq[Container] + luctx: LUContext jsDestructor(Pager) @@ -293,7 +295,8 @@ proc newPager*(config: Config; forkserver: ForkServer; ctx: JSContext; forkserver: forkserver, term: newTerminal(stdout, config), alerts: alerts, - jsctx: ctx + jsctx: ctx, + luctx: LUContext() ) proc genClientKey(pager: Pager): ClientKey = @@ -534,6 +537,7 @@ proc newContainer(pager: Pager; bufferConfig: BufferConfig; loaderConfig, url, request, + pager.luctx, pager.term.attrs, title, redirectDepth, diff --git a/src/utils/luwrap.nim b/src/utils/luwrap.nim index 612982e0..853d3015 100644 --- a/src/utils/luwrap.nim +++ b/src/utils/luwrap.nim @@ -79,22 +79,62 @@ func contains(cr: CharRange; r: Rune): bool = let L = cr.len div 2 - 1 return cps.toOpenArray(0, L).binarySearch(uint32(r), cmpRange) != -1 -proc isGeneralCategoryLU*(r: Rune; s: string): bool = - var cr: CharRange - cr_init(addr cr, nil, passRealloc) - doAssert unicode_general_category(addr cr, s) == 0 - result = r in cr - cr_free(addr cr) - -proc isAlphaLU*(r: Rune): bool = - return r.isGeneralCategoryLU("Letter") - -proc isScriptLU*(r: Rune; s: string): bool = - var cr: CharRange - cr_init(addr cr, nil, passRealloc) - doAssert unicode_script(addr cr, s, 0) == 0 - result = r in cr - cr_free(addr cr) - -proc isWhiteSpaceLU*(r: Rune): bool = - return r.isGeneralCategoryLU("Separator") +type + LURangeType = enum + lurLetter = "Letter" + lurSeparator = "Separator" + lurHan = "Han" + lurHiragana = "Hiragana" + lurKatakana = "Katakana" + lurHangul = "Hangul" + + LUContextObj = object + crs: array[LURangeType, CharRange] + inited: set[LURangeType] + + LUContext* = ref LUContextObj + +{.warning[Deprecated]: off.}: + proc `=destroy`*(ctx: var LUContextObj) = + for lur, cr in ctx.crs.mpairs: + if lur in ctx.inited: + cr_free(addr cr) + ctx.inited = {} + +proc initGeneralCategory(ctx: LUContext; lur: LURangeType) = + if lur notin ctx.inited: + let p = addr ctx.crs[lur] + cr_init(p, nil, passRealloc) + doAssert unicode_general_category(p, cstring($lur)) == 0 + ctx.inited.incl(lur) + +proc initScript(ctx: LUContext; lur: LURangeType) = + if lur notin ctx.inited: + let p = addr ctx.crs[lur] + cr_init(p, nil, passRealloc) + doAssert unicode_script(p, cstring($lur), 0) == 0 + ctx.inited.incl(lur) + +proc isAlphaLU*(ctx: LUContext; r: Rune): bool = + ctx.initGeneralCategory(lurLetter) + return r in ctx.crs[lurLetter] + +proc isWhiteSpaceLU*(ctx: LUContext; r: Rune): bool = + ctx.initGeneralCategory(lurSeparator) + return r in ctx.crs[lurSeparator] + +proc isHan*(ctx: LUContext; r: Rune): bool = + ctx.initScript(lurHan) + return r in ctx.crs[lurHan] + +proc isHiragana*(ctx: LUContext; r: Rune): bool = + ctx.initScript(lurHiragana) + return r in ctx.crs[lurHiragana] + +proc isKatakana*(ctx: LUContext; r: Rune): bool = + ctx.initScript(lurKatakana) + return r in ctx.crs[lurKatakana] + +proc isHangul*(ctx: LUContext; r: Rune): bool = + ctx.initScript(lurHangul) + return r in ctx.crs[lurHangul] diff --git a/src/utils/strwidth.nim b/src/utils/strwidth.nim index fe089328..a3acbef7 100644 --- a/src/utils/strwidth.nim +++ b/src/utils/strwidth.nim @@ -1,4 +1,3 @@ -import std/strutils import std/unicode import utils/proptable @@ -40,40 +39,48 @@ func twidth*(r: Rune; w: int): int = return ((w div 8) + 1) * 8 - w func width*(s: string): int = - for r in s.runes(): + result = 0 + for r in s.runes: result += r.twidth(result) func width*(s: string; start, len: int): int = + result = 0 var i = start var m = len - if m > s.len: m = s.len + if m > s.len: + m = s.len while i < m: var r: Rune fastRuneAt(s, i, r) result += r.twidth(result) func notwidth*(s: string): int = + result = 0 for r in s.runes: result += r.width() func twidth*(s: string; w: int): int = var i = w - for r in s.runes(): + for r in s.runes: i += r.twidth(w) return i - w -func padToWidth*(str: string; size: int; schar = '$'): string = - if str.width() < size: - return str & ' '.repeat(size - str.width()) - else: - let size = size - 1 - result = newStringOfCap(str.len) - var w = 0 - var i = 0 - while i < str.len: - var r: Rune - fastRuneAt(str, i, r) - if w + r.width <= size: - result &= r - w += r.width - result &= schar +func padToWidth*(s: string; size: int; schar = '$'): string = + result = newStringOfCap(s.len) + var w = 0 + var r: Rune + var i = 0 + while i < s.len: + fastRuneAt(s, i, r) + w += r.width() + if w > size - 1: + break + result &= r + if w > size - 1: + if w == size and i == s.len: + result &= r + else: + result &= schar + while w < size: + result &= ' ' + inc w diff --git a/src/utils/twtstr.nim b/src/utils/twtstr.nim index da234982..c657d15b 100644 --- a/src/utils/twtstr.nim +++ b/src/utils/twtstr.nim @@ -30,16 +30,16 @@ func getControlLetter*(c: char): char = return '?' return char(int(c) or 0x40) -func toHeaderCase*(str: string): string = - result = str +func toHeaderCase*(s: string): string = + result = s var flip = true for c in result.mitems: if flip: c = c.toUpperAscii() flip = c == '-' -func snakeToKebabCase*(str: string): string = - result = str +func snakeToKebabCase*(s: string): string = + result = s for c in result.mitems: if c == '_': c = '-' @@ -61,13 +61,15 @@ func camelToKebabCase*(s: string): string = else: result &= c -func startsWithNoCase*(str, prefix: string): bool = - if str.len < prefix.len: return false +func startsWithNoCase*(s, prefix: string): bool = + if s.len < prefix.len: + return false # prefix.len is always lower var i = 0 while true: if i == prefix.len: return true - if str[i].toLowerAscii() != prefix[i].toLowerAscii(): return false + if s[i].toLowerAscii() != prefix[i].toLowerAscii(): + return false inc i func hexValue*(c: char): int = @@ -126,12 +128,15 @@ func endsWithIgnoreCase*(s1, s2: string): bool = return false return true +func skipBlanks*(buf: string; at: int): int = + result = at + while result < buf.len and buf[result] in AsciiWhitespace: + inc result + func stripAndCollapse*(s: string): string = - var i = 0 - while i < s.len and s[i] in AsciiWhitespace: - inc i var space = false - while i < s.len: + result = "" + for i in s.skipBlanks(0) ..< s.len: if s[i] notin AsciiWhitespace: if space: result &= ' ' @@ -141,19 +146,13 @@ func stripAndCollapse*(s: string): string = space = true else: result &= ' ' - inc i - -func skipBlanks*(buf: string; at: int): int = - result = at - while result < buf.len and buf[result] in AsciiWhitespace: - inc result func until*(s: string; c: set[char]; starti = 0): string = result = "" for i in starti ..< s.len: if s[i] in c: break - result.add(s[i]) + result &= s[i] func untilLower*(s: string; c: set[char]; starti = 0): string = result = "" @@ -163,14 +162,13 @@ func untilLower*(s: string; c: set[char]; starti = 0): string = result.add(s[i].toLowerAscii()) func until*(s: string; c: char; starti = 0): string = - s.until({c}, starti) + return s.until({c}, starti) func after*(s: string; c: set[char]): string = - var i = 0 - while i < s.len: - if s[i] in c: - return s.substr(i + 1) - inc i + let i = s.find(c) + if i != -1: + return s.substr(i + 1) + return "" func after*(s: string; c: char): string = s.after({c}) @@ -215,100 +213,6 @@ func convertSize*(size: int): string = discard c_sprintf(cstring(result), cstring("%.3g%s"), f, SizeUnit[sizepos]) result.setLen(cstring(result).len) -func numberAdditive*(i: int; range: HSlice[int, int]; - symbols: openArray[(int, string)]): string = - if i notin range: - return $i - var n = i - var at = 0 - while n > 0: - if n >= symbols[at][0]: - n -= symbols[at][0] - result &= symbols[at][1] - continue - inc at - return result - -const romanNumbers = [ - (1000, "M"), (900, "CM"), (500, "D"), (400, "CD"), (100, "C"), (90, "XC"), - (50, "L"), (40, "XL"), (10, "X"), (9, "IX"), (5, "V"), (4, "IV"), (1, "I") -] - -const romanNumbersLower = block: - var res: seq[(int, string)] - for (n, s) in romanNumbers: - res.add((n, s.toLowerAscii())) - res - -func romanNumber*(i: int): string = - return numberAdditive(i, 1..3999, romanNumbers) - -func romanNumberLower*(i: int): string = - return numberAdditive(i, 1..3999, romanNumbersLower) - -func japaneseNumber*(i: int): string = - if i == 0: - return "〇" - var n = i - if i < 0: - result &= "マイナス" - n *= -1 - - let o = n - - var ss: seq[string] - var d = 0 - while n > 0: - let m = n mod 10 - - if m != 0: - case d - of 1: ss.add("十") - of 2: ss.add("百") - of 3: ss.add("千") - of 4: - ss.add("万") - ss.add("一") - of 5: - ss.add("万") - ss.add("十") - of 6: - ss.add("万") - ss.add("百") - of 7: - ss.add("万") - ss.add("千") - ss.add("一") - of 8: - ss.add("億") - ss.add("一") - of 9: - ss.add("億") - ss.add("十") - else: discard - case m - of 0: - inc d - n = n div 10 - of 1: - if o == n: - ss.add("一") - of 2: ss.add("二") - of 3: ss.add("三") - of 4: ss.add("四") - of 5: ss.add("五") - of 6: ss.add("六") - of 7: ss.add("七") - of 8: ss.add("八") - of 9: ss.add("九") - else: discard - n -= m - - n = ss.len - 1 - while n >= 0: - result &= ss[n] - dec n - # Implements https://html.spec.whatwg.org/multipage/common-microsyntaxes.html#signed-integers func parseIntImpl[T: SomeSignedInt](s: string; allowed: set[char]; radix: T): Option[T] = @@ -540,28 +444,28 @@ const NameCharRanges = [ # + NameStartCharRanges ] const NameStartCharAscii = {':', '_'} + AsciiAlpha const NameCharAscii = NameStartCharAscii + {'-', '.'} + AsciiDigit -func matchNameProduction*(str: string): bool = - if str.len == 0: +func matchNameProduction*(s: string): bool = + if s.len == 0: return false # NameStartChar var i = 0 var r: Rune - if str[i] in Ascii: - if str[i] notin NameStartCharAscii: + if s[i] in Ascii: + if s[i] notin NameStartCharAscii: return false inc i else: - fastRuneAt(str, i, r) + fastRuneAt(s, i, r) if not isInRange(NameStartCharRanges, int32(r)): return false # NameChar - while i < str.len: - if str[i] in Ascii: - if str[i] notin NameCharAscii: + while i < s.len: + if s[i] in Ascii: + if s[i] notin NameCharAscii: return false inc i else: - fastRuneAt(str, i, r) + fastRuneAt(s, i, r) if not isInRange(NameStartCharRanges, int32(r)) and not isInMap(NameCharRanges, int32(r)): return false @@ -606,21 +510,14 @@ proc expandPath*(path: string): string = return path func deleteChars*(s: string; todel: set[char]): string = - var i = 0 - block earlyret: - for j, c in s: - if c in todel: - i = j - break earlyret + let i = s.find(todel) + if i == -1: return s - var rs = newStringOfCap(s.len - 1) - for j in 0 ..< i: - rs &= s[j] + var rs = s.substr(0, i - 1) for j in i + 1 ..< s.len: if s[j] in todel: continue rs &= s[j] - inc i return rs func replaceControls*(s: string): string = diff --git a/src/utils/wordbreak.nim b/src/utils/wordbreak.nim index 80959be7..c93d63ec 100644 --- a/src/utils/wordbreak.nim +++ b/src/utils/wordbreak.nim @@ -4,30 +4,42 @@ import utils/charcategory import utils/luwrap import utils/strwidth +type BreakCategory* = enum + bcAlpha, bcSpace, bcSymbol, bcHan, bcHiragana, bcKatakana, bcHangul + func isDigitAscii(r: Rune): bool = return uint32(r) < 128 and char(r) in AsciiDigit -type BreakCategory* = enum - bcAlpha, bcSpace, bcSymbol, bcHan +proc breaksWord*(ctx: LUContext; r: Rune): bool = + return not r.isDigitAscii() and r.width() != 0 and not ctx.isAlphaLU(r) -func breaksWord*(r: Rune): bool = - return not r.isDigitAscii() and r.width() != 0 and not r.isAlphaLU() - -func breaksViWordCat*(r: Rune): BreakCategory = - if r.isWhiteSpaceLU(): +proc breaksViWordCat*(ctx: LUContext; r: Rune): BreakCategory = + if int32(r) < 0x80: # ASCII + let c = char(r) + if c in AsciiAlphaNumeric + {'_'}: + return bcAlpha + elif c in AsciiWhitespace: + return bcSpace + elif ctx.isWhiteSpaceLU(r): return bcSpace - elif r.breaksWord() and r != Rune'_': - return bcSymbol - elif r.isScriptLU("Han"): - return bcHan - return bcAlpha + elif ctx.isAlphaLU(r): + if ctx.isHiragana(r): + return bcHiragana + elif ctx.isKatakana(r): + return bcKatakana + elif ctx.isHangul(r): + return bcHangul + elif ctx.isHan(r): + return bcHan + return bcAlpha + return bcSymbol -func breaksWordCat*(r: Rune): BreakCategory = - if not r.breaksWord(): +proc breaksWordCat*(ctx: LUContext; r: Rune): BreakCategory = + if not ctx.breaksWord(r): return bcAlpha return bcSpace -func breaksBigWordCat*(r: Rune): BreakCategory = - if not r.isWhiteSpaceLU(): +proc breaksBigWordCat*(ctx: LUContext; r: Rune): BreakCategory = + if not ctx.isWhiteSpaceLU(r): return bcAlpha return bcSpace |