diff options
author | bptato <nincsnevem662@gmail.com> | 2024-05-10 14:56:28 +0200 |
---|---|---|
committer | bptato <nincsnevem662@gmail.com> | 2024-05-10 15:07:24 +0200 |
commit | 99c6d7cd15a29ffba54836f26151847176a8569c (patch) | |
tree | b9cc9308ba1fd7d845c186f441b72524c0ae453d /src/utils/luwrap.nim | |
parent | 2453c63b0b12baa9bd78c0a114b58f1c3833e967 (diff) | |
download | chawan-99c6d7cd15a29ffba54836f26151847176a8569c.tar.gz |
luwrap: use separate context (+ various cleanups)
Use a LUContext to only load required CharRanges once per pager. Also, add kana & hangul vi word break categories for convenience.
Diffstat (limited to 'src/utils/luwrap.nim')
-rw-r--r-- | src/utils/luwrap.nim | 78 |
1 files changed, 59 insertions, 19 deletions
```diff
diff --git a/src/utils/luwrap.nim b/src/utils/luwrap.nim
index 612982e0..853d3015 100644
--- a/src/utils/luwrap.nim
+++ b/src/utils/luwrap.nim
@@ -79,22 +79,62 @@ func contains(cr: CharRange; r: Rune): bool =
   let L = cr.len div 2 - 1
   return cps.toOpenArray(0, L).binarySearch(uint32(r), cmpRange) != -1
 
-proc isGeneralCategoryLU*(r: Rune; s: string): bool =
-  var cr: CharRange
-  cr_init(addr cr, nil, passRealloc)
-  doAssert unicode_general_category(addr cr, s) == 0
-  result = r in cr
-  cr_free(addr cr)
-
-proc isAlphaLU*(r: Rune): bool =
-  return r.isGeneralCategoryLU("Letter")
-
-proc isScriptLU*(r: Rune; s: string): bool =
-  var cr: CharRange
-  cr_init(addr cr, nil, passRealloc)
-  doAssert unicode_script(addr cr, s, 0) == 0
-  result = r in cr
-  cr_free(addr cr)
-
-proc isWhiteSpaceLU*(r: Rune): bool =
-  return r.isGeneralCategoryLU("Separator")
+type
+  LURangeType = enum
+    lurLetter = "Letter"
+    lurSeparator = "Separator"
+    lurHan = "Han"
+    lurHiragana = "Hiragana"
+    lurKatakana = "Katakana"
+    lurHangul = "Hangul"
+
+  LUContextObj = object
+    crs: array[LURangeType, CharRange]
+    inited: set[LURangeType]
+
+  LUContext* = ref LUContextObj
+
+{.warning[Deprecated]: off.}:
+  proc `=destroy`*(ctx: var LUContextObj) =
+    for lur, cr in ctx.crs.mpairs:
+      if lur in ctx.inited:
+        cr_free(addr cr)
+    ctx.inited = {}
+
+proc initGeneralCategory(ctx: LUContext; lur: LURangeType) =
+  if lur notin ctx.inited:
+    let p = addr ctx.crs[lur]
+    cr_init(p, nil, passRealloc)
+    doAssert unicode_general_category(p, cstring($lur)) == 0
+    ctx.inited.incl(lur)
+
+proc initScript(ctx: LUContext; lur: LURangeType) =
+  if lur notin ctx.inited:
+    let p = addr ctx.crs[lur]
+    cr_init(p, nil, passRealloc)
+    doAssert unicode_script(p, cstring($lur), 0) == 0
+    ctx.inited.incl(lur)
+
+proc isAlphaLU*(ctx: LUContext; r: Rune): bool =
+  ctx.initGeneralCategory(lurLetter)
+  return r in ctx.crs[lurLetter]
+
+proc isWhiteSpaceLU*(ctx: LUContext; r: Rune): bool =
+  ctx.initGeneralCategory(lurSeparator)
+  return r in ctx.crs[lurSeparator]
+
+proc isHan*(ctx: LUContext; r: Rune): bool =
+  ctx.initScript(lurHan)
+  return r in ctx.crs[lurHan]
+
+proc isHiragana*(ctx: LUContext; r: Rune): bool =
+  ctx.initScript(lurHiragana)
+  return r in ctx.crs[lurHiragana]
+
+proc isKatakana*(ctx: LUContext; r: Rune): bool =
+  ctx.initScript(lurKatakana)
+  return r in ctx.crs[lurKatakana]
+
+proc isHangul*(ctx: LUContext; r: Rune): bool =
+  ctx.initScript(lurHangul)
+  return r in ctx.crs[lurHangul]
```