about summary refs log tree commit diff stats
path: root/src/utils/luwrap.nim
diff options
context:
space:
mode:
author bptato <nincsnevem662@gmail.com> 2024-05-10 14:56:28 +0200
committer bptato <nincsnevem662@gmail.com> 2024-05-10 15:07:24 +0200
commit 99c6d7cd15a29ffba54836f26151847176a8569c (patch)
tree b9cc9308ba1fd7d845c186f441b72524c0ae453d /src/utils/luwrap.nim
parent 2453c63b0b12baa9bd78c0a114b58f1c3833e967 (diff)
download chawan-99c6d7cd15a29ffba54836f26151847176a8569c.tar.gz
luwrap: use separate context (+ various cleanups)
Use a LUContext to only load required CharRanges once per pager.

Also, add kana & hangul vi word break categories for convenience.
Diffstat (limited to 'src/utils/luwrap.nim')
-rw-r--r-- src/utils/luwrap.nim 78
1 files changed, 59 insertions, 19 deletions
diff --git a/src/utils/luwrap.nim b/src/utils/luwrap.nim
index 612982e0..853d3015 100644
--- a/src/utils/luwrap.nim
+++ b/src/utils/luwrap.nim
@@ -79,22 +79,62 @@ func contains(cr: CharRange; r: Rune): bool =
   let L = cr.len div 2 - 1
   return cps.toOpenArray(0, L).binarySearch(uint32(r), cmpRange) != -1
 
-proc isGeneralCategoryLU*(r: Rune; s: string): bool =
-  var cr: CharRange
-  cr_init(addr cr, nil, passRealloc)
-  doAssert unicode_general_category(addr cr, s) == 0
-  result = r in cr
-  cr_free(addr cr)
-
-proc isAlphaLU*(r: Rune): bool =
-  return r.isGeneralCategoryLU("Letter")
-
-proc isScriptLU*(r: Rune; s: string): bool =
-  var cr: CharRange
-  cr_init(addr cr, nil, passRealloc)
-  doAssert unicode_script(addr cr, s, 0) == 0
-  result = r in cr
-  cr_free(addr cr)
-
-proc isWhiteSpaceLU*(r: Rune): bool =
-  return r.isGeneralCategoryLU("Separator")
+type
+  LURangeType = enum
+    lurLetter = "Letter"
+    lurSeparator = "Separator"
+    lurHan = "Han"
+    lurHiragana = "Hiragana"
+    lurKatakana = "Katakana"
+    lurHangul = "Hangul"
+
+  LUContextObj = object
+    crs: array[LURangeType, CharRange]
+    inited: set[LURangeType]
+
+  LUContext* = ref LUContextObj
+
+{.warning[Deprecated]: off.}:
+  proc `=destroy`*(ctx: var LUContextObj) =
+    for lur, cr in ctx.crs.mpairs:
+      if lur in ctx.inited:
+        cr_free(addr cr)
+    ctx.inited = {}
+
+proc initGeneralCategory(ctx: LUContext; lur: LURangeType) =
+  if lur notin ctx.inited:
+    let p = addr ctx.crs[lur]
+    cr_init(p, nil, passRealloc)
+    doAssert unicode_general_category(p, cstring($lur)) == 0
+    ctx.inited.incl(lur)
+
+proc initScript(ctx: LUContext; lur: LURangeType) =
+  if lur notin ctx.inited:
+    let p = addr ctx.crs[lur]
+    cr_init(p, nil, passRealloc)
+    doAssert unicode_script(p, cstring($lur), 0) == 0
+    ctx.inited.incl(lur)
+
+proc isAlphaLU*(ctx: LUContext; r: Rune): bool =
+  ctx.initGeneralCategory(lurLetter)
+  return r in ctx.crs[lurLetter]
+
+proc isWhiteSpaceLU*(ctx: LUContext; r: Rune): bool =
+  ctx.initGeneralCategory(lurSeparator)
+  return r in ctx.crs[lurSeparator]
+
+proc isHan*(ctx: LUContext; r: Rune): bool =
+  ctx.initScript(lurHan)
+  return r in ctx.crs[lurHan]
+
+proc isHiragana*(ctx: LUContext; r: Rune): bool =
+  ctx.initScript(lurHiragana)
+  return r in ctx.crs[lurHiragana]
+
+proc isKatakana*(ctx: LUContext; r: Rune): bool =
+  ctx.initScript(lurKatakana)
+  return r in ctx.crs[lurKatakana]
+
+proc isHangul*(ctx: LUContext; r: Rune): bool =
+  ctx.initScript(lurHangul)
+  return r in ctx.crs[lurHangul]