luwrap: replace Nim unicode maps with libunicode

Instead of using the built-in (and outdated, and buggy) tables, we now use libunicode from QuickJS. This shaves some bytes off the executable, though far fewer than I had imagined it would.

Also, a surprising effect of this change: because libunicode's tables aren't glitched out, kanji are now properly classified as alpha. I found this greatly annoying, because `w' in Japanese text would now jump through whole sentences. As a band-aid solution I added an extra Han category, but I wish we had a more robust solution that could differentiate between *all* scripts.

TODO: I suspect that separately loading the tables for every rune in breaksViWordCat is rather inefficient. Using some context object (at least per operation) would probably be beneficial.
author: bptato <nincsnevem662@gmail.com> 2024-05-09 21:57:00 +0200
committer: bptato <nincsnevem662@gmail.com> 2024-05-09 22:11:30 +0200
commit: 2453c63b0b12baa9bd78c0a114b58f1c3833e967 (patch)
tree: 34b37fa375f8500669877ec726afea0ba2ed2d99 /src/bindings/libunicode.nim
parent: 200a3784de44b90351f7d9a1da47e85e06ff8e15 (diff)
download: chawan-2453c63b0b12baa9bd78c0a114b58f1c3833e967.tar.gz
1 files changed, 19 insertions, 4 deletions
diff --git a/src/bindings/libunicode.nim b/src/bindings/libunicode.nim
index f501ae3f..13a36da4 100644
--- a/src/bindings/libunicode.nim
+++ b/src/bindings/libunicode.nim
@@ -16,14 +16,29 @@ type
 {.push header: "quickjs/libunicode.h", importc.}
 
 proc cr_init*(cr: ptr CharRange; mem_opaque: pointer;
-  realloc_func: DynBufReallocFunc) {.importc.}
+  realloc_func: DynBufReallocFunc)
 
-proc cr_free*(cr: ptr CharRange) {.importc.}
+proc cr_free*(cr: ptr CharRange)
 
 proc unicode_normalize*(pdst: ptr ptr uint32; src: ptr uint32; src_len: cint;
   n_type: UnicodeNormalizationEnum; opaque: pointer;
-  realloc_func: DynBufReallocFunc): cint {.importc.}
+  realloc_func: DynBufReallocFunc): cint
 
+proc unicode_script*(cr: ptr CharRange; script_name: cstring; is_ext: cint):
+  cint {.importc, header: "quickjs/libunicode.h".}
+proc unicode_prop*(cr: ptr CharRange; prop_name: cstring): cint
 proc unicode_general_category*(cr: ptr CharRange; gc_name: cstring): cint
-  {.importc.}
+
+const LRE_CC_RES_LEN_MAX* = 3
+
+# conv_type:
+# 0 = to upper
+# 1 = to lower
+# 2 = case folding
+# res must be an array of LRE_CC_RES_LEN_MAX
+proc lre_case_conv*(res: ptr UncheckedArray[uint32]; c: uint32;
+  conv_type: cint): cint
+
+proc lre_is_space*(c: uint32): cint
+
 {.pop.}
author	bptato <nincsnevem662@gmail.com>	2024-05-09 21:57:00 +0200
committer	bptato <nincsnevem662@gmail.com>	2024-05-09 22:11:30 +0200
commit	2453c63b0b12baa9bd78c0a114b58f1c3833e967 (patch)
tree	34b37fa375f8500669877ec726afea0ba2ed2d99 /src/bindings/libunicode.nim
parent	200a3784de44b90351f7d9a1da47e85e06ff8e15 (diff)
download	chawan-2453c63b0b12baa9bd78c0a114b58f1c3833e967.tar.gz