From 2453c63b0b12baa9bd78c0a114b58f1c3833e967 Mon Sep 17 00:00:00 2001 From: bptato Date: Thu, 9 May 2024 21:57:00 +0200 Subject: luwrap: replace Nim unicode maps with libunicode Instead of using the built-in (and outdated, and buggy) tables, we now use libunicode from QJS. This shaves some bytes off the executable, though far less than I had imagined it would. Also, a surprising side effect of this change: because libunicode's tables aren't glitched out, kanji now gets properly classified as alpha. I found this greatly annoying, because `w' in Japanese text would now jump through whole sentences. As a band-aid solution, I added an extra Han category, but I wish we had a more robust solution that could differentiate between *all* scripts. TODO: I suspect that separately loading the tables for every rune in breaksViWordCat is rather inefficient. Using some context object (at least per operation) would probably be beneficial. --- src/bindings/libunicode.nim | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) (limited to 'src/bindings/libunicode.nim') diff --git a/src/bindings/libunicode.nim b/src/bindings/libunicode.nim index f501ae3f..13a36da4 100644 --- a/src/bindings/libunicode.nim +++ b/src/bindings/libunicode.nim @@ -16,14 +16,29 @@ type {.push header: "quickjs/libunicode.h", importc.} proc cr_init*(cr: ptr CharRange; mem_opaque: pointer; - realloc_func: DynBufReallocFunc) {.importc.} + realloc_func: DynBufReallocFunc) -proc cr_free*(cr: ptr CharRange) {.importc.} +proc cr_free*(cr: ptr CharRange) proc unicode_normalize*(pdst: ptr ptr uint32; src: ptr uint32; src_len: cint; n_type: UnicodeNormalizationEnum; opaque: pointer; - realloc_func: DynBufReallocFunc): cint {.importc.} + realloc_func: DynBufReallocFunc): cint +proc unicode_script*(cr: ptr CharRange; script_name: cstring; is_ext: cint): + cint {.importc, header: "quickjs/libunicode.h".} +proc unicode_prop*(cr: ptr CharRange; prop_name: cstring): cint proc unicode_general_category*(cr: ptr 
CharRange; gc_name: cstring): cint - {.importc.} + +const LRE_CC_RES_LEN_MAX* = 3 + +# conv_type: +# 0 = to upper +# 1 = to lower +# 2 = case folding +# res must be an array of LRE_CC_RES_LEN_MAX +proc lre_case_conv*(res: ptr UncheckedArray[uint32]; c: uint32; + conv_type: cint): cint + +proc lre_is_space*(c: uint32): cint + {.pop.} -- cgit 1.4.1-2-gfad0