diff options
author | bptato <nincsnevem662@gmail.com> | 2023-12-13 15:00:34 +0100 |
---|---|---|
committer | bptato <nincsnevem662@gmail.com> | 2023-12-13 15:00:34 +0100 |
commit | 53bc47531543fe55997f4c6875fa03745a17e754 (patch) | |
tree | a7d3b3de2f68580a79415818bad84bf607e58c14 /src/utils | |
parent | 4818cb28debf4601213707f6c1b9b22348b51fbc (diff) | |
download | chawan-53bc47531543fe55997f4c6875fa03745a17e754.tar.gz |
Various fixes
* Makefile: fix parallel build, add new binaries to install target
* twtstr: split out libunicode-related stuff to luwrap
* config: quote default gopher2html URL env var for unquote
* adapter/: get rid of types/url dependency, use CURL url in all cases
Diffstat (limited to 'src/utils')
-rw-r--r-- | src/utils/luwrap.nim | 46 | ||||
-rw-r--r-- | src/utils/twtstr.nim | 37 |
2 files changed, 46 insertions, 37 deletions
```diff
diff --git a/src/utils/luwrap.nim b/src/utils/luwrap.nim
new file mode 100644
index 00000000..41a58701
--- /dev/null
+++ b/src/utils/luwrap.nim
@@ -0,0 +1,46 @@
+import std/strutils
+import std/unicode
+
+import bindings/libunicode
+import utils/charcategory
+
+proc passRealloc(opaque: pointer, p: pointer, size: csize_t): pointer
+    {.cdecl.} =
+  return realloc(p, size)
+
+proc mnormalize*(rs: var seq[Rune], form = UNICODE_NFC) = {.cast(noSideEffect).}:
+  if rs.len == 0: return
+  var outbuf: ptr uint32
+  let p = cast[ptr uint32](unsafeAddr rs[0])
+  let out_len = unicode_normalize(addr outbuf, p, cint(rs.len), form, nil,
+    passRealloc)
+  if out_len < 0:
+    raise newException(Defect, "Unicode normalization failed")
+  if out_len == 0:
+    return
+  rs = cast[seq[Rune]](newSeqUninitialized[uint32](out_len))
+  copyMem(addr rs[0], outbuf, out_len * sizeof(uint32))
+  dealloc(outbuf)
+
+#TODO maybe a utf8 normalization procedure?
+proc mnormalize*(s: var string) =
+  if NonAscii notin s:
+    return # no need to normalize ascii
+  var rs = s.toRunes()
+  rs.mnormalize()
+  s = $rs
+
+func normalize*(rs: seq[Rune], form = UNICODE_NFC): seq[Rune] =
+  {.cast(noSideEffect).}:
+    if rs.len == 0: return
+    var outbuf: ptr uint32
+    let p = cast[ptr uint32](unsafeAddr rs[0])
+    let out_len = unicode_normalize(addr outbuf, p, cint(rs.len), form,
+      nil, passRealloc)
+    if out_len < 0:
+      raise newException(Defect, "Unicode normalization failed")
+    if out_len == 0:
+      return
+    result = cast[seq[Rune]](newSeqUninitialized[uint32](out_len))
+    copyMem(addr result[0], outbuf, out_len * sizeof(uint32))
+    dealloc(outbuf)
diff --git a/src/utils/twtstr.nim b/src/utils/twtstr.nim
index b235651f..da9af19a 100644
--- a/src/utils/twtstr.nim
+++ b/src/utils/twtstr.nim
@@ -7,7 +7,6 @@ import strutils
 import tables
 import unicode
 
-import bindings/libunicode
 import types/opt
 import utils/charcategory
 import utils/map
@@ -576,42 +575,6 @@ proc passRealloc*(opaque: pointer, p: pointer, size: csize_t): pointer {.cdecl.}
     =
   return realloc(p, size)
 
-proc mnormalize*(rs: var seq[Rune], form = UNICODE_NFC) = {.cast(noSideEffect).}:
-  if rs.len == 0: return
-  var outbuf: ptr uint32
-  let p = cast[ptr uint32](unsafeAddr rs[0])
-  let out_len = unicode_normalize(addr outbuf, p, cint(rs.len), form, nil,
-    passRealloc)
-  if out_len < 0:
-    raise newException(Defect, "Unicode normalization failed")
-  if out_len == 0:
-    return
-  rs = cast[seq[Rune]](newSeqUninitialized[uint32](out_len))
-  copyMem(addr rs[0], outbuf, out_len * sizeof(uint32))
-  dealloc(outbuf)
-
-#TODO maybe a utf8 normalization procedure?
-proc mnormalize*(s: var string) =
-  if s.isAscii():
-    return # no need to normalize ascii
-  var rs = s.toRunes()
-  rs.mnormalize()
-  s = $rs
-
-func normalize*(rs: seq[Rune], form = UNICODE_NFC): seq[Rune] = {.cast(noSideEffect).}:
-  if rs.len == 0: return
-  var outbuf: ptr uint32
-  let out_len = unicode_normalize(addr outbuf,
-    cast[ptr uint32](unsafeAddr rs[0]),
-    cint(rs.len), form, nil, passRealloc)
-  if out_len < 0:
-    raise newException(Defect, "Unicode normalization failed")
-  if out_len == 0:
-    return
-  result = cast[seq[Rune]](newSeqUninitialized[uint32](out_len))
-  copyMem(addr result[0], outbuf, out_len * sizeof(uint32))
-  dealloc(outbuf)
-
 # https://www.w3.org/TR/xml/#NT-Name
 const NameStartCharRanges = [
   (0xC0, 0xD6),
```