utils: add twtuni

std/unicode has the following issues: * Rune is an int32, which implies overflow checking. Also, it is distinct, so you have to convert it manually to do arithmetic. * QJS libunicode and Chagashi work with uint32; interfacing with these required pointless type conversions. * fastRuneAt is a template, meaning it's pasted into every call site. Also, it decodes to UCS-4, so it generates two branches that aren't even used. Overall this led to quite some code bloat. * fastRuneAt and lastRune have frustratingly different interfaces. Writing code to handle both cases is error-prone. * On older Nim versions which we still support, std/unicode takes strings, not openArray[char]. Replace it with "twtuni", which includes some improved versions of the few procedures from std/unicode that we actually use.
author: bptato <nincsnevem662@gmail.com> 2024-09-08 15:18:45 +0200
committer: bptato <nincsnevem662@gmail.com> 2024-09-08 16:06:02 +0200
commit: 4124c041ed2e3b497ede72fdae229aa2c6aca249 (patch)
tree: e8488449de6f0be54b9c79547352829b998833d3 /src/config
parent: 5a64e3193924c7e503dddb10a99989148b26e922 (diff)
download: chawan-4124c041ed2e3b497ede72fdae229aa2c6aca249.tar.gz
1 files changed, 4 insertions, 4 deletions
diff --git a/src/config/toml.nim b/src/config/toml.nim
index 992a0cbc..2978585c 100644
--- a/src/config/toml.nim
+++ b/src/config/toml.nim
@@ -2,10 +2,10 @@ import std/options
 import std/streams
 import std/tables
 import std/times
-import std/unicode
 
 import types/opt
 import utils/twtstr
+import utils/twtuni
 
 type
   TomlValueType* = enum
@@ -144,7 +144,7 @@ proc reconsume(state: var TomlParser) =
 proc has(state: var TomlParser; i: int = 0): bool =
   return state.at + i < state.buf.len
 
-proc consumeEscape(state: var TomlParser; c: char): Result[Rune, TomlError] =
+proc consumeEscape(state: var TomlParser; c: char): Result[uint32, TomlError] =
   var len = 4
   if c == 'U':
     len = 8
@@ -166,7 +166,7 @@ proc consumeEscape(state: var TomlParser; c: char): Result[Rune, TomlError] =
     if num > 0x10FFFF or num in 0xD800..0xDFFF:
       return state.err("invalid escaped codepoint: " & $num)
     else:
-      return ok(Rune(num))
+      return ok(uint32(num))
   else:
     return state.err("invalid escaped codepoint: " & $c)
 
@@ -213,7 +213,7 @@ proc consumeString(state: var TomlParser; first: char): Result[string, string] =
       of 'r': res &= '\r'
       of '"': res &= '"'
       of '\\': res &= '\\'
-      of 'u', 'U': res &= ?state.consumeEscape(c)
+      of 'u', 'U': res.addUTF8(?state.consumeEscape(c))
       of '\n': ml_trim = true
       of '$': res &= "\\$" # special case for substitution in paths
       else: return state.err("invalid escape sequence \\" & c)
author	bptato <nincsnevem662@gmail.com>	2024-09-08 15:18:45 +0200
committer	bptato <nincsnevem662@gmail.com>	2024-09-08 16:06:02 +0200
commit	4124c041ed2e3b497ede72fdae229aa2c6aca249 (patch)
tree	e8488449de6f0be54b9c79547352829b998833d3 /src/config
parent	5a64e3193924c7e503dddb10a99989148b26e922 (diff)
download	chawan-4124c041ed2e3b497ede72fdae229aa2c6aca249.tar.gz