diff options
author | bptato <nincsnevem662@gmail.com> | 2022-12-10 19:05:38 +0100 |
---|---|---|
committer | bptato <nincsnevem662@gmail.com> | 2022-12-10 19:05:38 +0100 |
commit | 1e858c874804444bc4b95b6e89eb96a0deb8473c (patch) | |
tree | 3151b498e19c6d6eed3d90827483eb270314f3da /src/js | |
parent | d963385cd9fd77f0a950c5b92be7774bbf76d661 (diff) | |
download | chawan-1e858c874804444bc4b95b6e89eb96a0deb8473c.tar.gz |
Add support for the encoding standard, fix parseLegacyColor
Also, fix a bug in the
Diffstat (limited to 'src/js')
-rw-r--r-- | src/js/regex.nim | 69 |
1 files changed, 68 insertions, 1 deletions
# UTF-16 helpers for src/js/regex.nim.

type string16 = distinct string
  ## UTF-16 text kept as raw bytes inside a string, two bytes per code unit.

proc toUTF16*(s: string): string16 =
  ## Convert the UTF-8 string `s` to UTF-16, writing the low byte of every
  ## code unit first.
  ##
  ## Code points above U+FFFF are written as a surrogate pair and values
  ## above U+10FFFF as U+FFFD.
  ## Note: this doesn't check for (invalid) UTF-8 containing surrogates.
  # Every UTF-8 byte expands to at most two UTF-16 bytes.
  var res = newStringOfCap(s.len * 2)
  template put16(c: uint16) =
    res.add(char(c and 0xFF))
    res.add(char(c shr 8))
  for r in s.runes:
    var c = uint32(r)
    if c < 0x10000: # ucs-2
      put16 uint16(c)
    elif c <= 0x10FFFF: # surrogate pair
      c -= 0x10000
      put16 uint16((c shr 10) + 0xD800)
      put16 uint16((c and 0x3FF) + 0xDC00)
    else: # invalid
      put16 uint16(0xFFFD)
  result = string16(res)

func len(s: string16): int {.borrow.}
  ## Length of `s` in bytes (not in code units).

func `[]`(s: string16, i: int): char = string(s)[i]
func `[]`(s: string16, i: BackwardsIndex): char = string(s)[i]

template fastRuneAt(s: string16, i: int, r: untyped, doInc = true, be = false) =
  ## Decode the code point starting at byte offset `i` of `s` into `r`.
  ##
  ## `i` must be a valid byte offset into `s`. When `doInc` is true, `i` is
  ## advanced past the bytes consumed. `be` selects big-endian code units;
  ## the default is little-endian.
  ##
  ## A single trailing byte yields U+FFFD. A surrogate without a partner is
  ## returned unchanged as its own code unit value.
  if i + 1 == s.len: # unmatched byte
    when doInc: inc i
    r = Rune(0xFFFD)
  else:
    let c1: uint32 =
      when be: (uint32(s[i]) shl 8) + uint32(s[i + 1])
      else: uint32(s[i]) + (uint32(s[i + 1]) shl 8)
    # Only a lead (high) surrogate, 0xD800..0xDBFF, may start a pair. Both
    # bounds have to hold: joining them with `or` is always true and would
    # merge any code unit with a following trail surrogate.
    if c1 >= 0xD800 and c1 < 0xDC00:
      if i + 2 == s.len or i + 3 == s.len:
        # Fewer than two bytes follow, so there is no complete second unit.
        when doInc: i += 2
        r = Rune(c1) # unmatched surrogate
      else:
        let c2: uint32 =
          when be: (uint32(s[i + 2]) shl 8) + uint32(s[i + 3])
          else: uint32(s[i + 2]) + (uint32(s[i + 3]) shl 8)
        if c2 >= 0xDC00 and c2 < 0xE000:
          r = Rune((((c1 and 0x3FF) shl 10) or (c2 and 0x3FF)) + 0x10000)
          when doInc: i += 4
        else:
          r = Rune(c1) # unmatched surrogate
          when doInc: i += 2
    else:
      r = Rune(c1) # ucs-2
      when doInc: i += 2

iterator runes(s: string16): Rune =
  ## Yield the code points of `s`, decoding code units as little-endian.
  var pos = 0
  var cp: Rune
  while pos < s.len:
    fastRuneAt(s, pos, cp)
    yield cp

proc fromUTF16(s: string16): string =
  ## Convert UTF-16 text back into a UTF-8 encoded string.
  for cp in s.runes:
    result.add(cp)