about summary refs log tree commit diff stats
path: root/src/js
diff options
context:
space:
mode:
author: bptato <nincsnevem662@gmail.com> 2022-12-10 19:05:38 +0100
committer: bptato <nincsnevem662@gmail.com> 2022-12-10 19:05:38 +0100
commit: 1e858c874804444bc4b95b6e89eb96a0deb8473c (patch)
tree: 3151b498e19c6d6eed3d90827483eb270314f3da /src/js
parent: d963385cd9fd77f0a950c5b92be7774bbf76d661 (diff)
download: chawan-1e858c874804444bc4b95b6e89eb96a0deb8473c.tar.gz
Add support for the encoding standard, fix parseLegacyColor
Also, fix a bug in the
Diffstat (limited to 'src/js')
-rw-r--r-- src/js/regex.nim 69
1 files changed, 68 insertions, 1 deletions
diff --git a/src/js/regex.nim b/src/js/regex.nim
index 492ae031..e4b31c23 100644
--- a/src/js/regex.nim
+++ b/src/js/regex.nim
@@ -6,7 +6,6 @@ import unicode
 import bindings/libregexp
 import bindings/quickjs
 import js/javascript
-import strings/charset
 import utils/twtstr
 
 export
@@ -33,6 +32,74 @@ type
     rule: string
     global: bool
 
+# UTF-16 text stored as raw bytes, two bytes per code unit. Declared as a
+# distinct type so that it is not interchangeable with an ordinary string.
+type string16 = distinct string
+
+# Encode a UTF-8 string as UTF-16. Every code unit is written as two bytes,
+# least significant byte first. Runes above U+10FFFF become U+FFFD.
+# Note: surrogate code points found in (invalid) UTF-8 input are not
+# checked for; they are emitted as a single code unit like any other value
+# below 0x10000.
+proc toUTF16*(s: string): string16 =
+  var buf = ""
+  template emit(unit: uint16) =
+    # Append one code unit: low byte, then high byte.
+    buf.add(cast[char](unit))
+    buf.add(cast[char](unit shr 8))
+  for r in s.runes:
+    let cp = uint32(r)
+    if cp > 0x10FFFF: # invalid
+      emit uint16(0xFFFD)
+    elif cp >= 0x10000: # needs a surrogate pair
+      let off = cp - 0x10000
+      emit uint16((off shr 10) + 0xD800)
+      emit uint16((off and 0x3FF) + 0xDC00)
+    else: # ucs-2, fits in one code unit
+      emit uint16(cp)
+  result = string16(buf)
+
+# Raw byte access to a string16. Lengths and indices count bytes of the
+# underlying string, not UTF-16 code units.
+func len(s: string16): int =
+  string(s).len
+func `[]`(s: string16, i: int): char =
+  result = string(s)[i]
+func `[]`(s: string16, i: BackwardsIndex): char =
+  result = string(s)[i]
+
+# Decode one code point from the UTF-16 byte string `s`, starting at byte
+# offset `i`, and store it in `r`.
+#   doInc: when true, `i` is advanced past the bytes that were consumed.
+#   be:    when true, code units are read high byte first; otherwise low
+#          byte first.
+# A single trailing byte yields U+FFFD. A surrogate without a valid partner
+# is returned unchanged as its own code point.
+template fastRuneAt(s: string16, i: int, r: untyped, doInc = true, be = false) =
+  if i + 1 == s.len: # unmatched byte
+    when doInc: inc i
+    r = Rune(0xFFFD)
+  else:
+    when be:
+      let c1: uint32 = (uint32(s[i]) shl 8) + uint32(s[i + 1])
+    else:
+      let c1: uint32 = uint32(s[i]) + (uint32(s[i + 1]) shl 8)
+    # Only a high surrogate (0xD800..0xDBFF) may open a pair. The bounds must
+    # be joined with `and`: with `or` the test holds for every code unit, so
+    # a plain BMP unit followed by a stray low surrogate would be fused into
+    # a bogus supplementary code point.
+    if c1 >= 0xD800 and c1 < 0xDC00:
+      if i + 2 == s.len or i + 3 == s.len:
+        # No complete second code unit is left in the input.
+        when doInc: i += 2
+        r = Rune(c1) # unmatched surrogate
+      else:
+        when be:
+          let c2: uint32 = (uint32(s[i + 2]) shl 8) + uint32(s[i + 3])
+        else:
+          let c2: uint32 = uint32(s[i + 2]) + (uint32(s[i + 3]) shl 8)
+        if c2 >= 0xDC00 and c2 < 0xE000:
+          # Valid pair: combine the two 10-bit payloads.
+          r = Rune((((c1 and 0x3FF) shl 10) or (c2 and 0x3FF)) + 0x10000)
+          when doInc: i += 4
+        else:
+          r = Rune(c1) # unmatched surrogate
+          when doInc: i += 2
+    else:
+      r = Rune(c1) # ucs-2
+      when doInc: i += 2
+
+# Iterate over the code points of `s`, decoding with fastRuneAt using its
+# default arguments (advance the offset, low byte first).
+iterator runes(s: string16): Rune =
+  var
+    pos = 0
+    cur: Rune
+  while pos < s.len:
+    fastRuneAt(s, pos, cur)
+    yield cur
+
+# Convert a string16 back to a regular string by appending every decoded
+# rune, in order, to the result with `&=`.
+proc fromUTF16(s: string16): string =
+  result = ""
+  for cp in s.runes:
+    result &= cp
+
 var dummyRuntime = newJSRuntime()
 var dummyContext = dummyRuntime.newJSContextRaw()