about summary refs log tree commit diff stats
path: root/src/utils/twtuni.nim
diff options
context:
space:
mode:
Diffstat (limited to 'src/utils/twtuni.nim')
-rw-r--r-- src/utils/twtuni.nim 95
1 files changed, 95 insertions, 0 deletions
diff --git a/src/utils/twtuni.nim b/src/utils/twtuni.nim
new file mode 100644
index 00000000..c617ac4e
--- /dev/null
+++ b/src/utils/twtuni.nim
@@ -0,0 +1,95 @@
+func nextUTF8*(s: openArray[char]; i: var int): uint32 =
+  ## Decodes the UTF-8 sequence that starts at `s[i]` and returns its code
+  ## point, moving `i` past the bytes that were consumed.
+  ##
+  ## `i` must be a valid index into `s`. A byte that cannot start a sequence
+  ## (a continuation byte, or 0xF8 and above) yields U+FFFD and consumes one
+  ## byte. A sequence cut off by the end of `s` also yields U+FFFD and
+  ## leaves `i` at `s.len`, so `i` never ends up past the end of the input.
+  ##
+  ## Continuation bytes are not validated; only their low six bits are used.
+  let start = i
+  let lead = uint32(s[start])
+  if lead <= 0x7F:
+    # ASCII: the byte is the code point.
+    inc i
+    return lead
+  # Derive the sequence length and the payload bits held by the lead byte.
+  var n: int
+  var u: uint32
+  if lead shr 5 == 0b110:
+    n = 2
+    u = lead and 0x1F
+  elif lead shr 4 == 0b1110:
+    n = 3
+    u = lead and 0xF
+  elif lead shr 3 == 0b11110:
+    n = 4
+    u = lead and 7
+  else:
+    inc i
+    return 0xFFFD
+  let e = start + n
+  if unlikely(e > s.len):
+    # Truncated sequence: report a replacement character rather than the
+    # raw lead byte, and stop exactly at the end of the input.
+    i = s.len
+    return 0xFFFD
+  # Each continuation byte contributes six more low-order bits.
+  for k in start + 1 ..< e:
+    u = u shl 6 or (uint32(s[k]) and 0x3F)
+  i = e
+  return u
+
+func prevUTF8*(s: openArray[char]; i: var int): uint32 =
+  ## Moves `i` back to the start of the UTF-8 sequence that ends just before
+  ## it and returns the code point that `nextUTF8` decodes at that position.
+  ##
+  ## `i` must be in `1..s.len`.
+  var j = i - 1
+  # Walk backwards over continuation bytes (0b10xxxxxx). The `j > 0` guard
+  # stops at the first byte, so input that begins with continuation bytes
+  # cannot drive the index below zero.
+  while j > 0 and uint32(s[j]) shr 6 == 2:
+    dec j
+  i = j
+  # `j` is a throwaway cursor here; `i` stays at the sequence start.
+  return s.nextUTF8(j)
+
+func pointLenAt*(s: openArray[char]; i: int): int =
+  ## Returns the byte length (1 to 4) that the lead byte `s[i]` announces
+  ## for its UTF-8 sequence. Only that one byte is inspected, so the result
+  ## may reach past the end of `s`. Bytes that are not a multi-byte lead
+  ## (ASCII, continuation bytes, 0xF8 and above) count as 1.
+  case s[i]
+  of '\xC0'..'\xDF': 2 # 110xxxxx
+  of '\xE0'..'\xEF': 3 # 1110xxxx
+  of '\xF0'..'\xF7': 4 # 11110xxx
+  else: 1
+
+iterator points*(s: openArray[char]): uint32 {.inline.} =
+  ## Yields the code points of `s` from front to back, decoding each one
+  ## with `nextUTF8`.
+  var pos = 0
+  while pos < s.len:
+    # nextUTF8 advances `pos` past the sequence it just decoded.
+    yield s.nextUTF8(pos)
+
+func toPoints*(s: openArray[char]): seq[uint32] =
+  ## Collects every value produced by the `points` iterator over `s` into a
+  ## new sequence, preserving order.
+  var decoded: seq[uint32] = @[]
+  for cp in s.points:
+    decoded.add(cp)
+  return decoded
+
+func addUTF8*(res: var string; u: uint32) =
+  ## Appends the UTF-8 encoding of code point `u` to `res`.
+  ##
+  ## Values up to 0x10FFFF are encoded in one to four bytes. Anything larger
+  ## has no UTF-8 encoding and is appended as U+FFFD instead.
+  ## Surrogate values (0xD800..0xDFFF) are not filtered; they are encoded
+  ## like any other three-byte value.
+  if u < 0x80:
+    res.add(char(u))
+  elif u < 0x800:
+    res.add(char(u shr 6 or 0xC0))
+    res.add(char(u and 0x3F or 0x80))
+  elif u < 0x10000:
+    res.add(char(u shr 12 or 0xE0))
+    res.add(char(u shr 6 and 0x3F or 0x80))
+    res.add(char(u and 0x3F or 0x80))
+  elif u <= 0x10FFFF:
+    res.add(char(u shr 18 or 0xF0))
+    res.add(char(u shr 12 and 0x3F or 0x80))
+    res.add(char(u shr 6 and 0x3F or 0x80))
+    res.add(char(u and 0x3F or 0x80))
+  else:
+    # Out of Unicode range: the lead byte would be invalid (or would not
+    # even fit in a char), so emit U+FFFD (EF BF BD).
+    res.add("\xEF\xBF\xBD")
+
+func addUTF8*(res: var string; us: openArray[uint32]) =
+  ## Appends the UTF-8 encoding of each code point in `us` to `res`, in
+  ## order, using the single-code-point `addUTF8` overload.
+  for idx in 0 ..< us.len:
+    res.addUTF8(us[idx])
+
+func toUTF8*(u: uint32): string =
+  ## Returns a new string containing what `addUTF8` appends for the single
+  ## code point `u`.
+  result = ""
+  result.addUTF8(u)
+
+func toUTF8*(us: openArray[uint32]): string =
+  ## Returns a new string containing what `addUTF8` appends for the code
+  ## points in `us`, in order.
+  # Every code point encodes to at least one byte, so `us.len` is a lower
+  # bound on the output size (and exact for pure ASCII input).
+  result = newStringOfCap(us.len)
+  result.addUTF8(us)
+
+func pointLen*(s: openArray[char]): int =
+  ## Returns the number of code points in `s`, i.e. how many values the
+  ## `points` iterator yields for it.
+  result = 0
+  for _ in s.points:
+    inc result