about summary refs log tree commit diff stats
path: root/src/data
diff options
context:
space:
mode:
author bptato <nincsnevem662@gmail.com> 2023-02-05 16:24:56 +0100
committer bptato <nincsnevem662@gmail.com> 2023-02-05 16:24:56 +0100
commit 6190ae1071c8d07a9a2d10037801838718363e1f (patch)
tree 68581290381ecebb011750908f21b6a0c3d45633 /src/data
parent c5a318c85da3ae3cee30c839a40068e7f5fd514b (diff)
download chawan-6190ae1071c8d07a9a2d10037801838718363e1f.tar.gz
Update character width ranges
Use the Unicode 15 EastAsianWidth.txt file instead of an ancient wcwidth
implementation's ranges.
Diffstat (limited to 'src/data')
-rw-r--r-- src/data/charwidth.nim 42
1 file changed, 42 insertions, 0 deletions
diff --git a/src/data/charwidth.nim b/src/data/charwidth.nim
new file mode 100644
index 00000000..daec31ee
--- /dev/null
+++ b/src/data/charwidth.nim
@@ -0,0 +1,42 @@
+import strutils
+
+const (
+  DoubleWidthRanges*,
+  DoubleWidthAmbiguousRanges*,
+  HalfWidthRanges*
+) = (func(): (
+    seq[(uint32, uint32)],
+    seq[(uint32, uint32)],
+    seq[(uint32, uint32)]
+  ) =
+  ## Parses res/map/EastAsianWidth.txt, read with staticRead, and returns the
+  ## inclusive code point ranges with status "W", "A" and "H", in that order.
+  template add(firstcol: string, res: var seq[(uint32, uint32)]) =
+    # `firstcol` is a hex code point or a `first..last` pair of them.
+    if firstcol.contains(".."):
+      let fcs = firstcol.split("..")
+      res.add((uint32(parseHexInt(fcs[0])), uint32(parseHexInt(fcs[1]))))
+    else:
+      let cp = uint32(parseHexInt(firstcol))
+      res.add((cp, cp))
+  const map = staticRead"res/map/EastAsianWidth.txt"
+  for line in map.split('\n'):
+    if line.len == 0 or line[0] == '#':
+      continue
+    var i = 0
+    var firstcol = ""
+    var status = ""
+    while i < line.len and line[i] notin {'#', ';'}:
+      if line[i] != ' ': firstcol &= line[i]
+      inc i
+    # Step over the ';' separator. The bounds check keeps a line that has
+    # neither ';' nor '#' from indexing past its end.
+    if i < line.len and line[i] != '#': inc i
+    while i < line.len and line[i] notin {'#', ';'}:
+      if line[i] != ' ': status &= line[i]
+      inc i
+    case status  # any other status is skipped
+    of "W": add(firstcol, result[0])
+    of "A": add(firstcol, result[1])
+    of "H": add(firstcol, result[2])
+)()