# Origin: commit 6190ae1071c8d07a9a2d10037801838718363e1f by bptato
# (2023-02-05), "Update character width ranges": use the Unicode 15
# EastAsianWidth.txt file instead of an ancient wcwidth implementation's
# ranges. Adds src/data/charwidth.nim.

import strutils

# Inclusive (first, last) code point ranges, built at compile time from
# res/map/EastAsianWidth.txt and grouped by the status column of that file:
#   DoubleWidthRanges           - entries with status "W"
#   DoubleWidthAmbiguousRanges  - entries with status "A"
#   HalfWidthRanges             - entries with status "H"
# Entries with any other status are ignored.
# NOTE(review): status "F" (fullwidth) is not collected into any list here;
# confirm whether callers are expected to handle those code points elsewhere.
const (
  DoubleWidthRanges*,
  DoubleWidthAmbiguousRanges*,
  HalfWidthRanges*
) = (func(): (
    seq[(uint32, uint32)],
    seq[(uint32, uint32)],
    seq[(uint32, uint32)]
  ) =
  # Parses `field` ("XXXX" or "XXXX..YYYY", hexadecimal) and appends it to
  # `res` as an inclusive range; a single code point becomes (cp, cp).
  template addRange(field: string, res: var seq[(uint32, uint32)]) =
    let bounds = field.split("..")
    let first = uint32(parseHexInt(bounds[0]))
    let last =
      if bounds.len > 1: uint32(parseHexInt(bounds[1]))
      else: first
    res.add((first, last))

  const map = staticRead"res/map/EastAsianWidth.txt"
  # splitLines copes with both LF and CRLF line endings.
  for rawLine in map.splitLines():
    # Everything from '#' to the end of the line is a comment.
    let hashPos = rawLine.find('#')
    let line =
      if hashPos >= 0: rawLine[0 ..< hashPos]
      else: rawLine
    # Data lines look like "<code point or range> ; <status>". Lines without
    # a ';' (blank, whitespace-only, comment-only) carry no data, so skip
    # them instead of indexing past the end of the string.
    let fields = line.split(';')
    if fields.len < 2:
      continue
    let firstcol = fields[0].strip()
    let status = fields[1].strip()
    if firstcol.len == 0:
      continue
    case status
    of "W": addRange(firstcol, result[0])
    of "A": addRange(firstcol, result[1])
    of "H": addRange(firstcol, result[2])
    else: discard # other statuses are not tracked
)()