about summary refs log tree commit diff stats
path: root/src/utils
diff options
context:
space:
mode:
author: bptato <nincsnevem662@gmail.com> 2025-01-02 18:00:36 +0100
committer: bptato <nincsnevem662@gmail.com> 2025-01-02 18:00:36 +0100
commit: fa8db221388eaca1ffac1d47e61722563999e278 (patch)
tree: e3aa1d67871730f94e1b06aae80ee86831926fdd /src/utils
parent: 6ac50899d942db740797c9d3277d04b7a535159e (diff)
download: chawan-fa8db221388eaca1ffac1d47e61722563999e278.tar.gz
layout: ignore general category Me, Mn
I'm not sure if this is the best way.  On one hand, it probably breaks
text in some languages that cannot be represented in normal form.
On the other hand, some terminals may crash on such strange sequences
(or worse).

I'll just skip them for now, because it's marginally faster than
returning 0 from width.  We'll see if somebody complains.
Diffstat (limited to 'src/utils')
-rw-r--r-- src/utils/luwrap.nim 10
1 files changed, 10 insertions, 0 deletions
diff --git a/src/utils/luwrap.nim b/src/utils/luwrap.nim
index 06057d8d..e2748958 100644
--- a/src/utils/luwrap.nim
+++ b/src/utils/luwrap.nim
@@ -85,6 +85,8 @@ type
     lurHiragana = "Hiragana"
     lurKatakana = "Katakana"
     lurHangul = "Hangul"
+    lurEnclosingMark = "Me"
+    lurNonspacingMark = "Mn"
 
   LUContextObj = object
     crs: array[LURangeType, CharRange]
@@ -139,3 +141,11 @@ proc isKatakana*(ctx: LUContext; u: uint32): bool =
 proc isHangul*(ctx: LUContext; u: uint32): bool =
   ctx.initScript(lurHangul)
   return u in ctx.crs[lurHangul]
+
+proc isEnclosingMark*(ctx: LUContext; u: uint32): bool =
+  ctx.initGeneralCategory(lurEnclosingMark)
+  return u in ctx.crs[lurEnclosingMark]
+
+proc isNonspacingMark*(ctx: LUContext; u: uint32): bool =
+  ctx.initGeneralCategory(lurNonspacingMark)
+  return u in ctx.crs[lurNonspacingMark]