about summary refs log tree commit diff stats
path: root/src/utils/wordbreak.nim
diff options
context:
space:
mode:
Diffstat (limited to 'src/utils/wordbreak.nim')
-rw-r--r-- src/utils/wordbreak.nim 33
1 files changed, 33 insertions, 0 deletions
diff --git a/src/utils/wordbreak.nim b/src/utils/wordbreak.nim
new file mode 100644
index 00000000..80959be7
--- /dev/null
+++ b/src/utils/wordbreak.nim
@@ -0,0 +1,33 @@
+import std/unicode
+
+import utils/charcategory
+import utils/luwrap
+import utils/strwidth
+
+func isDigitAscii(r: Rune): bool =
+  return uint32(r) < 128 and char(r) in AsciiDigit
+
+type BreakCategory* = enum
+  bcAlpha, bcSpace, bcSymbol, bcHan
+
+func breaksWord*(r: Rune): bool =
+  return not r.isDigitAscii() and r.width() != 0 and not r.isAlphaLU()
+
+func breaksViWordCat*(r: Rune): BreakCategory =
+  if r.isWhiteSpaceLU():
+    return bcSpace
+  elif r.breaksWord() and r != Rune'_':
+    return bcSymbol
+  elif r.isScriptLU("Han"):
+    return bcHan
+  return bcAlpha
+
+func breaksWordCat*(r: Rune): BreakCategory =
+  if not r.breaksWord():
+    return bcAlpha
+  return bcSpace
+
+func breaksBigWordCat*(r: Rune): BreakCategory =
+  if not r.isWhiteSpaceLU():
+    return bcAlpha
+  return bcSpace