summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Araq <rumpf_a@web.de> 2015-09-21 15:14:04 +0200
committer Araq <rumpf_a@web.de> 2015-09-21 15:49:46 +0200
commit 73279aba39bfdb6ae7e8ff06e9e0a9c2ba9e8da6 (patch)
tree d6cbfd56638a967172f4a12449f5709bc947b189
parent 08f9baeddcdfeef67ad52ae4bc47046da587acd8 (diff)
download Nim-73279aba39bfdb6ae7e8ff06e9e0a9c2ba9e8da6.tar.gz
added unicode.lastRune, unicode.graphemeLen
-rw-r--r-- lib/pure/unicode.nim 41
-rw-r--r-- web/news.txt 1
2 files changed, 36 insertions, 6 deletions
diff --git a/lib/pure/unicode.nim b/lib/pure/unicode.nim
index 396957f6c..d3dc77909 100644
--- a/lib/pure/unicode.nim
+++ b/lib/pure/unicode.nim
@@ -1319,15 +1319,44 @@ proc reversed*(s: string): string =
 
   reverseUntil(len(s))
 
+proc graphemeLen*(s: string; i: Natural): Natural =
+  ## Byte length of the rune starting at ``s[i]`` together with every
+  ## combining rune directly after it; 0 when ``i`` is past the end of ``s``.
+  if i >= s.len: return 0
+  var pos = i.int
+  var base, mark: Rune
+  fastRuneAt(s, pos, base, true)
+  result = pos - i
+  while pos < s.len:
+    fastRuneAt(s, pos, mark, true)
+    if not isCombining(mark): return
+    result = pos - i
+
+proc lastRune*(s: string; last: int): (Rune, int) =
+  ## Decodes the rune that ends at byte ``s[last]``. Returns the rune and its
+  ## length in bytes.
+  if s[last] <= chr(127):
+    result = (Rune(s[last]), 1)
+  else:
+    var L = 0  # counts trailing continuation bytes (0b10xxxxxx) only
+    while last-L >= 0 and ord(s[last-L]) shr 6 == 0b10: inc(L)
+    # 'last-L' now indexes the lead byte, so decoding has to start right there.
+    var r: Rune
+    fastRuneAt(s, last-L, r, false)
+    result = (r, L+1)
+
 when isMainModule:
   let
     someString = "öÑ"
     someRunes = @[runeAt(someString, 0), runeAt(someString, 2)]
     compared = (someString == $someRunes)
-  assert compared == true
+  doAssert compared == true  # doAssert still runs when assertions are off
 
-  assert reversed("Reverse this!") == "!siht esreveR"
-  assert reversed("先秦兩漢") == "漢兩秦先"
-  assert reversed("as⃝df̅") == "f̅ds⃝a"
-  assert reversed("a⃞b⃞c⃞") == "c⃞b⃞a⃞"
-  assert len(toRunes("as⃝df̅")) == runeLen("as⃝df̅")
+  doAssert reversed("Reverse this!") == "!siht esreveR"
+  doAssert reversed("先秦兩漢") == "漢兩秦先"
+  doAssert reversed("as⃝df̅") == "f̅ds⃝a"
+  doAssert reversed("a⃞b⃞c⃞") == "c⃞b⃞a⃞"
+  doAssert len(toRunes("as⃝df̅")) == runeLen("as⃝df̅")
+  const test = "as⃝"
+  doAssert lastRune(test, test.len-1)[1] == 3  # byte length only, not the rune
+  doAssert graphemeLen("è", 0) == 2
diff --git a/web/news.txt b/web/news.txt
index c70824b87..af8168f36 100644
--- a/web/news.txt
+++ b/web/news.txt
@@ -97,6 +97,7 @@ News
     to benchmark it.
   - ``strutils.formatFloat`` and ``formatBiggestFloat`` do not depend on the C
     locale anymore and now take an optional ``decimalSep = '.'`` parameter.
+  - Added ``unicode.lastRune``, ``unicode.graphemeLen``.
 
 
   Compiler Additions