diff options
author | Araq <rumpf_a@web.de> | 2015-09-21 15:14:04 +0200 |
---|---|---|
committer | Araq <rumpf_a@web.de> | 2015-09-21 15:49:46 +0200 |
commit | 73279aba39bfdb6ae7e8ff06e9e0a9c2ba9e8da6 (patch) | |
tree | d6cbfd56638a967172f4a12449f5709bc947b189 | |
parent | 08f9baeddcdfeef67ad52ae4bc47046da587acd8 (diff) | |
download | Nim-73279aba39bfdb6ae7e8ff06e9e0a9c2ba9e8da6.tar.gz |
added unicode.lastRune, unicode.graphemeLen
-rw-r--r-- | lib/pure/unicode.nim | 41 | ||||
-rw-r--r-- | web/news.txt | 1 |
2 files changed, 36 insertions, 6 deletions
diff --git a/lib/pure/unicode.nim b/lib/pure/unicode.nim index 396957f6c..d3dc77909 100644 --- a/lib/pure/unicode.nim +++ b/lib/pure/unicode.nim @@ -1319,15 +1319,44 @@ proc reversed*(s: string): string = reverseUntil(len(s)) +proc graphemeLen*(s: string; i: Natural): Natural = + ## The number of bytes belonging to 's[i]' including following combining + ## characters. + var j = i.int + var r, r2: Rune + if j < s.len: + fastRuneAt(s, j, r, true) + result = j-i + while j < s.len: + fastRuneAt(s, j, r2, true) + if not isCombining(r2): break + result = j-i + +proc lastRune*(s: string; last: int): (Rune, int) = + ## length of the last rune in 's[0..last]'. Returns the rune and its length + ## in bytes. + if s[last] <= chr(127): + result = (Rune(s[last]), 1) + else: + var L = 0 + while last-L >= 0 and ord(s[last-L]) shr 6 == 0b10: inc(L) + inc(L) + var r: Rune + fastRuneAt(s, last-L, r, false) + result = (r, L) + when isMainModule: let someString = "öÑ" someRunes = @[runeAt(someString, 0), runeAt(someString, 2)] compared = (someString == $someRunes) - assert compared == true + doAssert compared == true - assert reversed("Reverse this!") == "!siht esreveR" - assert reversed("先秦兩漢") == "漢兩秦先" - assert reversed("as⃝df̅") == "f̅ds⃝a" - assert reversed("a⃞b⃞c⃞") == "c⃞b⃞a⃞" - assert len(toRunes("as⃝df̅")) == runeLen("as⃝df̅") + doAssert reversed("Reverse this!") == "!siht esreveR" + doAssert reversed("先秦兩漢") == "漢兩秦先" + doAssert reversed("as⃝df̅") == "f̅ds⃝a" + doAssert reversed("a⃞b⃞c⃞") == "c⃞b⃞a⃞" + doAssert len(toRunes("as⃝df̅")) == runeLen("as⃝df̅") + const test = "as⃝" + doAssert lastRune(test, test.len-1)[1] == 3 + doAssert graphemeLen("è", 0) == 2 diff --git a/web/news.txt b/web/news.txt index c70824b87..af8168f36 100644 --- a/web/news.txt +++ b/web/news.txt @@ -97,6 +97,7 @@ News to benchmark it. - ``strutils.formatFloat`` and ``formatBiggestFloat`` do not depend on the C locale anymore and now take an optional ``decimalSep = '.'`` parameter. 
+ - Added ``unicode.lastRune``, ``unicode.graphemeLen``. Compiler Additions |