diff options
author | def <dennis@felsin9.de> | 2015-01-15 23:11:02 +0100 |
---|---|---|
committer | def <dennis@felsin9.de> | 2015-01-15 23:11:02 +0100 |
commit | ae7ca46a092204e7dbfb642309600f036de65e6e (patch) | |
tree | 0501689c33eca19a27eac7adffe2e4c8655a312f | |
parent | 0a82b6eb628a5ec4dc4aadc1fb120067845f8443 (diff) | |
download | Nim-ae7ca46a092204e7dbfb642309600f036de65e6e.tar.gz |
Optimize unicode.reversed
Runs about 18 times faster: - combining characters with boolean logic instead of binary search - No more temporary sequence - Optimize for ASCII characters
-rw-r--r-- | lib/pure/unicode.nim | 50 |
1 files changed, 31 insertions, 19 deletions
diff --git a/lib/pure/unicode.nim b/lib/pure/unicode.nim index 306509d26..b892ec8b4 100644 --- a/lib/pure/unicode.nim +++ b/lib/pure/unicode.nim @@ -1102,13 +1102,6 @@ const 0x01f1, 501, # 0x01f3, 499] # - combiningRanges = [ - 0x0300, 0x036f, - 0x1ab0, 0x1aff, - 0x1dc0, 0x1dff, - 0x20d0, 0x20ff, - 0xfe20, 0xfe2f] - proc binarySearch(c: RuneImpl, tab: openArray[RuneImpl], len, stride: int): int = var n = len var t = 0 @@ -1204,9 +1197,13 @@ proc isWhiteSpace*(c: Rune): bool {.rtl, extern: "nuc$1", procvar.} = proc isCombining*(c: Rune): bool {.rtl, extern: "nuc$1", procvar.} = ## returns true iff `c` is a Unicode combining character var c = RuneImpl(c) - var p = binarySearch(c, combiningRanges, len(combiningRanges) div 2, 2) - if p >= 0 and c >= combiningRanges[p] and c <= combiningRanges[p+1]: - return true + + # Optimized to return false immediately for ASCII + return c >= 0x0300 and (c <= 0x036f or + (c >= 0x1ab0 and c <= 0x1aff) or + (c >= 0x1dc0 and c <= 0x1dff) or + (c >= 0x20d0 and c <= 0x20ff) or + (c >= 0xfe20 and c <= 0xfe2f)) iterator runes*(s: string): Rune = ## iterates over any unicode character of the string `s`. @@ -1243,15 +1240,30 @@ proc reversed*(s: string): string = ## assert reversed("先秦兩漢") == "漢兩秦先" ## assert reversed("as⃝df̅") == "f̅ds⃝a" ## assert reversed("a⃞b⃞c⃞") == "c⃞b⃞a⃞" - result = newStringOfCap(len(s)) - var tmp: seq[Rune] = @[] - for r in runes(s): - if isCombining(r): - tmp.insert(r, high(tmp)) - else: - tmp.add(r) - for i in countdown(high(tmp), 0): - result.add(toUTF8(tmp[i])) + var + i = 0 + lastI = 0 + newPos = len(s) - 1 + blockPos = 0 + r: Rune + + template reverseUntil(pos): stmt = + var j = pos - 1 + while j > blockPos: + result[newPos] = s[j] + dec j + dec newPos + blockPos = pos - 1 + + result = newString(len(s)) + + while i < len(s): + lastI = i + fastRuneAt(s, i, r, true) + if not isCombining(r): + reverseUntil(lastI) + + reverseUntil(len(s)) when isMainModule: let |