summary refs log tree commit diff stats
diff options
context:
space:
mode:
authordef <dennis@felsin9.de>2015-01-15 23:11:02 +0100
committerdef <dennis@felsin9.de>2015-01-15 23:11:02 +0100
commitae7ca46a092204e7dbfb642309600f036de65e6e (patch)
tree0501689c33eca19a27eac7adffe2e4c8655a312f
parent0a82b6eb628a5ec4dc4aadc1fb120067845f8443 (diff)
downloadNim-ae7ca46a092204e7dbfb642309600f036de65e6e.tar.gz
Optimize unicode.reversed
Runs about 18 times faster:
- combining characters with boolean logic instead of binary search
- No more temporary sequence
- Optimize for ASCII characters
-rw-r--r--lib/pure/unicode.nim50
1 files changed, 31 insertions, 19 deletions
diff --git a/lib/pure/unicode.nim b/lib/pure/unicode.nim
index 306509d26..b892ec8b4 100644
--- a/lib/pure/unicode.nim
+++ b/lib/pure/unicode.nim
@@ -1102,13 +1102,6 @@ const
     0x01f1, 501,  #     
     0x01f3, 499]  #     
 
-  combiningRanges = [
-    0x0300, 0x036f,
-    0x1ab0, 0x1aff,
-    0x1dc0, 0x1dff,
-    0x20d0, 0x20ff,
-    0xfe20, 0xfe2f]
-
 proc binarySearch(c: RuneImpl, tab: openArray[RuneImpl], len, stride: int): int = 
   var n = len
   var t = 0
@@ -1204,9 +1197,13 @@ proc isWhiteSpace*(c: Rune): bool {.rtl, extern: "nuc$1", procvar.} =
 proc isCombining*(c: Rune): bool {.rtl, extern: "nuc$1", procvar.} =
   ## returns true iff `c` is a Unicode combining character
   var c = RuneImpl(c)
-  var p = binarySearch(c, combiningRanges, len(combiningRanges) div 2, 2)
-  if p >= 0 and c >= combiningRanges[p] and c <= combiningRanges[p+1]:
-    return true
+
+  # Optimized to return false immediately for ASCII
+  return c >= 0x0300 and (c <= 0x036f or
+    (c >= 0x1ab0 and c <= 0x1aff) or
+    (c >= 0x1dc0 and c <= 0x1dff) or
+    (c >= 0x20d0 and c <= 0x20ff) or
+    (c >= 0xfe20 and c <= 0xfe2f))
 
 iterator runes*(s: string): Rune =
   ## iterates over any unicode character of the string `s`.
@@ -1243,15 +1240,30 @@ proc reversed*(s: string): string =
   ##   assert reversed("先秦兩漢") == "漢兩秦先"
   ##   assert reversed("as⃝df̅") == "f̅ds⃝a"
   ## assert reversed("a⃞b⃞c⃞") == "c⃞b⃞a⃞"
-  result = newStringOfCap(len(s))
-  var tmp: seq[Rune] = @[]
-  for r in runes(s):
-    if isCombining(r):
-      tmp.insert(r, high(tmp))
-    else:
-      tmp.add(r)
-  for i in countdown(high(tmp), 0):
-    result.add(toUTF8(tmp[i]))
+  var
+    i = 0
+    lastI = 0
+    newPos = len(s) - 1
+    blockPos = 0
+    r: Rune
+
+  template reverseUntil(pos): stmt =
+    var j = pos - 1
+    while j > blockPos:
+      result[newPos] = s[j]
+      dec j
+      dec newPos
+    blockPos = pos - 1
+
+  result = newString(len(s))
+
+  while i < len(s):
+    lastI = i
+    fastRuneAt(s, i, r, true)
+    if not isCombining(r):
+      reverseUntil(lastI)
+
+  reverseUntil(len(s))
 
 when isMainModule:
   let