refactor cmpIgnoreStyle and cmpIgnoreCase (#16399)

* init * support strutils * more * better * Call len once per string/cstring * Change var to let * Compare ternary on first char * More appropriate param name * fix * better * one test * impl * more efficient * minor Co-authored-by: Clyybber <darkmine956@gmail.com>
author: flywind <43030857+xflywind@users.noreply.github.com> 2020-12-31 04:54:40 -0600
committer: GitHub <noreply@github.com> 2020-12-31 11:54:40 +0100
commit: 5fb56a3b2c83b62d72c72a9d56ef1333671bc2b6 (patch)
tree: e32cffb18e3180a81f8179130cc99800d6e505df
parent: 5984f7a7dda5e6fb3119cd5705d5758e1b8f3fc7 (diff)
download: Nim-5fb56a3b2c83b62d72c72a9d56ef1333671bc2b6.tar.gz
8 files changed, 141 insertions, 147 deletions
diff --git a/lib/core/macros.nim b/lib/core/macros.nim
index 97c3a46c5..748464061 100644
--- a/lib/core/macros.nim
+++ b/lib/core/macros.nim
@@ -1389,25 +1389,10 @@ when defined(nimVmEqIdent):
     ## these nodes will be unwrapped.
 
 else:
+  from std/private/strimpl import cmpIgnoreStyleImpl
   # this procedure is optimized for native code, it should not be compiled to nimVM bytecode.
   proc cmpIgnoreStyle(a, b: cstring): int {.noSideEffect.} =
-    proc toLower(c: char): char {.inline.} =
-      if c in {'A'..'Z'}: result = chr(ord(c) + (ord('a') - ord('A')))
-      else: result = c
-    var i = 0
-    var j = 0
-    # first char is case sensitive
-    if a[0] != b[0]: return 1
-    while true:
-      while a[i] == '_': inc(i)
-      while b[j] == '_': inc(j) # BUGFIX: typo
-      var aa = toLower(a[i])
-      var bb = toLower(b[j])
-      result = ord(aa) - ord(bb)
-      if result != 0 or aa == '\0': break
-      inc(i)
-      inc(j)
-
+    cmpIgnoreStyleImpl(a, b, true)
 
   proc eqIdent*(a, b: string): bool = cmpIgnoreStyle(a, b) == 0
     ## Check if two idents are equal.
diff --git a/lib/core/typeinfo.nim b/lib/core/typeinfo.nim
index c15f6dc1f..9b7e32466 100644
--- a/lib/core/typeinfo.nim
+++ b/lib/core/typeinfo.nim
@@ -91,6 +91,8 @@ when not defined(gcDestructors):
 else:
   include system/seqs_v2_reimpl
 
+from std/private/strimpl import cmpIgnoreStyleImpl
+
 when not defined(js):
   template rawType(x: Any): PNimType =
     cast[PNimType](x.rawTypePtr)
@@ -366,36 +368,22 @@ iterator fields*(x: Any): tuple[name: string, any: Any] =
   for name, any in items(ret):
     yield ($name, any)
 
-proc cmpIgnoreStyle(a, b: cstring): int {.noSideEffect.} =
-  proc toLower(c: char): char {.inline.} =
-    if c in {'A'..'Z'}: result = chr(ord(c) + (ord('a') - ord('A')))
-    else: result = c
-  var i = 0
-  var j = 0
-  if a[0] != b[0]: return 1
-  while true:
-    while a[i] == '_': inc(i)
-    while b[j] == '_': inc(j) # BUGFIX: typo
-    var aa = toLower(a[i])
-    var bb = toLower(b[j])
-    result = ord(aa) - ord(bb)
-    if result != 0 or aa == '\0': break
-    inc(i)
-    inc(j)
+proc cmpNimIdentifier(a, b: cstring): int {.noSideEffect.} =
+  cmpIgnoreStyleImpl(a, b, true)
 
 proc getFieldNode(p: pointer, n: ptr TNimNode,
                   name: cstring): ptr TNimNode =
   case n.kind
   of nkNone: assert(false)
   of nkSlot:
-    if cmpIgnoreStyle(n.name, name) == 0:
+    if cmpNimIdentifier(n.name, name) == 0:
       result = n
   of nkList:
     for i in 0..n.len-1:
       result = getFieldNode(p, n.sons[i], name)
       if result != nil: break
   of nkCase:
-    if cmpIgnoreStyle(n.name, name) == 0:
+    if cmpNimIdentifier(n.name, name) == 0:
       result = n
     else:
       var m = selectBranch(p, n)
@@ -599,7 +587,7 @@ proc getEnumOrdinal*(x: Any, name: string): int =
   var n = typ.node
   var s = n.sons
   for i in 0 .. n.len-1:
-    if cmpIgnoreStyle($s[i].name, name) == 0:
+    if cmpNimIdentifier($s[i].name, name) == 0:
       if ntfEnumHole notin typ.flags:
         return i
       else:
diff --git a/lib/pure/cstrutils.nim b/lib/pure/cstrutils.nim
index 601508e2e..a95c13fb5 100644
--- a/lib/pure/cstrutils.nim
+++ b/lib/pure/cstrutils.nim
@@ -12,12 +12,8 @@
 ## save allocations.
 
 include "system/inclrtl"
+import std/private/strimpl
 
-proc toLowerAscii(c: char): char {.inline.} =
-  if c in {'A'..'Z'}:
-    result = chr(ord(c) + (ord('a') - ord('A')))
-  else:
-    result = c
 
 when defined(js):
   proc startsWith*(s, prefix: cstring): bool {.noSideEffect,
@@ -25,7 +21,13 @@ when defined(js):
 
   proc endsWith*(s, suffix: cstring): bool {.noSideEffect,
     importjs: "#.endsWith(#)".}
-  
+
+  proc cmpIgnoreStyle*(a, b: cstring): int {.noSideEffect.} =
+    cmpIgnoreStyleImpl(a, b)
+
+  proc cmpIgnoreCase*(a, b: cstring): int {.noSideEffect.} =
+    cmpIgnoreCaseImpl(a, b)
+
   # JS string has more operations that might warrant its own module:
   # https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String
 else:
@@ -57,45 +59,39 @@ else:
       inc(i)
     if suffix[i] == '\0': return true
 
-proc cmpIgnoreStyle*(a, b: cstring): int {.noSideEffect,
-  rtl, extern: "csuCmpIgnoreStyle".} =
-  ## Semantically the same as ``cmp(normalize($a), normalize($b))``. It
-  ## is just optimized to not allocate temporary strings.  This should
-  ## NOT be used to compare Nim identifier names. use `macros.eqIdent`
-  ## for that.  Returns:
-  ##
-  ## | 0 if a == b
-  ## | < 0 if a < b
-  ## | > 0 if a > b
-  ## 
-  ## Not supported for JS backend, use `strutils.cmpIgnoreStyle
-  ## <strutils.html#cmpIgnoreStyle%2Cstring%2Cstring>`_ instead.
-  var i = 0
-  var j = 0
-  while true:
-    while a[i] == '_': inc(i)
-    while b[j] == '_': inc(j) # BUGFIX: typo
-    var aa = toLowerAscii(a[i])
-    var bb = toLowerAscii(b[j])
-    result = ord(aa) - ord(bb)
-    if result != 0 or aa == '\0': break
-    inc(i)
-    inc(j)
+  proc cmpIgnoreStyle*(a, b: cstring): int {.noSideEffect,
+    rtl, extern: "csuCmpIgnoreStyle".} =
+    ## Semantically the same as ``cmp(normalize($a), normalize($b))``. It
+    ## is just optimized to not allocate temporary strings.  This should
+    ## NOT be used to compare Nim identifier names. use `macros.eqIdent`
+    ## for that. Returns:
+    ##
+    ## | 0 if a == b
+    ## | < 0 if a < b
+    ## | > 0 if a > b
+    var i = 0
+    var j = 0
+    while true:
+      while a[i] == '_': inc(i)
+      while b[j] == '_': inc(j) # BUGFIX: typo
+      var aa = toLowerAscii(a[i])
+      var bb = toLowerAscii(b[j])
+      result = ord(aa) - ord(bb)
+      if result != 0 or aa == '\0': break
+      inc(i)
+      inc(j)
 
-proc cmpIgnoreCase*(a, b: cstring): int {.noSideEffect,
-  rtl, extern: "csuCmpIgnoreCase".} =
-  ## Compares two strings in a case insensitive manner. Returns:
-  ##
-  ## | 0 if a == b
-  ## | < 0 if a < b
-  ## | > 0 if a > b
-  ## 
-  ## Not supported for JS backend, use `strutils.cmpIgnoreCase
-  ## <strutils.html#cmpIgnoreCase%2Cstring%2Cstring>`_ instead.
-  var i = 0
-  while true:
-    var aa = toLowerAscii(a[i])
-    var bb = toLowerAscii(b[i])
-    result = ord(aa) - ord(bb)
-    if result != 0 or aa == '\0': break
-    inc(i)
+  proc cmpIgnoreCase*(a, b: cstring): int {.noSideEffect,
+    rtl, extern: "csuCmpIgnoreCase".} =
+    ## Compares two strings in a case insensitive manner. Returns:
+    ##
+    ## | 0 if a == b
+    ## | < 0 if a < b
+    ## | > 0 if a > b
+    var i = 0
+    while true:
+      var aa = toLowerAscii(a[i])
+      var bb = toLowerAscii(b[i])
+      result = ord(aa) - ord(bb)
+      if result != 0 or aa == '\0': break
+      inc(i)
diff --git a/lib/pure/strutils.nim b/lib/pure/strutils.nim
index 0028ac4fb..b1418a3ec 100644
--- a/lib/pure/strutils.nim
+++ b/lib/pure/strutils.nim
@@ -81,6 +81,8 @@ when defined(nimVmExportFixed):
 
 include "system/inclrtl"
 import std/private/since
+from std/private/strimpl import cmpIgnoreStyleImpl, cmpIgnoreCaseImpl
+
 
 const
   Whitespace* = {' ', '\t', '\v', '\r', '\l', '\f'}
@@ -319,13 +321,7 @@ func cmpIgnoreCase*(a, b: string): int {.rtl, extern: "nsuCmpIgnoreCase".} =
     doAssert cmpIgnoreCase("FooBar", "foobar") == 0
     doAssert cmpIgnoreCase("bar", "Foo") < 0
     doAssert cmpIgnoreCase("Foo5", "foo4") > 0
-  var i = 0
-  var m = min(a.len, b.len)
-  while i < m:
-    result = ord(toLowerAscii(a[i])) - ord(toLowerAscii(b[i]))
-    if result != 0: return
-    inc(i)
-  result = a.len - b.len
+  cmpIgnoreCaseImpl(a, b)
 
 {.push checks: off, line_trace: off.} # this is a hot-spot in the compiler!
                                       # thus we compile without checks here
@@ -344,25 +340,7 @@ func cmpIgnoreStyle*(a, b: string): int {.rtl, extern: "nsuCmpIgnoreStyle".} =
   runnableExamples:
     doAssert cmpIgnoreStyle("foo_bar", "FooBar") == 0
     doAssert cmpIgnoreStyle("foo_bar_5", "FooBar4") > 0
-  var i = 0
-  var j = 0
-  while true:
-    while i < a.len and a[i] == '_': inc i
-    while j < b.len and b[j] == '_': inc j
-    var aa = if i < a.len: toLowerAscii(a[i]) else: '\0'
-    var bb = if j < b.len: toLowerAscii(b[j]) else: '\0'
-    result = ord(aa) - ord(bb)
-    if result != 0: return result
-    # the characters are identical:
-    if i >= a.len:
-      # both cursors at the end:
-      if j >= b.len: return 0
-      # not yet at the end of 'b':
-      return -1
-    elif j >= b.len:
-      return 1
-    inc i
-    inc j
+  cmpIgnoreStyleImpl(a, b)
 {.pop.}
 
 # --------- Private templates for different split separators -----------
diff --git a/lib/std/private/strimpl.nim b/lib/std/private/strimpl.nim
new file mode 100644
index 000000000..ae752165a
--- /dev/null
+++ b/lib/std/private/strimpl.nim
@@ -0,0 +1,53 @@
+func toLowerAscii*(c: char): char {.inline.} =
+  if c in {'A'..'Z'}:
+    result = chr(ord(c) + (ord('a') - ord('A')))
+  else:
+    result = c
+
+template firstCharCaseSensitiveImpl(a, b: typed, aLen, bLen: int) =
+  if aLen == 0 or bLen == 0:
+    return aLen - bLen
+  if a[0] != b[0]: return ord(a[0]) - ord(b[0])
+
+template cmpIgnoreStyleImpl*(a, b: typed, firstCharCaseSensitive: static bool = false) =
+  # a, b are string or cstring
+  let aLen = a.len
+  let bLen = b.len
+  var i = 0
+  var j = 0
+  when firstCharCaseSensitive:
+    firstCharCaseSensitiveImpl(a, b, aLen, bLen)
+    inc i
+    inc j
+  while true:
+    while i < aLen and a[i] == '_': inc i
+    while j < bLen and b[j] == '_': inc j
+    let aa = if i < aLen: toLowerAscii(a[i]) else: '\0'
+    let bb = if j < bLen: toLowerAscii(b[j]) else: '\0'
+    result = ord(aa) - ord(bb)
+    if result != 0: return result
+    # the characters are identical:
+    if i >= aLen:
+      # both cursors at the end:
+      if j >= bLen: return 0
+      # not yet at the end of 'b':
+      return -1
+    elif j >= bLen:
+      return 1
+    inc i
+    inc j
+
+template cmpIgnoreCaseImpl*(a, b: typed, firstCharCaseSensitive: static bool = false) =
+  # a, b are string or cstring
+  let aLen = a.len
+  let bLen = b.len
+  var i = 0
+  when firstCharCaseSensitive:
+    firstCharCaseSensitiveImpl(a, b, aLen, bLen)
+    inc i
+  var m = min(aLen, bLen)
+  while i < m:
+    result = ord(toLowerAscii(a[i])) - ord(toLowerAscii(b[i]))
+    if result != 0: return
+    inc i
+  result = aLen - bLen
diff --git a/tests/js/tstdlib_various.nim b/tests/js/tstdlib_various.nim
index d19f40c39..a1bb63d46 100644
--- a/tests/js/tstdlib_various.nim
+++ b/tests/js/tstdlib_various.nim
@@ -29,7 +29,7 @@ Hi Andreas! How do you feel, Rumpf?
 """
 
 import
-  critbits, cstrutils, sets, strutils, tables, random, algorithm, ropes,
+  critbits, sets, strutils, tables, random, algorithm, ropes,
   lists, htmlgen, xmltree, strtabs
 
 
@@ -177,18 +177,3 @@ block txmltree:
     ])
   ])
   doAssert(y.innerText == "foobar")
-
-
-
-block tcstrutils:
-  let s = cstring "abcdef"
-  doAssert s.startsWith("a")
-  doAssert not s.startsWith("b")
-  doAssert s.endsWith("f")
-  doAssert not s.endsWith("a")
-
-  let a = cstring "abracadabra"
-  doAssert a.startsWith("abra")
-  doAssert not a.startsWith("bra")
-  doAssert a.endsWith("abra")
-  doAssert not a.endsWith("dab")
diff --git a/tests/stdlib/tcstrutils.nim b/tests/stdlib/tcstrutils.nim
new file mode 100644
index 000000000..1daf32aa5
--- /dev/null
+++ b/tests/stdlib/tcstrutils.nim
@@ -0,0 +1,30 @@
+discard """
+  targets: "c cpp js"
+"""
+
+import cstrutils
+
+
+block tcstrutils:
+  let s = cstring "abcdef"
+  doAssert s.startsWith("a")
+  doAssert not s.startsWith("b")
+  doAssert s.endsWith("f")
+  doAssert not s.endsWith("a")
+
+  let a = cstring "abracadabra"
+  doAssert a.startsWith("abra")
+  doAssert not a.startsWith("bra")
+  doAssert a.endsWith("abra")
+  doAssert not a.endsWith("dab")
+
+  doAssert cmpIgnoreCase(cstring "FooBar", "foobar") == 0
+  doAssert cmpIgnoreCase(cstring "bar", "Foo") < 0
+  doAssert cmpIgnoreCase(cstring "Foo5", "foo4") > 0
+
+  doAssert cmpIgnoreStyle(cstring "foo_bar", "FooBar") == 0
+  doAssert cmpIgnoreStyle(cstring "foo_bar_5", "FooBar4") > 0
+
+  doAssert cmpIgnoreCase(cstring "", cstring "") == 0
+  doAssert cmpIgnoreCase(cstring "", cstring "Hello") < 0
+  doAssert cmpIgnoreCase(cstring "wind", cstring "") > 0
diff --git a/tests/stdlib/tstdlib_various.nim b/tests/stdlib/tstdlib_various.nim
index cddd43f6e..b153fd2ba 100644
--- a/tests/stdlib/tstdlib_various.nim
+++ b/tests/stdlib/tstdlib_various.nim
@@ -38,7 +38,7 @@ true
 """
 
 import
-  critbits, cstrutils, sets, strutils, tables, random, algorithm, re, ropes,
+  critbits, sets, strutils, tables, random, algorithm, re, ropes,
   segfaults, lists, parsesql, streams, os, htmlgen, xmltree, strtabs
 
 
@@ -245,24 +245,3 @@ block txmltree:
     ])
   ])
   doAssert(y.innerText == "foobar")
-
-
-block tcstrutils:
-  let s = cstring "abcdef"
-  doAssert s.startsWith("a")
-  doAssert not s.startsWith("b")
-  doAssert s.endsWith("f")
-  doAssert not s.endsWith("a")
-
-  let a = cstring "abracadabra"
-  doAssert a.startsWith("abra")
-  doAssert not a.startsWith("bra")
-  doAssert a.endsWith("abra")
-  doAssert not a.endsWith("dab")
-
-  doAssert cmpIgnoreCase(cstring "FooBar", "foobar") == 0
-  doAssert cmpIgnoreCase(cstring "bar", "Foo") < 0
-  doAssert cmpIgnoreCase(cstring "Foo5", "foo4") > 0
-
-  doAssert cmpIgnoreStyle(cstring "foo_bar", "FooBar") == 0
-  doAssert cmpIgnoreStyle(cstring "foo_bar_5", "FooBar4") > 0
author	flywind <43030857+xflywind@users.noreply.github.com>	2020-12-31 04:54:40 -0600
committer	GitHub <noreply@github.com>	2020-12-31 11:54:40 +0100
commit	5fb56a3b2c83b62d72c72a9d56ef1333671bc2b6 (patch)
tree	e32cffb18e3180a81f8179130cc99800d6e505df
parent	5984f7a7dda5e6fb3119cd5705d5758e1b8f3fc7 (diff)
download	Nim-5fb56a3b2c83b62d72c72a9d56ef1333671bc2b6.tar.gz