summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorflywind <43030857+xflywind@users.noreply.github.com>2020-12-31 04:54:40 -0600
committerGitHub <noreply@github.com>2020-12-31 11:54:40 +0100
commit5fb56a3b2c83b62d72c72a9d56ef1333671bc2b6 (patch)
treee32cffb18e3180a81f8179130cc99800d6e505df
parent5984f7a7dda5e6fb3119cd5705d5758e1b8f3fc7 (diff)
downloadNim-5fb56a3b2c83b62d72c72a9d56ef1333671bc2b6.tar.gz
refactor cmpIgnoreStyle and cmpIgnoreCase (#16399)
* init

* support strutils

* more

* better

* Call len once per string/cstring

* Change var to let

* Compare ternary on first char

* More appropriate param name

* fix

* better

* one test

* impl

* more efficient

* minor

Co-authored-by: Clyybber <darkmine956@gmail.com>
-rw-r--r--lib/core/macros.nim19
-rw-r--r--lib/core/typeinfo.nim26
-rw-r--r--lib/pure/cstrutils.nim90
-rw-r--r--lib/pure/strutils.nim30
-rw-r--r--lib/std/private/strimpl.nim53
-rw-r--r--tests/js/tstdlib_various.nim17
-rw-r--r--tests/stdlib/tcstrutils.nim30
-rw-r--r--tests/stdlib/tstdlib_various.nim23
8 files changed, 141 insertions, 147 deletions
diff --git a/lib/core/macros.nim b/lib/core/macros.nim
index 97c3a46c5..748464061 100644
--- a/lib/core/macros.nim
+++ b/lib/core/macros.nim
@@ -1389,25 +1389,10 @@ when defined(nimVmEqIdent):
     ## these nodes will be unwrapped.
 
 else:
+  from std/private/strimpl import cmpIgnoreStyleImpl
   # this procedure is optimized for native code, it should not be compiled to nimVM bytecode.
   proc cmpIgnoreStyle(a, b: cstring): int {.noSideEffect.} =
-    proc toLower(c: char): char {.inline.} =
-      if c in {'A'..'Z'}: result = chr(ord(c) + (ord('a') - ord('A')))
-      else: result = c
-    var i = 0
-    var j = 0
-    # first char is case sensitive
-    if a[0] != b[0]: return 1
-    while true:
-      while a[i] == '_': inc(i)
-      while b[j] == '_': inc(j) # BUGFIX: typo
-      var aa = toLower(a[i])
-      var bb = toLower(b[j])
-      result = ord(aa) - ord(bb)
-      if result != 0 or aa == '\0': break
-      inc(i)
-      inc(j)
-
+    cmpIgnoreStyleImpl(a, b, true)
 
   proc eqIdent*(a, b: string): bool = cmpIgnoreStyle(a, b) == 0
     ## Check if two idents are equal.
diff --git a/lib/core/typeinfo.nim b/lib/core/typeinfo.nim
index c15f6dc1f..9b7e32466 100644
--- a/lib/core/typeinfo.nim
+++ b/lib/core/typeinfo.nim
@@ -91,6 +91,8 @@ when not defined(gcDestructors):
 else:
   include system/seqs_v2_reimpl
 
+from std/private/strimpl import cmpIgnoreStyleImpl
+
 when not defined(js):
   template rawType(x: Any): PNimType =
     cast[PNimType](x.rawTypePtr)
@@ -366,36 +368,22 @@ iterator fields*(x: Any): tuple[name: string, any: Any] =
   for name, any in items(ret):
     yield ($name, any)
 
-proc cmpIgnoreStyle(a, b: cstring): int {.noSideEffect.} =
-  proc toLower(c: char): char {.inline.} =
-    if c in {'A'..'Z'}: result = chr(ord(c) + (ord('a') - ord('A')))
-    else: result = c
-  var i = 0
-  var j = 0
-  if a[0] != b[0]: return 1
-  while true:
-    while a[i] == '_': inc(i)
-    while b[j] == '_': inc(j) # BUGFIX: typo
-    var aa = toLower(a[i])
-    var bb = toLower(b[j])
-    result = ord(aa) - ord(bb)
-    if result != 0 or aa == '\0': break
-    inc(i)
-    inc(j)
+proc cmpNimIdentifier(a, b: cstring): int {.noSideEffect.} =
+  cmpIgnoreStyleImpl(a, b, true)
 
 proc getFieldNode(p: pointer, n: ptr TNimNode,
                   name: cstring): ptr TNimNode =
   case n.kind
   of nkNone: assert(false)
   of nkSlot:
-    if cmpIgnoreStyle(n.name, name) == 0:
+    if cmpNimIdentifier(n.name, name) == 0:
       result = n
   of nkList:
     for i in 0..n.len-1:
       result = getFieldNode(p, n.sons[i], name)
       if result != nil: break
   of nkCase:
-    if cmpIgnoreStyle(n.name, name) == 0:
+    if cmpNimIdentifier(n.name, name) == 0:
       result = n
     else:
       var m = selectBranch(p, n)
@@ -599,7 +587,7 @@ proc getEnumOrdinal*(x: Any, name: string): int =
   var n = typ.node
   var s = n.sons
   for i in 0 .. n.len-1:
-    if cmpIgnoreStyle($s[i].name, name) == 0:
+    if cmpNimIdentifier($s[i].name, name) == 0:
       if ntfEnumHole notin typ.flags:
         return i
       else:
diff --git a/lib/pure/cstrutils.nim b/lib/pure/cstrutils.nim
index 601508e2e..a95c13fb5 100644
--- a/lib/pure/cstrutils.nim
+++ b/lib/pure/cstrutils.nim
@@ -12,12 +12,8 @@
 ## save allocations.
 
 include "system/inclrtl"
+import std/private/strimpl
 
-proc toLowerAscii(c: char): char {.inline.} =
-  if c in {'A'..'Z'}:
-    result = chr(ord(c) + (ord('a') - ord('A')))
-  else:
-    result = c
 
 when defined(js):
   proc startsWith*(s, prefix: cstring): bool {.noSideEffect,
@@ -25,7 +21,13 @@ when defined(js):
 
   proc endsWith*(s, suffix: cstring): bool {.noSideEffect,
     importjs: "#.endsWith(#)".}
-  
+
+  proc cmpIgnoreStyle*(a, b: cstring): int {.noSideEffect.} =
+    cmpIgnoreStyleImpl(a, b)
+
+  proc cmpIgnoreCase*(a, b: cstring): int {.noSideEffect.} =
+    cmpIgnoreCaseImpl(a, b)
+
   # JS string has more operations that might warrant its own module:
   # https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String
 else:
@@ -57,45 +59,39 @@ else:
       inc(i)
     if suffix[i] == '\0': return true
 
-proc cmpIgnoreStyle*(a, b: cstring): int {.noSideEffect,
-  rtl, extern: "csuCmpIgnoreStyle".} =
-  ## Semantically the same as ``cmp(normalize($a), normalize($b))``. It
-  ## is just optimized to not allocate temporary strings.  This should
-  ## NOT be used to compare Nim identifier names. use `macros.eqIdent`
-  ## for that.  Returns:
-  ##
-  ## | 0 if a == b
-  ## | < 0 if a < b
-  ## | > 0 if a > b
-  ## 
-  ## Not supported for JS backend, use `strutils.cmpIgnoreStyle
-  ## <strutils.html#cmpIgnoreStyle%2Cstring%2Cstring>`_ instead.
-  var i = 0
-  var j = 0
-  while true:
-    while a[i] == '_': inc(i)
-    while b[j] == '_': inc(j) # BUGFIX: typo
-    var aa = toLowerAscii(a[i])
-    var bb = toLowerAscii(b[j])
-    result = ord(aa) - ord(bb)
-    if result != 0 or aa == '\0': break
-    inc(i)
-    inc(j)
+  proc cmpIgnoreStyle*(a, b: cstring): int {.noSideEffect,
+    rtl, extern: "csuCmpIgnoreStyle".} =
+    ## Semantically the same as ``cmp(normalize($a), normalize($b))``. It
+    ## is just optimized to not allocate temporary strings.  This should
+    ## NOT be used to compare Nim identifier names. use `macros.eqIdent`
+    ## for that. Returns:
+    ##
+    ## | 0 if a == b
+    ## | < 0 if a < b
+    ## | > 0 if a > b
+    var i = 0
+    var j = 0
+    while true:
+      while a[i] == '_': inc(i)
+      while b[j] == '_': inc(j) # BUGFIX: typo
+      var aa = toLowerAscii(a[i])
+      var bb = toLowerAscii(b[j])
+      result = ord(aa) - ord(bb)
+      if result != 0 or aa == '\0': break
+      inc(i)
+      inc(j)
 
-proc cmpIgnoreCase*(a, b: cstring): int {.noSideEffect,
-  rtl, extern: "csuCmpIgnoreCase".} =
-  ## Compares two strings in a case insensitive manner. Returns:
-  ##
-  ## | 0 if a == b
-  ## | < 0 if a < b
-  ## | > 0 if a > b
-  ## 
-  ## Not supported for JS backend, use `strutils.cmpIgnoreCase
-  ## <strutils.html#cmpIgnoreCase%2Cstring%2Cstring>`_ instead.
-  var i = 0
-  while true:
-    var aa = toLowerAscii(a[i])
-    var bb = toLowerAscii(b[i])
-    result = ord(aa) - ord(bb)
-    if result != 0 or aa == '\0': break
-    inc(i)
+  proc cmpIgnoreCase*(a, b: cstring): int {.noSideEffect,
+    rtl, extern: "csuCmpIgnoreCase".} =
+    ## Compares two strings in a case insensitive manner. Returns:
+    ##
+    ## | 0 if a == b
+    ## | < 0 if a < b
+    ## | > 0 if a > b
+    var i = 0
+    while true:
+      var aa = toLowerAscii(a[i])
+      var bb = toLowerAscii(b[i])
+      result = ord(aa) - ord(bb)
+      if result != 0 or aa == '\0': break
+      inc(i)
diff --git a/lib/pure/strutils.nim b/lib/pure/strutils.nim
index 0028ac4fb..b1418a3ec 100644
--- a/lib/pure/strutils.nim
+++ b/lib/pure/strutils.nim
@@ -81,6 +81,8 @@ when defined(nimVmExportFixed):
 
 include "system/inclrtl"
 import std/private/since
+from std/private/strimpl import cmpIgnoreStyleImpl, cmpIgnoreCaseImpl
+
 
 const
   Whitespace* = {' ', '\t', '\v', '\r', '\l', '\f'}
@@ -319,13 +321,7 @@ func cmpIgnoreCase*(a, b: string): int {.rtl, extern: "nsuCmpIgnoreCase".} =
     doAssert cmpIgnoreCase("FooBar", "foobar") == 0
     doAssert cmpIgnoreCase("bar", "Foo") < 0
     doAssert cmpIgnoreCase("Foo5", "foo4") > 0
-  var i = 0
-  var m = min(a.len, b.len)
-  while i < m:
-    result = ord(toLowerAscii(a[i])) - ord(toLowerAscii(b[i]))
-    if result != 0: return
-    inc(i)
-  result = a.len - b.len
+  cmpIgnoreCaseImpl(a, b)
 
 {.push checks: off, line_trace: off.} # this is a hot-spot in the compiler!
                                       # thus we compile without checks here
@@ -344,25 +340,7 @@ func cmpIgnoreStyle*(a, b: string): int {.rtl, extern: "nsuCmpIgnoreStyle".} =
   runnableExamples:
     doAssert cmpIgnoreStyle("foo_bar", "FooBar") == 0
     doAssert cmpIgnoreStyle("foo_bar_5", "FooBar4") > 0
-  var i = 0
-  var j = 0
-  while true:
-    while i < a.len and a[i] == '_': inc i
-    while j < b.len and b[j] == '_': inc j
-    var aa = if i < a.len: toLowerAscii(a[i]) else: '\0'
-    var bb = if j < b.len: toLowerAscii(b[j]) else: '\0'
-    result = ord(aa) - ord(bb)
-    if result != 0: return result
-    # the characters are identical:
-    if i >= a.len:
-      # both cursors at the end:
-      if j >= b.len: return 0
-      # not yet at the end of 'b':
-      return -1
-    elif j >= b.len:
-      return 1
-    inc i
-    inc j
+  cmpIgnoreStyleImpl(a, b)
 {.pop.}
 
 # --------- Private templates for different split separators -----------
diff --git a/lib/std/private/strimpl.nim b/lib/std/private/strimpl.nim
new file mode 100644
index 000000000..ae752165a
--- /dev/null
+++ b/lib/std/private/strimpl.nim
@@ -0,0 +1,53 @@
+func toLowerAscii*(c: char): char {.inline.} =
+  if c in {'A'..'Z'}:
+    result = chr(ord(c) + (ord('a') - ord('A')))
+  else:
+    result = c
+
+template firstCharCaseSensitiveImpl(a, b: typed, aLen, bLen: int) =
+  if aLen == 0 or bLen == 0:
+    return aLen - bLen
+  if a[0] != b[0]: return ord(a[0]) - ord(b[0])
+
+template cmpIgnoreStyleImpl*(a, b: typed, firstCharCaseSensitive: static bool = false) =
+  # a, b are string or cstring
+  let aLen = a.len
+  let bLen = b.len
+  var i = 0
+  var j = 0
+  when firstCharCaseSensitive:
+    firstCharCaseSensitiveImpl(a, b, aLen, bLen)
+    inc i
+    inc j
+  while true:
+    while i < aLen and a[i] == '_': inc i
+    while j < bLen and b[j] == '_': inc j
+    let aa = if i < aLen: toLowerAscii(a[i]) else: '\0'
+    let bb = if j < bLen: toLowerAscii(b[j]) else: '\0'
+    result = ord(aa) - ord(bb)
+    if result != 0: return result
+    # the characters are identical:
+    if i >= aLen:
+      # both cursors at the end:
+      if j >= bLen: return 0
+      # not yet at the end of 'b':
+      return -1
+    elif j >= bLen:
+      return 1
+    inc i
+    inc j
+
+template cmpIgnoreCaseImpl*(a, b: typed, firstCharCaseSensitive: static bool = false) =
+  # a, b are string or cstring
+  let aLen = a.len
+  let bLen = b.len
+  var i = 0
+  when firstCharCaseSensitive:
+    firstCharCaseSensitiveImpl(a, b, aLen, bLen)
+    inc i
+  var m = min(aLen, bLen)
+  while i < m:
+    result = ord(toLowerAscii(a[i])) - ord(toLowerAscii(b[i]))
+    if result != 0: return
+    inc i
+  result = aLen - bLen
diff --git a/tests/js/tstdlib_various.nim b/tests/js/tstdlib_various.nim
index d19f40c39..a1bb63d46 100644
--- a/tests/js/tstdlib_various.nim
+++ b/tests/js/tstdlib_various.nim
@@ -29,7 +29,7 @@ Hi Andreas! How do you feel, Rumpf?
 """
 
 import
-  critbits, cstrutils, sets, strutils, tables, random, algorithm, ropes,
+  critbits, sets, strutils, tables, random, algorithm, ropes,
   lists, htmlgen, xmltree, strtabs
 
 
@@ -177,18 +177,3 @@ block txmltree:
     ])
   ])
   doAssert(y.innerText == "foobar")
-
-
-
-block tcstrutils:
-  let s = cstring "abcdef"
-  doAssert s.startsWith("a")
-  doAssert not s.startsWith("b")
-  doAssert s.endsWith("f")
-  doAssert not s.endsWith("a")
-
-  let a = cstring "abracadabra"
-  doAssert a.startsWith("abra")
-  doAssert not a.startsWith("bra")
-  doAssert a.endsWith("abra")
-  doAssert not a.endsWith("dab")
diff --git a/tests/stdlib/tcstrutils.nim b/tests/stdlib/tcstrutils.nim
new file mode 100644
index 000000000..1daf32aa5
--- /dev/null
+++ b/tests/stdlib/tcstrutils.nim
@@ -0,0 +1,30 @@
+discard """
+  targets: "c cpp js"
+"""
+
+import cstrutils
+
+
+block tcstrutils:
+  let s = cstring "abcdef"
+  doAssert s.startsWith("a")
+  doAssert not s.startsWith("b")
+  doAssert s.endsWith("f")
+  doAssert not s.endsWith("a")
+
+  let a = cstring "abracadabra"
+  doAssert a.startsWith("abra")
+  doAssert not a.startsWith("bra")
+  doAssert a.endsWith("abra")
+  doAssert not a.endsWith("dab")
+
+  doAssert cmpIgnoreCase(cstring "FooBar", "foobar") == 0
+  doAssert cmpIgnoreCase(cstring "bar", "Foo") < 0
+  doAssert cmpIgnoreCase(cstring "Foo5", "foo4") > 0
+
+  doAssert cmpIgnoreStyle(cstring "foo_bar", "FooBar") == 0
+  doAssert cmpIgnoreStyle(cstring "foo_bar_5", "FooBar4") > 0
+
+  doAssert cmpIgnoreCase(cstring "", cstring "") == 0
+  doAssert cmpIgnoreCase(cstring "", cstring "Hello") < 0
+  doAssert cmpIgnoreCase(cstring "wind", cstring "") > 0
diff --git a/tests/stdlib/tstdlib_various.nim b/tests/stdlib/tstdlib_various.nim
index cddd43f6e..b153fd2ba 100644
--- a/tests/stdlib/tstdlib_various.nim
+++ b/tests/stdlib/tstdlib_various.nim
@@ -38,7 +38,7 @@ true
 """
 
 import
-  critbits, cstrutils, sets, strutils, tables, random, algorithm, re, ropes,
+  critbits, sets, strutils, tables, random, algorithm, re, ropes,
   segfaults, lists, parsesql, streams, os, htmlgen, xmltree, strtabs
 
 
@@ -245,24 +245,3 @@ block txmltree:
     ])
   ])
   doAssert(y.innerText == "foobar")
-
-
-block tcstrutils:
-  let s = cstring "abcdef"
-  doAssert s.startsWith("a")
-  doAssert not s.startsWith("b")
-  doAssert s.endsWith("f")
-  doAssert not s.endsWith("a")
-
-  let a = cstring "abracadabra"
-  doAssert a.startsWith("abra")
-  doAssert not a.startsWith("bra")
-  doAssert a.endsWith("abra")
-  doAssert not a.endsWith("dab")
-
-  doAssert cmpIgnoreCase(cstring "FooBar", "foobar") == 0
-  doAssert cmpIgnoreCase(cstring "bar", "Foo") < 0
-  doAssert cmpIgnoreCase(cstring "Foo5", "foo4") > 0
-
-  doAssert cmpIgnoreStyle(cstring "foo_bar", "FooBar") == 0
-  doAssert cmpIgnoreStyle(cstring "foo_bar_5", "FooBar4") > 0
pre>104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158