diff options
author | flywind <43030857+xflywind@users.noreply.github.com> | 2020-12-31 04:54:40 -0600 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-12-31 11:54:40 +0100 |
commit | 5fb56a3b2c83b62d72c72a9d56ef1333671bc2b6 (patch) | |
tree | e32cffb18e3180a81f8179130cc99800d6e505df | |
parent | 5984f7a7dda5e6fb3119cd5705d5758e1b8f3fc7 (diff) | |
download | Nim-5fb56a3b2c83b62d72c72a9d56ef1333671bc2b6.tar.gz |
refactor cmpIgnoreStyle and cmpIgnoreCase (#16399)
* init * support strutils * more * better * Call len once per string/cstring * Change var to let * Compare ternary on first char * More appropriate param name * fix * better * one test * impl * more efficient * minor Co-authored-by: Clyybber <darkmine956@gmail.com>
-rw-r--r-- | lib/core/macros.nim | 19 | ||||
-rw-r--r-- | lib/core/typeinfo.nim | 26 | ||||
-rw-r--r-- | lib/pure/cstrutils.nim | 90 | ||||
-rw-r--r-- | lib/pure/strutils.nim | 30 | ||||
-rw-r--r-- | lib/std/private/strimpl.nim | 53 | ||||
-rw-r--r-- | tests/js/tstdlib_various.nim | 17 | ||||
-rw-r--r-- | tests/stdlib/tcstrutils.nim | 30 | ||||
-rw-r--r-- | tests/stdlib/tstdlib_various.nim | 23 |
8 files changed, 141 insertions, 147 deletions
diff --git a/lib/core/macros.nim b/lib/core/macros.nim index 97c3a46c5..748464061 100644 --- a/lib/core/macros.nim +++ b/lib/core/macros.nim @@ -1389,25 +1389,10 @@ when defined(nimVmEqIdent): ## these nodes will be unwrapped. else: + from std/private/strimpl import cmpIgnoreStyleImpl # this procedure is optimized for native code, it should not be compiled to nimVM bytecode. proc cmpIgnoreStyle(a, b: cstring): int {.noSideEffect.} = - proc toLower(c: char): char {.inline.} = - if c in {'A'..'Z'}: result = chr(ord(c) + (ord('a') - ord('A'))) - else: result = c - var i = 0 - var j = 0 - # first char is case sensitive - if a[0] != b[0]: return 1 - while true: - while a[i] == '_': inc(i) - while b[j] == '_': inc(j) # BUGFIX: typo - var aa = toLower(a[i]) - var bb = toLower(b[j]) - result = ord(aa) - ord(bb) - if result != 0 or aa == '\0': break - inc(i) - inc(j) - + cmpIgnoreStyleImpl(a, b, true) proc eqIdent*(a, b: string): bool = cmpIgnoreStyle(a, b) == 0 ## Check if two idents are equal. diff --git a/lib/core/typeinfo.nim b/lib/core/typeinfo.nim index c15f6dc1f..9b7e32466 100644 --- a/lib/core/typeinfo.nim +++ b/lib/core/typeinfo.nim @@ -91,6 +91,8 @@ when not defined(gcDestructors): else: include system/seqs_v2_reimpl +from std/private/strimpl import cmpIgnoreStyleImpl + when not defined(js): template rawType(x: Any): PNimType = cast[PNimType](x.rawTypePtr) @@ -366,36 +368,22 @@ iterator fields*(x: Any): tuple[name: string, any: Any] = for name, any in items(ret): yield ($name, any) -proc cmpIgnoreStyle(a, b: cstring): int {.noSideEffect.} = - proc toLower(c: char): char {.inline.} = - if c in {'A'..'Z'}: result = chr(ord(c) + (ord('a') - ord('A'))) - else: result = c - var i = 0 - var j = 0 - if a[0] != b[0]: return 1 - while true: - while a[i] == '_': inc(i) - while b[j] == '_': inc(j) # BUGFIX: typo - var aa = toLower(a[i]) - var bb = toLower(b[j]) - result = ord(aa) - ord(bb) - if result != 0 or aa == '\0': break - inc(i) - inc(j) +proc cmpNimIdentifier(a, b: cstring): int {.noSideEffect.} = + cmpIgnoreStyleImpl(a, b, true) proc getFieldNode(p: pointer, n: ptr TNimNode, name: cstring): ptr TNimNode = case n.kind of nkNone: assert(false) of nkSlot: - if cmpIgnoreStyle(n.name, name) == 0: + if cmpNimIdentifier(n.name, name) == 0: result = n of nkList: for i in 0..n.len-1: result = getFieldNode(p, n.sons[i], name) if result != nil: break of nkCase: - if cmpIgnoreStyle(n.name, name) == 0: + if cmpNimIdentifier(n.name, name) == 0: result = n else: var m = selectBranch(p, n) @@ -599,7 +587,7 @@ proc getEnumOrdinal*(x: Any, name: string): int = var n = typ.node var s = n.sons for i in 0 .. n.len-1: - if cmpIgnoreStyle($s[i].name, name) == 0: + if cmpNimIdentifier($s[i].name, name) == 0: if ntfEnumHole notin typ.flags: return i else: diff --git a/lib/pure/cstrutils.nim b/lib/pure/cstrutils.nim index 601508e2e..a95c13fb5 100644 --- a/lib/pure/cstrutils.nim +++ b/lib/pure/cstrutils.nim @@ -12,12 +12,8 @@ ## save allocations. include "system/inclrtl" +import std/private/strimpl -proc toLowerAscii(c: char): char {.inline.} = - if c in {'A'..'Z'}: - result = chr(ord(c) + (ord('a') - ord('A'))) - else: - result = c when defined(js): proc startsWith*(s, prefix: cstring): bool {.noSideEffect, @@ -25,7 +21,13 @@ when defined(js): proc endsWith*(s, suffix: cstring): bool {.noSideEffect, importjs: "#.endsWith(#)".} - + + proc cmpIgnoreStyle*(a, b: cstring): int {.noSideEffect.} = + cmpIgnoreStyleImpl(a, b) + + proc cmpIgnoreCase*(a, b: cstring): int {.noSideEffect.} = + cmpIgnoreCaseImpl(a, b) + # JS string has more operations that might warrant its own module: # https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String else: @@ -57,45 +59,39 @@ else: inc(i) if suffix[i] == '\0': return true -proc cmpIgnoreStyle*(a, b: cstring): int {.noSideEffect, - rtl, extern: "csuCmpIgnoreStyle".} = - ## Semantically the same as ``cmp(normalize($a), normalize($b))``. It - ## is just optimized to not allocate temporary strings. This should - ## NOT be used to compare Nim identifier names. use `macros.eqIdent` - ## for that. Returns: - ## - ## | 0 if a == b - ## | < 0 if a < b - ## | > 0 if a > b - ## - ## Not supported for JS backend, use `strutils.cmpIgnoreStyle - ## <strutils.html#cmpIgnoreStyle%2Cstring%2Cstring>`_ instead. - var i = 0 - var j = 0 - while true: - while a[i] == '_': inc(i) - while b[j] == '_': inc(j) # BUGFIX: typo - var aa = toLowerAscii(a[i]) - var bb = toLowerAscii(b[j]) - result = ord(aa) - ord(bb) - if result != 0 or aa == '\0': break - inc(i) - inc(j) + proc cmpIgnoreStyle*(a, b: cstring): int {.noSideEffect, + rtl, extern: "csuCmpIgnoreStyle".} = + ## Semantically the same as ``cmp(normalize($a), normalize($b))``. It + ## is just optimized to not allocate temporary strings. This should + ## NOT be used to compare Nim identifier names. use `macros.eqIdent` + ## for that. Returns: + ## + ## | 0 if a == b + ## | < 0 if a < b + ## | > 0 if a > b + var i = 0 + var j = 0 + while true: + while a[i] == '_': inc(i) + while b[j] == '_': inc(j) # BUGFIX: typo + var aa = toLowerAscii(a[i]) + var bb = toLowerAscii(b[j]) + result = ord(aa) - ord(bb) + if result != 0 or aa == '\0': break + inc(i) + inc(j) -proc cmpIgnoreCase*(a, b: cstring): int {.noSideEffect, - rtl, extern: "csuCmpIgnoreCase".} = - ## Compares two strings in a case insensitive manner. Returns: - ## - ## | 0 if a == b - ## | < 0 if a < b - ## | > 0 if a > b - ## - ## Not supported for JS backend, use `strutils.cmpIgnoreCase - ## <strutils.html#cmpIgnoreCase%2Cstring%2Cstring>`_ instead. - var i = 0 - while true: - var aa = toLowerAscii(a[i]) - var bb = toLowerAscii(b[i]) - result = ord(aa) - ord(bb) - if result != 0 or aa == '\0': break - inc(i) + proc cmpIgnoreCase*(a, b: cstring): int {.noSideEffect, + rtl, extern: "csuCmpIgnoreCase".} = + ## Compares two strings in a case insensitive manner. Returns: + ## + ## | 0 if a == b + ## | < 0 if a < b + ## | > 0 if a > b + var i = 0 + while true: + var aa = toLowerAscii(a[i]) + var bb = toLowerAscii(b[i]) + result = ord(aa) - ord(bb) + if result != 0 or aa == '\0': break + inc(i) diff --git a/lib/pure/strutils.nim b/lib/pure/strutils.nim index 0028ac4fb..b1418a3ec 100644 --- a/lib/pure/strutils.nim +++ b/lib/pure/strutils.nim @@ -81,6 +81,8 @@ when defined(nimVmExportFixed): include "system/inclrtl" import std/private/since +from std/private/strimpl import cmpIgnoreStyleImpl, cmpIgnoreCaseImpl + const Whitespace* = {' ', '\t', '\v', '\r', '\l', '\f'} @@ -319,13 +321,7 @@ func cmpIgnoreCase*(a, b: string): int {.rtl, extern: "nsuCmpIgnoreCase".} = doAssert cmpIgnoreCase("FooBar", "foobar") == 0 doAssert cmpIgnoreCase("bar", "Foo") < 0 doAssert cmpIgnoreCase("Foo5", "foo4") > 0 - var i = 0 - var m = min(a.len, b.len) - while i < m: - result = ord(toLowerAscii(a[i])) - ord(toLowerAscii(b[i])) - if result != 0: return - inc(i) - result = a.len - b.len + cmpIgnoreCaseImpl(a, b) {.push checks: off, line_trace: off.} # this is a hot-spot in the compiler! # thus we compile without checks here @@ -344,25 +340,7 @@ func cmpIgnoreStyle*(a, b: string): int {.rtl, extern: "nsuCmpIgnoreStyle".} = runnableExamples: doAssert cmpIgnoreStyle("foo_bar", "FooBar") == 0 doAssert cmpIgnoreStyle("foo_bar_5", "FooBar4") > 0 - var i = 0 - var j = 0 - while true: - while i < a.len and a[i] == '_': inc i - while j < b.len and b[j] == '_': inc j - var aa = if i < a.len: toLowerAscii(a[i]) else: '\0' - var bb = if j < b.len: toLowerAscii(b[j]) else: '\0' - result = ord(aa) - ord(bb) - if result != 0: return result - # the characters are identical: - if i >= a.len: - # both cursors at the end: - if j >= b.len: return 0 - # not yet at the end of 'b': - return -1 - elif j >= b.len: - return 1 - inc i - inc j + cmpIgnoreStyleImpl(a, b) {.pop.} # --------- Private templates for different split separators ----------- diff --git a/lib/std/private/strimpl.nim b/lib/std/private/strimpl.nim new file mode 100644 index 000000000..ae752165a --- /dev/null +++ b/lib/std/private/strimpl.nim @@ -0,0 +1,53 @@ +func toLowerAscii*(c: char): char {.inline.} = + if c in {'A'..'Z'}: + result = chr(ord(c) + (ord('a') - ord('A'))) + else: + result = c + +template firstCharCaseSensitiveImpl(a, b: typed, aLen, bLen: int) = + if aLen == 0 or bLen == 0: + return aLen - bLen + if a[0] != b[0]: return ord(a[0]) - ord(b[0]) + +template cmpIgnoreStyleImpl*(a, b: typed, firstCharCaseSensitive: static bool = false) = + # a, b are string or cstring + let aLen = a.len + let bLen = b.len + var i = 0 + var j = 0 + when firstCharCaseSensitive: + firstCharCaseSensitiveImpl(a, b, aLen, bLen) + inc i + inc j + while true: + while i < aLen and a[i] == '_': inc i + while j < bLen and b[j] == '_': inc j + let aa = if i < aLen: toLowerAscii(a[i]) else: '\0' + let bb = if j < bLen: toLowerAscii(b[j]) else: '\0' + result = ord(aa) - ord(bb) + if result != 0: return result + # the characters are identical: + if i >= aLen: + # both cursors at the end: + if j >= bLen: return 0 + # not yet at the end of 'b': + return -1 + elif j >= bLen: + return 1 + inc i + inc j + +template cmpIgnoreCaseImpl*(a, b: typed, firstCharCaseSensitive: static bool = false) = + # a, b are string or cstring + let aLen = a.len + let bLen = b.len + var i = 0 + when firstCharCaseSensitive: + firstCharCaseSensitiveImpl(a, b, aLen, bLen) + inc i + var m = min(aLen, bLen) + while i < m: + result = ord(toLowerAscii(a[i])) - ord(toLowerAscii(b[i])) + if result != 0: return + inc i + result = aLen - bLen diff --git a/tests/js/tstdlib_various.nim b/tests/js/tstdlib_various.nim index d19f40c39..a1bb63d46 100644 --- a/tests/js/tstdlib_various.nim +++ b/tests/js/tstdlib_various.nim @@ -29,7 +29,7 @@ Hi Andreas! How do you feel, Rumpf? """ import - critbits, cstrutils, sets, strutils, tables, random, algorithm, ropes, + critbits, sets, strutils, tables, random, algorithm, ropes, lists, htmlgen, xmltree, strtabs @@ -177,18 +177,3 @@ block txmltree: ]) ]) doAssert(y.innerText == "foobar") - - - -block tcstrutils: - let s = cstring "abcdef" - doAssert s.startsWith("a") - doAssert not s.startsWith("b") - doAssert s.endsWith("f") - doAssert not s.endsWith("a") - - let a = cstring "abracadabra" - doAssert a.startsWith("abra") - doAssert not a.startsWith("bra") - doAssert a.endsWith("abra") - doAssert not a.endsWith("dab") diff --git a/tests/stdlib/tcstrutils.nim b/tests/stdlib/tcstrutils.nim new file mode 100644 index 000000000..1daf32aa5 --- /dev/null +++ b/tests/stdlib/tcstrutils.nim @@ -0,0 +1,30 @@ +discard """ + targets: "c cpp js" +""" + +import cstrutils + + +block tcstrutils: + let s = cstring "abcdef" + doAssert s.startsWith("a") + doAssert not s.startsWith("b") + doAssert s.endsWith("f") + doAssert not s.endsWith("a") + + let a = cstring "abracadabra" + doAssert a.startsWith("abra") + doAssert not a.startsWith("bra") + doAssert a.endsWith("abra") + doAssert not a.endsWith("dab") + + doAssert cmpIgnoreCase(cstring "FooBar", "foobar") == 0 + doAssert cmpIgnoreCase(cstring "bar", "Foo") < 0 + doAssert cmpIgnoreCase(cstring "Foo5", "foo4") > 0 + + doAssert cmpIgnoreStyle(cstring "foo_bar", "FooBar") == 0 + doAssert cmpIgnoreStyle(cstring "foo_bar_5", "FooBar4") > 0 + + doAssert cmpIgnoreCase(cstring "", cstring "") == 0 + doAssert cmpIgnoreCase(cstring "", cstring "Hello") < 0 + doAssert cmpIgnoreCase(cstring "wind", cstring "") > 0 diff --git a/tests/stdlib/tstdlib_various.nim b/tests/stdlib/tstdlib_various.nim index cddd43f6e..b153fd2ba 100644 --- a/tests/stdlib/tstdlib_various.nim +++ b/tests/stdlib/tstdlib_various.nim @@ -38,7 +38,7 @@ true """ import - critbits, cstrutils, sets, strutils, tables, random, algorithm, re, ropes, + critbits, sets, strutils, tables, random, algorithm, re, ropes, segfaults, lists, parsesql, streams, os, htmlgen, xmltree, strtabs @@ -245,24 +245,3 @@ block txmltree: ]) ]) doAssert(y.innerText == "foobar") - - -block tcstrutils: - let s = cstring "abcdef" - doAssert s.startsWith("a") - doAssert not s.startsWith("b") - doAssert s.endsWith("f") - doAssert not s.endsWith("a") - - let a = cstring "abracadabra" - doAssert a.startsWith("abra") - doAssert not a.startsWith("bra") - doAssert a.endsWith("abra") - doAssert not a.endsWith("dab") - - doAssert cmpIgnoreCase(cstring "FooBar", "foobar") == 0 - doAssert cmpIgnoreCase(cstring "bar", "Foo") < 0 - doAssert cmpIgnoreCase(cstring "Foo5", "foo4") > 0 - - doAssert cmpIgnoreStyle(cstring "foo_bar", "FooBar") == 0 - doAssert cmpIgnoreStyle(cstring "foo_bar_5", "FooBar4") > 0 |