strutils: code cleanups and do not rely on the terminating binary zero anymore

author: Andreas Rumpf <rumpf_a@web.de> 2018-04-28 21:59:43 +0200
committer: Andreas Rumpf <rumpf_a@web.de> 2018-04-28 21:59:43 +0200
commit: 9b8603adcd7ce8fa9563137bcbe6c350e07a6fe0 (patch)
tree: d349c873a1e2c157698da659f28f6306b47ff74c
parent: 9d77f61038ddabeb4b59847d709e6d722e743082 (diff)
download: Nim-9b8603adcd7ce8fa9563137bcbe6c350e07a6fe0.tar.gz
2 files changed, 101 insertions, 297 deletions
diff --git a/compiler/ccgexprs.nim b/compiler/ccgexprs.nim
index 562d6d165..eb67db2b3 100644
--- a/compiler/ccgexprs.nim
+++ b/compiler/ccgexprs.nim
@@ -63,6 +63,10 @@ proc genLiteral(p: BProc, n: PNode, ty: PType): Rope =
     of tyNil:
       result = genNilStringLiteral(p.module, n.info)
     of tyString:
+      # with the new semantics for 'nil' strings, we can map "" to nil and
+      # save tons of allocations:
+      #if n.strVal.len == 0: result = genNilStringLiteral(p.module, n.info)
+      #else:
       result = genStringLiteral(p.module, n)
     else:
       if n.strVal.isNil: result = rope("NIM_NIL")
@@ -905,14 +909,9 @@ proc genSeqElem(p: BProc, n, x, y: PNode, d: var TLoc) =
   if ty.kind in {tyRef, tyPtr}:
     ty = skipTypes(ty.lastSon, abstractVarRange) # emit range check:
   if optBoundsCheck in p.options:
-    if ty.kind == tyString:
-      linefmt(p, cpsStmts,
-           "if (!$2 || (NU)($1) > (NU)($2->$3)) #raiseIndexError();$n",
-           rdLoc(b), rdLoc(a), lenField(p))
-    else:
-      linefmt(p, cpsStmts,
-           "if (!$2 || (NU)($1) >= (NU)($2->$3)) #raiseIndexError();$n",
-           rdLoc(b), rdLoc(a), lenField(p))
+    linefmt(p, cpsStmts,
+          "if (!$2 || (NU)($1) >= (NU)($2->$3)) #raiseIndexError();$n",
+          rdLoc(b), rdLoc(a), lenField(p))
   if d.k == locNone: d.storage = OnHeap
   if skipTypes(a.t, abstractVar).kind in {tyRef, tyPtr}:
     a.r = rfmt(nil, "(*$1)", a.r)
diff --git a/lib/pure/strutils.nim b/lib/pure/strutils.nim
index cdc5ec4f9..4500a163f 100644
--- a/lib/pure/strutils.nim
+++ b/lib/pure/strutils.nim
@@ -106,6 +106,12 @@ proc isUpperAscii*(c: char): bool {.noSideEffect, procvar,
   ## This checks ASCII characters only.
   return c in {'A'..'Z'}
 
+template isImpl(call) =
+  if s.len == 0: return false
+  result = true
+  for c in s:
+    if not call(c): return false
+
 proc isAlphaAscii*(s: string): bool {.noSideEffect, procvar,
   rtl, extern: "nsuIsAlphaAsciiStr".} =
   ## Checks whether or not `s` is alphabetical.
@@ -114,12 +120,7 @@ proc isAlphaAscii*(s: string): bool {.noSideEffect, procvar,
   ## Returns true if all characters in `s` are
   ## alphabetic and there is at least one character
   ## in `s`.
-  if s.len() == 0:
-    return false
-
-  result = true
-  for c in s:
-    if not c.isAlphaAscii(): return false
+  isImpl isAlphaAscii
 
 proc isAlphaNumeric*(s: string): bool {.noSideEffect, procvar,
   rtl, extern: "nsuIsAlphaNumericStr".} =
@@ -129,13 +130,7 @@ proc isAlphaNumeric*(s: string): bool {.noSideEffect, procvar,
   ## Returns true if all characters in `s` are
   ## alpanumeric and there is at least one character
   ## in `s`.
-  if s.len() == 0:
-    return false
-
-  result = true
-  for c in s:
-    if not c.isAlphaNumeric():
-      return false
+  isImpl isAlphaNumeric
 
 proc isDigit*(s: string): bool {.noSideEffect, procvar,
   rtl, extern: "nsuIsDigitStr".} =
@@ -145,13 +140,7 @@ proc isDigit*(s: string): bool {.noSideEffect, procvar,
   ## Returns true if all characters in `s` are
   ## numeric and there is at least one character
   ## in `s`.
-  if s.len() == 0:
-    return false
-
-  result = true
-  for c in s:
-    if not c.isDigit():
-      return false
+  isImpl isDigit
 
 proc isSpaceAscii*(s: string): bool {.noSideEffect, procvar,
   rtl, extern: "nsuIsSpaceAsciiStr".} =
@@ -159,13 +148,7 @@ proc isSpaceAscii*(s: string): bool {.noSideEffect, procvar,
   ##
   ## Returns true if all characters in `s` are whitespace
   ## characters and there is at least one character in `s`.
-  if s.len() == 0:
-    return false
-
-  result = true
-  for c in s:
-    if not c.isSpaceAscii():
-      return false
+  isImpl isSpaceAscii
 
 proc isLowerAscii*(s: string): bool {.noSideEffect, procvar,
   rtl, extern: "nsuIsLowerAsciiStr".} =
@@ -174,13 +157,7 @@ proc isLowerAscii*(s: string): bool {.noSideEffect, procvar,
   ## This checks ASCII characters only.
   ## Returns true if all characters in `s` are lower case
   ## and there is at least one character  in `s`.
-  if s.len() == 0:
-    return false
-
-  for c in s:
-    if not c.isLowerAscii():
-      return false
-  true
+  isImpl isLowerAscii
 
 proc isUpperAscii*(s: string): bool {.noSideEffect, procvar,
   rtl, extern: "nsuIsUpperAsciiStr".} =
@@ -189,13 +166,7 @@ proc isUpperAscii*(s: string): bool {.noSideEffect, procvar,
   ## This checks ASCII characters only.
   ## Returns true if all characters in `s` are upper case
   ## and there is at least one character in `s`.
-  if s.len() == 0:
-    return false
-
-  for c in s:
-    if not c.isUpperAscii():
-      return false
-  true
+  isImpl isUpperAscii
 
 proc toLowerAscii*(c: char): char {.noSideEffect, procvar,
   rtl, extern: "nsuToLowerAsciiChar".} =
@@ -209,6 +180,11 @@ proc toLowerAscii*(c: char): char {.noSideEffect, procvar,
   else:
     result = c
 
+template toImpl(call) =
+  result = newString(len(s))
+  for i in 0..len(s) - 1:
+    result[i] = call(s[i])
+
 proc toLowerAscii*(s: string): string {.noSideEffect, procvar,
   rtl, extern: "nsuToLowerAsciiStr".} =
   ## Converts `s` into lower case.
@@ -216,9 +192,7 @@ proc toLowerAscii*(s: string): string {.noSideEffect, procvar,
   ## This works only for the letters ``A-Z``. See `unicode.toLower
   ## <unicode.html#toLower>`_ for a version that works for any Unicode
   ## character.
-  result = newString(len(s))
-  for i in 0..len(s) - 1:
-    result[i] = toLowerAscii(s[i])
+  toImpl toLowerAscii
 
 proc toUpperAscii*(c: char): char {.noSideEffect, procvar,
   rtl, extern: "nsuToUpperAsciiChar".} =
@@ -239,147 +213,15 @@ proc toUpperAscii*(s: string): string {.noSideEffect, procvar,
   ## This works only for the letters ``A-Z``.  See `unicode.toUpper
   ## <unicode.html#toUpper>`_ for a version that works for any Unicode
   ## character.
-  result = newString(len(s))
-  for i in 0..len(s) - 1:
-    result[i] = toUpperAscii(s[i])
+  toImpl toUpperAscii
 
 proc capitalizeAscii*(s: string): string {.noSideEffect, procvar,
   rtl, extern: "nsuCapitalizeAscii".} =
   ## Converts the first character of `s` into upper case.
   ##
   ## This works only for the letters ``A-Z``.
-  result = toUpperAscii(s[0]) & substr(s, 1)
-
-proc isSpace*(c: char): bool {.noSideEffect, procvar,
-  rtl, deprecated, extern: "nsuIsSpaceChar".}=
-  ## Checks whether or not `c` is a whitespace character.
-  ##
-  ## **Deprecated since version 0.15.0**: use ``isSpaceAscii`` instead.
-  isSpaceAscii(c)
-
-proc isLower*(c: char): bool {.noSideEffect, procvar,
-  rtl, deprecated, extern: "nsuIsLowerChar".}=
-  ## Checks whether or not `c` is a lower case character.
-  ##
-  ## This checks ASCII characters only.
-  ##
-  ## **Deprecated since version 0.15.0**: use ``isLowerAscii`` instead.
-  isLowerAscii(c)
-
-proc isUpper*(c: char): bool {.noSideEffect, procvar,
-  rtl, deprecated, extern: "nsuIsUpperChar".}=
-  ## Checks whether or not `c` is an upper case character.
-  ##
-  ## This checks ASCII characters only.
-  ##
-  ## **Deprecated since version 0.15.0**: use ``isUpperAscii`` instead.
-  isUpperAscii(c)
-
-proc isAlpha*(c: char): bool {.noSideEffect, procvar,
-  rtl, deprecated, extern: "nsuIsAlphaChar".}=
-  ## Checks whether or not `c` is alphabetical.
-  ##
-  ## This checks a-z, A-Z ASCII characters only.
-  ##
-  ## **Deprecated since version 0.15.0**: use ``isAlphaAscii`` instead.
-  isAlphaAscii(c)
-
-proc isAlpha*(s: string): bool {.noSideEffect, procvar,
-  rtl, deprecated, extern: "nsuIsAlphaStr".}=
-  ## Checks whether or not `s` is alphabetical.
-  ##
-  ## This checks a-z, A-Z ASCII characters only.
-  ## Returns true if all characters in `s` are
-  ## alphabetic and there is at least one character
-  ## in `s`.
-  ##
-  ## **Deprecated since version 0.15.0**: use ``isAlphaAscii`` instead.
-  isAlphaAscii(s)
-
-proc isSpace*(s: string): bool {.noSideEffect, procvar,
-  rtl, deprecated, extern: "nsuIsSpaceStr".}=
-  ## Checks whether or not `s` is completely whitespace.
-  ##
-  ## Returns true if all characters in `s` are whitespace
-  ## characters and there is at least one character in `s`.
-  ##
-  ## **Deprecated since version 0.15.0**: use ``isSpaceAscii`` instead.
-  isSpaceAscii(s)
-
-proc isLower*(s: string): bool {.noSideEffect, procvar,
-  rtl, deprecated, extern: "nsuIsLowerStr".}=
-  ## Checks whether or not `s` contains all lower case characters.
-  ##
-  ## This checks ASCII characters only.
-  ## Returns true if all characters in `s` are lower case
-  ## and there is at least one character  in `s`.
-  ##
-  ## **Deprecated since version 0.15.0**: use ``isLowerAscii`` instead.
-  isLowerAscii(s)
-
-proc isUpper*(s: string): bool {.noSideEffect, procvar,
-  rtl, deprecated, extern: "nsuIsUpperStr".}=
-  ## Checks whether or not `s` contains all upper case characters.
-  ##
-  ## This checks ASCII characters only.
-  ## Returns true if all characters in `s` are upper case
-  ## and there is at least one character in `s`.
-  ##
-  ## **Deprecated since version 0.15.0**: use ``isUpperAscii`` instead.
-  isUpperAscii(s)
-
-proc toLower*(c: char): char {.noSideEffect, procvar,
-  rtl, deprecated, extern: "nsuToLowerChar".} =
-  ## Converts `c` into lower case.
-  ##
-  ## This works only for the letters ``A-Z``. See `unicode.toLower
-  ## <unicode.html#toLower>`_ for a version that works for any Unicode
-  ## character.
-  ##
-  ## **Deprecated since version 0.15.0**: use ``toLowerAscii`` instead.
-  toLowerAscii(c)
-
-proc toLower*(s: string): string {.noSideEffect, procvar,
-  rtl, deprecated, extern: "nsuToLowerStr".} =
-  ## Converts `s` into lower case.
-  ##
-  ## This works only for the letters ``A-Z``. See `unicode.toLower
-  ## <unicode.html#toLower>`_ for a version that works for any Unicode
-  ## character.
-  ##
-  ## **Deprecated since version 0.15.0**: use ``toLowerAscii`` instead.
-  toLowerAscii(s)
-
-proc toUpper*(c: char): char {.noSideEffect, procvar,
-  rtl, deprecated, extern: "nsuToUpperChar".} =
-  ## Converts `c` into upper case.
-  ##
-  ## This works only for the letters ``A-Z``.  See `unicode.toUpper
-  ## <unicode.html#toUpper>`_ for a version that works for any Unicode
-  ## character.
-  ##
-  ## **Deprecated since version 0.15.0**: use ``toUpperAscii`` instead.
-  toUpperAscii(c)
-
-proc toUpper*(s: string): string {.noSideEffect, procvar,
-  rtl, deprecated, extern: "nsuToUpperStr".} =
-  ## Converts `s` into upper case.
-  ##
-  ## This works only for the letters ``A-Z``.  See `unicode.toUpper
-  ## <unicode.html#toUpper>`_ for a version that works for any Unicode
-  ## character.
-  ##
-  ## **Deprecated since version 0.15.0**: use ``toUpperAscii`` instead.
-  toUpperAscii(s)
-
-proc capitalize*(s: string): string {.noSideEffect, procvar,
-  rtl, deprecated, extern: "nsuCapitalize".} =
-  ## Converts the first character of `s` into upper case.
-  ##
-  ## This works only for the letters ``A-Z``.
-  ##
-  ## **Deprecated since version 0.15.0**: use ``capitalizeAscii`` instead.
-  capitalizeAscii(s)
+  if s.len == 0: result = ""
+  else: result = toUpperAscii(s[0]) & substr(s, 1)
 
 proc normalize*(s: string): string {.noSideEffect, procvar,
   rtl, extern: "nsuNormalize".} =
@@ -419,9 +261,9 @@ proc cmpIgnoreCase*(a, b: string): int {.noSideEffect,
 proc cmpIgnoreStyle*(a, b: string): int {.noSideEffect,
   rtl, extern: "nsuCmpIgnoreStyle", procvar.} =
   ## Semantically the same as ``cmp(normalize(a), normalize(b))``. It
-  ## is just optimized to not allocate temporary strings.  This should
+  ## is just optimized to not allocate temporary strings. This should
   ## NOT be used to compare Nim identifier names. use `macros.eqIdent`
-  ## for that.  Returns:
+  ## for that. Returns:
   ##
   ## | 0 iff a == b
   ## | < 0 iff a < b
@@ -429,14 +271,22 @@ proc cmpIgnoreStyle*(a, b: string): int {.noSideEffect,
   var i = 0
   var j = 0
   while true:
-    while a[i] == '_': inc(i)
-    while b[j] == '_': inc(j) # BUGFIX: typo
-    var aa = toLowerAscii(a[i])
-    var bb = toLowerAscii(b[j])
+    while i < a.len and a[i] == '_': inc i
+    while j < b.len and b[j] == '_': inc j
+    var aa = if i < a.len: toLowerAscii(a[i]) else: '\0'
+    var bb = if j < b.len: toLowerAscii(b[j]) else: '\0'
     result = ord(aa) - ord(bb)
-    if result != 0 or aa == '\0': break
-    inc(i)
-    inc(j)
+    if result != 0: return result
+    # the characters are identical:
+    if i >= a.len:
+      # both cursors at the end:
+      if j >= b.len: return 0
+      # not yet at the end of 'b':
+      return -1
+    elif j >= b.len:
+      return 1
+    inc i
+    inc j
 
 proc strip*(s: string, leading = true, trailing = true,
             chars: set[char] = Whitespace): string
@@ -451,7 +301,7 @@ proc strip*(s: string, leading = true, trailing = true,
     first = 0
     last = len(s)-1
   if leading:
-    while s[first] in chars: inc(first)
+    while first <= last and s[first] in chars: inc(first)
   if trailing:
     while last >= 0 and s[last] in chars: dec(last)
   result = substr(s, first, last)
@@ -467,7 +317,9 @@ proc toOctal*(c: char): string {.noSideEffect, rtl, extern: "nsuToOctal".} =
     result[i] = chr(val mod 8 + ord('0'))
     val = val div 8
 
-proc isNilOrEmpty*(s: string): bool {.noSideEffect, procvar, rtl, extern: "nsuIsNilOrEmpty".} =
+proc isNilOrEmpty*(s: string): bool {.noSideEffect, procvar, rtl,
+                                      extern: "nsuIsNilOrEmpty",
+                                      deprecated: "use 'x.len == 0' instead".} =
   ## Checks if `s` is nil or empty.
   result = len(s) == 0
 
@@ -486,7 +338,6 @@ proc substrEq(s: string, pos: int, substr: string): bool =
   var length = substr.len
   while i < length and s[pos+i] == substr[i]:
     inc i
-
   return i == length
 
 # --------- Private templates for different split separators -----------
@@ -520,7 +371,7 @@ template oldSplit(s, seps, maxsplit) =
   var splits = maxsplit
   assert(not ('\0' in seps))
   while last < len(s):
-    while s[last] in seps: inc(last)
+    while last < len(s) and s[last] in seps: inc(last)
     var first = last
     while last < len(s) and s[last] notin seps: inc(last)
     if first <= last-1:
@@ -571,10 +422,7 @@ iterator split*(s: string, seps: set[char] = Whitespace,
   ##   "08"
   ##   "08.398990"
   ##
-  when defined(nimOldSplit):
-    oldSplit(s, seps, maxsplit)
-  else:
-    splitCommon(s, seps, maxsplit, 1)
+  splitCommon(s, seps, maxsplit, 1)
 
 iterator splitWhitespace*(s: string, maxsplit: int = -1): string =
   ## Splits the string ``s`` at whitespace stripping leading and trailing
@@ -660,7 +508,6 @@ iterator split*(s: string, sep: string, maxsplit: int = -1): string =
   ##   "is"
   ##   "corrupted"
   ##
-
   splitCommon(s, sep, maxsplit, sep.len)
 
 template rsplitCommon(s, sep, maxsplit, sepLen) =
@@ -670,29 +517,21 @@ template rsplitCommon(s, sep, maxsplit, sepLen) =
     first = last
     splits = maxsplit
     startPos = 0
-
   # go to -1 in order to get separators at the beginning
   while first >= -1:
     while first >= 0 and not stringHasSep(s, first, sep):
       dec(first)
-
     if splits == 0:
       # No more splits means set first to the beginning
       first = -1
-
     if first == -1:
       startPos = 0
     else:
       startPos = first + sepLen
-
     yield substr(s, startPos, last)
-
-    if splits == 0:
-      break
-
+    if splits == 0: break
     dec(splits)
     dec(first)
-
     last = first
 
 iterator rsplit*(s: string, seps: set[char] = Whitespace,
@@ -712,7 +551,6 @@ iterator rsplit*(s: string, seps: set[char] = Whitespace,
   ##   "foo"
   ##
   ## Substrings are separated from the right by the set of chars `seps`
-
   rsplitCommon(s, seps, maxsplit, 1)
 
 iterator rsplit*(s: string, sep: char,
@@ -779,14 +617,14 @@ iterator splitLines*(s: string): string =
   var first = 0
   var last = 0
   while true:
-    while s[last] notin {'\0', '\c', '\l'}: inc(last)
+    while last < s.len and s[last] notin {'\c', '\l'}: inc(last)
     yield substr(s, first, last-1)
     # skip newlines:
+    if last >= s.len: break
     if s[last] == '\l': inc(last)
     elif s[last] == '\c':
       inc(last)
-      if s[last] == '\l': inc(last)
-    else: break # was '\0'
+      if last < s.len and s[last] == '\l': inc(last)
     first = last
 
 proc splitLines*(s: string): seq[string] {.noSideEffect,
@@ -811,7 +649,7 @@ proc countLines*(s: string): int {.noSideEffect,
   while i < s.len:
     case s[i]
     of '\c':
-      if s[i+1] == '\l': inc i
+      if i+1 < s.len and s[i+1] == '\l': inc i
       inc result
     of '\l': inc result
     else: discard
@@ -1025,9 +863,9 @@ proc parseHexInt*(s: string): int {.noSideEffect, procvar,
   ## of the following optional prefixes: ``0x``, ``0X``, ``#``.  Underscores
   ## within `s` are ignored.
   var i = 0
-  if s[i] == '0' and (s[i+1] == 'x' or s[i+1] == 'X'): inc(i, 2)
-  elif s[i] == '#': inc(i)
-  while true:
+  if i+1 < s.len and s[i] == '0' and (s[i+1] == 'x' or s[i+1] == 'X'): inc(i, 2)
+  elif i < s.len and s[i] == '#': inc(i)
+  while i < s.len:
     case s[i]
     of '_': inc(i)
     of '0'..'9':
@@ -1039,7 +877,6 @@ proc parseHexInt*(s: string): int {.noSideEffect, procvar,
     of 'A'..'F':
       result = result shl 4 or (ord(s[i]) - ord('A') + 10)
       inc(i)
-    of '\0': break
     else: raise newException(ValueError, "invalid integer: " & s)
 
 proc generateHexCharToValueMap(): string =
@@ -1148,14 +985,6 @@ template spaces*(n: Natural): string = repeat(' ', n)
   ##   echo text1 & spaces(max(0, width - text1.len)) & "|"
   ##   echo text2 & spaces(max(0, width - text2.len)) & "|"
 
-proc repeatChar*(count: Natural, c: char = ' '): string {.deprecated.} =
-  ## deprecated: use repeat() or spaces()
-  repeat(c, count)
-
-proc repeatStr*(count: Natural, s: string): string {.deprecated.} =
-  ## deprecated: use repeat(string, count) or string.repeat(count)
-  repeat(s, count)
-
 proc align*(s: string, count: Natural, padding = ' '): string {.
   noSideEffect, rtl, extern: "nsuAlignString".} =
   ## Aligns a string `s` with `padding`, so that it is of length `count`.
@@ -1226,7 +1055,7 @@ iterator tokenize*(s: string, seps: set[char] = Whitespace): tuple[
   var i = 0
   while true:
     var j = i
-    var isSep = s[j] in seps
+    var isSep = j < s.len and s[j] in seps
     while j < s.len and (s[j] in seps) == isSep: inc(j)
     if j > i:
       yield (substr(s, i, j-1), isSep)
@@ -1325,13 +1154,13 @@ proc startsWith*(s, prefix: string): bool {.noSideEffect,
   ## If ``prefix == ""`` true is returned.
   var i = 0
   while true:
-    if prefix[i] == '\0': return true
-    if s[i] != prefix[i]: return false
+    if i >= prefix.len: return true
+    if i >= s.len or s[i] != prefix[i]: return false
     inc(i)
 
 proc startsWith*(s: string, prefix: char): bool {.noSideEffect, inline.} =
   ## Returns true iff ``s`` starts with ``prefix``.
-  result = s[0] == prefix
+  result = s.len > 0 and s[0] == prefix
 
 proc endsWith*(s, suffix: string): bool {.noSideEffect,
   rtl, extern: "nsuEndsWith".} =
@@ -1343,11 +1172,11 @@ proc endsWith*(s, suffix: string): bool {.noSideEffect,
   while i+j <% s.len:
     if s[i+j] != suffix[i]: return false
     inc(i)
-  if suffix[i] == '\0': return true
+  if i >= suffix.len: return true
 
 proc endsWith*(s: string, suffix: char): bool {.noSideEffect, inline.} =
   ## Returns true iff ``s`` ends with ``suffix``.
-  result = s[s.high] == suffix
+  result = s.len > 0 and s[s.high] == suffix
 
 proc continuesWith*(s, substr: string, start: Natural): bool {.noSideEffect,
   rtl, extern: "nsuContinuesWith".} =
@@ -1356,8 +1185,8 @@ proc continuesWith*(s, substr: string, start: Natural): bool {.noSideEffect,
   ## If ``substr == ""`` true is returned.
   var i = 0
   while true:
-    if substr[i] == '\0': return true
-    if s[i+start] != substr[i]: return false
+    if i >= substr.len: return true
+    if i+start >= s.len or s[i+start] != substr[i]: return false
     inc(i)
 
 proc addSep*(dest: var string, sep = ", ", startLen: Natural = 0)
@@ -1502,12 +1331,8 @@ proc find*(s, sub: string, start: Natural = 0, last: Natural = 0): int {.noSideE
   ## If `last` is unspecified, it defaults to `s.high`.
   ##
   ## Searching is case-sensitive. If `sub` is not in `s`, -1 is returned.
-  if sub.len > s.len:
-    return -1
-
-  if sub.len == 1:
-    return find(s, sub[0], start, last)
-
+  if sub.len > s.len: return -1
+  if sub.len == 1: return find(s, sub[0], start, last)
   var a {.noinit.}: SkipTable
   initSkipTable(a, sub)
   result = find(a, s, sub, start, last)
@@ -1564,18 +1389,14 @@ proc center*(s: string, width: int, fillChar: char = ' '): string {.
   ##
   ## The original string is returned if `width` is less than or equal
   ## to `s.len`.
-  if width <= s.len:
-    return s
-
+  if width <= s.len: return s
   result = newString(width)
-
   # Left padding will be one fillChar
   # smaller if there are an odd number
   # of characters
   let
     charsLeft = (width - s.len)
     leftPadding = charsLeft div 2
-
   for i in 0 ..< width:
     if i >= leftPadding and i < leftPadding + s.len:
       # we are where the string should be located
@@ -1593,27 +1414,22 @@ proc count*(s: string, sub: string, overlapping: bool = false): int {.
   var i = 0
   while true:
     i = s.find(sub, i)
-    if i < 0:
-      break
-    if overlapping:
-      inc i
-    else:
-      i += sub.len
+    if i < 0: break
+    if overlapping: inc i
+    else: i += sub.len
     inc result
 
 proc count*(s: string, sub: char): int {.noSideEffect,
   rtl, extern: "nsuCountChar".} =
   ## Count the occurrences of the character `sub` in the string `s`.
   for c in s:
-    if c == sub:
-      inc result
+    if c == sub: inc result
 
 proc count*(s: string, subs: set[char]): int {.noSideEffect,
   rtl, extern: "nsuCountCharSet".} =
   ## Count the occurrences of the group of character `subs` in the string `s`.
   for c in s:
-    if c in subs:
-      inc result
+    if c in subs: inc result
 
 proc quoteIfContainsWhite*(s: string): string {.deprecated.} =
   ## Returns ``'"' & s & '"'`` if `s` contains a space and does not
@@ -1621,10 +1437,8 @@ proc quoteIfContainsWhite*(s: string): string {.deprecated.} =
   ##
   ## **DEPRECATED** as it was confused for shell quoting function.  For this
   ## application use `osproc.quoteShell <osproc.html#quoteShell>`_.
-  if find(s, {' ', '\t'}) >= 0 and s[0] != '"':
-    result = '"' & s & '"'
-  else:
-    result = s
+  if find(s, {' ', '\t'}) >= 0 and s[0] != '"': result = '"' & s & '"'
+  else: result = s
 
 proc contains*(s: string, c: char): bool {.noSideEffect.} =
   ## Same as ``find(s, c) >= 0``.
@@ -1704,9 +1518,8 @@ proc multiReplace*(s: string, replacements: varargs[(string, string)]): string {
   ## Same as replace, but specialized for doing multiple replacements in a single
   ## pass through the input string.
   ##
-  ## Calling replace multiple times after each other is inefficient and result in too many allocations
-  ## follwed by immediate deallocations as portions of the string gets replaced.
-  ## multiReplace performs all replacements in a single pass.
+  ## multiReplace performs all replacements in a single pass, this means it can be used
+  ## to swap the occurences of "a" and "b", for instance.
   ##
   ## If the resulting string is not longer than the original input string, only a single
   ## memory allocation is required.
@@ -1753,14 +1566,13 @@ proc parseOctInt*(s: string): int {.noSideEffect,
   ## of the following optional prefixes: ``0o``, ``0O``.  Underscores within
   ## `s` are ignored.
   var i = 0
-  if s[i] == '0' and (s[i+1] == 'o' or s[i+1] == 'O'): inc(i, 2)
-  while true:
+  if i+1 < s.len and s[i] == '0' and (s[i+1] == 'o' or s[i+1] == 'O'): inc(i, 2)
+  while i < s.len:
     case s[i]
     of '_': inc(i)
     of '0'..'7':
       result = result shl 3 or (ord(s[i]) - ord('0'))
       inc(i)
-    of '\0': break
     else: raise newException(ValueError, "invalid integer: " & s)
 
 proc toOct*(x: BiggestInt, len: Positive): string {.noSideEffect,
@@ -1849,16 +1661,18 @@ proc unescape*(s: string, prefix = "\"", suffix = "\""): string {.noSideEffect,
   ## If `s` does not begin with ``prefix`` and end with ``suffix`` a
   ## ValueError exception will be raised.
   ##
-  ## **Warning:** This procedure is deprecated because it's to easy to missuse.  
+  ## **Warning:** This procedure is deprecated because it's to easy to missuse.
   result = newStringOfCap(s.len)
   var i = prefix.len
   if not s.startsWith(prefix):
     raise newException(ValueError,
-                       "String does not start with a prefix of: " & prefix)
+                       "String does not start with: " & prefix)
   while true:
-    if i == s.len-suffix.len: break
-    case s[i]
-    of '\\':
+    if i >= s.len-suffix.len: break
+    if s[i] == '\\':
+      if i+1 >= s.len:
+        result.add('\\')
+        break
       case s[i+1]:
       of 'x':
         inc i, 2
@@ -1872,15 +1686,15 @@ proc unescape*(s: string, prefix = "\"", suffix = "\""): string {.noSideEffect,
         result.add('\'')
       of '\"':
         result.add('\"')
-      else: result.add("\\" & s[i+1])
-      inc(i)
-    of '\0': break
+      else:
+        result.add("\\" & s[i+1])
+      inc(i, 2)
     else:
       result.add(s[i])
-    inc(i)
+      inc(i)
   if not s.endsWith(suffix):
     raise newException(ValueError,
-                       "String does not end with a suffix of: " & suffix)
+                       "String does not end in: " & suffix)
 
 proc validIdentifier*(s: string): bool {.noSideEffect,
   rtl, extern: "nsuValidIdentifier".} =
@@ -1890,7 +1704,7 @@ proc validIdentifier*(s: string): bool {.noSideEffect,
   ## and is followed by any number of characters of the set `IdentChars`.
   runnableExamples:
     doAssert "abc_def08".validIdentifier
-  if s[0] in IdentStartChars:
+  if s.len > 0 and s[0] in IdentStartChars:
     for i in 1..s.len-1:
       if s[i] notin IdentChars: return false
     return true
@@ -1909,7 +1723,7 @@ proc editDistance*(a, b: string): int {.noSideEffect,
 
   # strip common prefix:
   var s = 0
-  while a[s] == b[s] and a[s] != '\0':
+  while s < len1 and a[s] == b[s]:
     inc(s)
     dec(len1)
     dec(len2)
@@ -1982,8 +1796,6 @@ proc editDistance*(a, b: string): int {.noSideEffect,
       if x > c3: x = c3
       row[p] = x
   result = row[e]
-  #dealloc(row)
-
 
 # floating point formating:
 when not defined(js):
@@ -2092,7 +1904,7 @@ proc trimZeros*(x: var string) {.noSideEffect.} =
   var spl: seq[string]
   if x.contains('.') or x.contains(','):
     if x.contains('e'):
-      spl= x.split('e')
+      spl = x.split('e')
       x = spl[0]
     while x[x.high] == '0':
       x.setLen(x.len-1)
@@ -2310,9 +2122,8 @@ proc addf*(s: var string, formatstr: string, a: varargs[string, `$`]) {.
   var i = 0
   var num = 0
   while i < len(formatstr):
-    if formatstr[i] == '$':
-      case formatstr[i+1] # again we use the fact that strings
-                          # are zero-terminated here
+    if formatstr[i] == '$' and i+1 < len(formatstr):
+      case formatstr[i+1]
       of '#':
         if num > a.high: invalidFormatString()
         add s, a[num]
@@ -2326,7 +2137,7 @@ proc addf*(s: var string, formatstr: string, a: varargs[string, `$`]) {.
         inc(i) # skip $
         var negative = formatstr[i] == '-'
         if negative: inc i
-        while formatstr[i] in Digits:
+        while i < formatstr.len and formatstr[i] in Digits:
           j = j * 10 + ord(formatstr[i]) - ord('0')
           inc(i)
         let idx = if not negative: j-1 else: a.len-j
@@ -2338,7 +2149,7 @@ proc addf*(s: var string, formatstr: string, a: varargs[string, `$`]) {.
         var negative = formatstr[j] == '-'
         if negative: inc j
         var isNumber = 0
-        while formatstr[j] notin {'\0', '}'}:
+        while j < formatstr.len and formatstr[j] notin {'\0', '}'}:
           if formatstr[j] in Digits:
             k = k * 10 + ord(formatstr[j]) - ord('0')
             if isNumber == 0: isNumber = 1
@@ -2356,7 +2167,7 @@ proc addf*(s: var string, formatstr: string, a: varargs[string, `$`]) {.
         i = j+1
       of 'a'..'z', 'A'..'Z', '\128'..'\255', '_':
         var j = i+1
-        while formatstr[j] in PatternChars: inc(j)
+        while j < formatstr.len and formatstr[j] in PatternChars: inc(j)
         var x = findNormalized(substr(formatstr, i+1, j-1), a)
         if x >= 0 and x < high(a): add s, a[x+1]
         else: invalidFormatString()
@@ -2628,13 +2439,7 @@ when isMainModule:
   doAssert isSpaceAscii("       ")
   doAssert(not isSpaceAscii("ABc   \td"))
 
-  doAssert(isNilOrEmpty(""))
-  doAssert(isNilOrEmpty(nil))
-  doAssert(not isNilOrEmpty("test"))
-  doAssert(not isNilOrEmpty(" "))
-
   doAssert(isNilOrWhitespace(""))
-  doAssert(isNilOrWhitespace(nil))
   doAssert(isNilOrWhitespace("       "))
   doAssert(isNilOrWhitespace("\t\l \v\r\f"))
   doAssert(not isNilOrWhitespace("ABc   \td"))
author	Andreas Rumpf <rumpf_a@web.de>	2018-04-28 21:59:43 +0200
committer	Andreas Rumpf <rumpf_a@web.de>	2018-04-28 21:59:43 +0200
commit	9b8603adcd7ce8fa9563137bcbe6c350e07a6fe0 (patch)
tree	d349c873a1e2c157698da659f28f6306b47ff74c
parent	9d77f61038ddabeb4b59847d709e6d722e743082 (diff)
download	Nim-9b8603adcd7ce8fa9563137bcbe6c350e07a6fe0.tar.gz