summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--changelogs/changelog_2_0_0.md3
-rw-r--r--lib/pure/strutils.nim60
-rw-r--r--tests/js/tstdlib_various.nim7
-rw-r--r--tests/stdlib/tstdlib_various.nim7
-rw-r--r--tests/stdlib/tstrutils.nim16
5 files changed, 50 insertions, 43 deletions
diff --git a/changelogs/changelog_2_0_0.md b/changelogs/changelog_2_0_0.md
index b56a3e71c..4a0d505c8 100644
--- a/changelogs/changelog_2_0_0.md
+++ b/changelogs/changelog_2_0_0.md
@@ -256,8 +256,6 @@
   declared when they are not available on the backend. Previously it would call
   `doAssert false` at runtime despite the condition being compile-time.
 
-- `strutils.split` and `strutils.rsplit` now forbid an empty separator.
-
 - Custom destructors now supports non-var parameters, e.g. `proc =destroy[T: object](x: T)` is valid. `proc =destroy[T: object](x: var T)` is deprecated.
 
 - Relative imports will not resolve to searched paths anymore, e.g. `import ./tables` now reports an error properly.
@@ -275,6 +273,7 @@
 - Changed `mimedb` to use an `OrderedTable` instead of `OrderedTableRef` to support `const` tables.
 - `strutils.find` now uses and defaults to `last = -1` for whole string searches,
   making limiting it to just the first char (`last = 0`) valid.
+- `strutils.split` and `strutils.rsplit` now return a source string as a single element for an empty separator.
 - `random.rand` now works with `Ordinal`s.
 - Undeprecated `os.isvalidfilename`.
 - `std/oids` now uses `int64` to store time internally (before it was int32).
diff --git a/lib/pure/strutils.nim b/lib/pure/strutils.nim
index fe9f2a8c3..6c1e49564 100644
--- a/lib/pure/strutils.nim
+++ b/lib/pure/strutils.nim
@@ -366,11 +366,14 @@ func cmpIgnoreStyle*(a, b: string): int {.rtl, extern: "nsuCmpIgnoreStyle".} =
 # --------- Private templates for different split separators -----------
 
 func substrEq(s: string, pos: int, substr: string): bool =
-  var i = 0
+  # Always returns false for empty `substr`
   var length = substr.len
-  while i < length and pos+i < s.len and s[pos+i] == substr[i]:
-    inc i
-  return i == length
+  if length > 0:
+    var i = 0
+    while i < length and pos+i < s.len and s[pos+i] == substr[i]:
+      inc i
+    i == length
+  else: false
 
 template stringHasSep(s: string, index: int, seps: set[char]): bool =
   s[index] in seps
@@ -487,15 +490,15 @@ iterator split*(s: string, seps: set[char] = Whitespace,
   ##   "22"
   ##   "08"
   ##   "08.398990"
-  ## 
-  ## .. warning:: `seps` should not be empty.
+  ##
+  ##  .. note:: Empty separator set results in returning an original string,
+  ##   following the interpretation "split by no element".
   ##
   ## See also:
   ## * `rsplit iterator<#rsplit.i,string,set[char],int>`_
   ## * `splitLines iterator<#splitLines.i,string>`_
   ## * `splitWhitespace iterator<#splitWhitespace.i,string,int>`_
   ## * `split func<#split,string,set[char],int>`_
-  assert seps.card > 0, "Empty separator"
   splitCommon(s, seps, maxsplit, 1)
 
 iterator split*(s: string, sep: string, maxsplit: int = -1): string =
@@ -515,15 +518,17 @@ iterator split*(s: string, sep: string, maxsplit: int = -1): string =
   ##   "is"
   ##   "corrupted"
   ##
-  ## .. warning:: `sep` should not be empty.
+  ##  .. note:: Empty separator string results in returning an original string,
+  ##   following the interpretation "split by no element".
   ##
   ## See also:
   ## * `rsplit iterator<#rsplit.i,string,string,int,bool>`_
   ## * `splitLines iterator<#splitLines.i,string>`_
   ## * `splitWhitespace iterator<#splitWhitespace.i,string,int>`_
   ## * `split func<#split,string,string,int>`_
-  assert sep.len > 0, "Empty separator"
-  splitCommon(s, sep, maxsplit, sep.len)
+  let sepLen = if sep.len == 0: 1 # prevents infinite loop
+    else: sep.len
+  splitCommon(s, sep, maxsplit, sepLen)
 
 
 template rsplitCommon(s, sep, maxsplit, sepLen) =
@@ -591,16 +596,16 @@ iterator rsplit*(s: string, seps: set[char] = Whitespace,
   ##   "bar"
   ##   "foo"
   ##
-  ## Substrings are separated from the right by the set of chars `seps`.
-  ## 
-  ## .. warning:: `seps` should not be empty.
+  ## Substrings are separated from the right by the set of chars `seps`
+  ##
+  ##  .. note:: Empty separator set results in returning an original string,
+  ##   following the interpretation "split by no element".
   ##
   ## See also:
   ## * `split iterator<#split.i,string,set[char],int>`_
   ## * `splitLines iterator<#splitLines.i,string>`_
   ## * `splitWhitespace iterator<#splitWhitespace.i,string,int>`_
   ## * `rsplit func<#rsplit,string,set[char],int>`_
-  assert seps.card > 0, "Empty separator"
   rsplitCommon(s, seps, maxsplit, 1)
 
 iterator rsplit*(s: string, sep: string, maxsplit: int = -1,
@@ -619,17 +624,19 @@ iterator rsplit*(s: string, sep: string, maxsplit: int = -1,
   ##   "bar"
   ##   "foo"
   ##
-  ## Substrings are separated from the right by the string `sep`.
-  ## 
-  ## .. warning:: `sep` should not be empty.
+  ## Substrings are separated from the right by the string `sep`
+  ##
+  ##  .. note:: Empty separator string results in returning an original string,
+  ##   following the interpretation "split by no element".
   ##
   ## See also:
   ## * `split iterator<#split.i,string,string,int>`_
   ## * `splitLines iterator<#splitLines.i,string>`_
   ## * `splitWhitespace iterator<#splitWhitespace.i,string,int>`_
   ## * `rsplit func<#rsplit,string,string,int>`_
-  assert sep.len > 0, "Empty separator"
-  rsplitCommon(s, sep, maxsplit, sep.len)
+  let sepLen = if sep.len == 0: 1 # prevents infinite loop
+    else: sep.len
+  rsplitCommon(s, sep, maxsplit, sepLen)
 
 iterator splitLines*(s: string, keepEol = false): string =
   ## Splits the string `s` into its containing lines.
@@ -740,7 +747,8 @@ func split*(s: string, seps: set[char] = Whitespace, maxsplit: int = -1): seq[
   ## The same as the `split iterator <#split.i,string,set[char],int>`_ (see its
   ## documentation), but is a func that returns a sequence of substrings.
   ##
-  ## .. warning:: `seps` should not be empty.
+  ##  .. note:: Empty separator set results in returning an original string,
+  ##   following the interpretation "split by no element".
   ##
   ## See also:
   ## * `split iterator <#split.i,string,set[char],int>`_
@@ -750,6 +758,7 @@ func split*(s: string, seps: set[char] = Whitespace, maxsplit: int = -1): seq[
   runnableExamples:
     doAssert "a,b;c".split({',', ';'}) == @["a", "b", "c"]
     doAssert "".split({' '}) == @[""]
+    doAssert "empty seps return unsplit s".split({}) == @["empty seps return unsplit s"]
   accResult(split(s, seps, maxsplit))
 
 func split*(s: string, sep: string, maxsplit: int = -1): seq[string] {.rtl,
@@ -759,7 +768,8 @@ func split*(s: string, sep: string, maxsplit: int = -1): seq[string] {.rtl,
   ## Substrings are separated by the string `sep`. This is a wrapper around the
   ## `split iterator <#split.i,string,string,int>`_.
   ##
-  ## .. warning:: `sep` should not be empty.
+  ##  .. note:: Empty separator string results in returning an original string,
+  ##   following the interpretation "split by no element".
   ##
   ## See also:
   ## * `split iterator <#split.i,string,string,int>`_
@@ -773,6 +783,7 @@ func split*(s: string, sep: string, maxsplit: int = -1): seq[string] {.rtl,
     doAssert "a  largely    spaced sentence".split(" ") == @["a", "", "largely",
         "", "", "", "spaced", "sentence"]
     doAssert "a  largely    spaced sentence".split(" ", maxsplit = 1) == @["a", " largely    spaced sentence"]
+    doAssert "empty sep returns unsplit s".split("") == @["empty sep returns unsplit s"]
   accResult(split(s, sep, maxsplit))
 
 func rsplit*(s: string, sep: char, maxsplit: int = -1): seq[string] {.rtl,
@@ -822,7 +833,8 @@ func rsplit*(s: string, seps: set[char] = Whitespace,
   ## .. code-block:: nim
   ##   @["Root#Object#Method", "Index"]
   ##
-  ## .. warning:: `seps` should not be empty.
+  ##  .. note:: Empty separator set results in returning an original string,
+  ##   following the interpretation "split by no element".
   ##
   ## See also:
   ## * `rsplit iterator <#rsplit.i,string,set[char],int>`_
@@ -851,7 +863,8 @@ func rsplit*(s: string, sep: string, maxsplit: int = -1): seq[string] {.rtl,
   ## .. code-block:: nim
   ##   @["Root#Object#Method", "Index"]
   ##
-  ## .. warning:: `sep` should not be empty.
+  ##  .. note:: Empty separator string results in returning an original string,
+  ##   following the interpretation "split by no element".
   ##
   ## See also:
   ## * `rsplit iterator <#rsplit.i,string,string,int,bool>`_
@@ -867,6 +880,7 @@ func rsplit*(s: string, sep: string, maxsplit: int = -1): seq[string] {.rtl,
     doAssert "".rsplit("Elon Musk") == @[""]
     doAssert "a  largely    spaced sentence".rsplit(" ") == @["a", "",
         "largely", "", "", "", "spaced", "sentence"]
+    doAssert "empty sep returns unsplit s".rsplit("") == @["empty sep returns unsplit s"]
   accResult(rsplit(s, sep, maxsplit))
   result.reverse()
 
diff --git a/tests/js/tstdlib_various.nim b/tests/js/tstdlib_various.nim
index 4b5ce1de8..1e584f735 100644
--- a/tests/js/tstdlib_various.nim
+++ b/tests/js/tstdlib_various.nim
@@ -150,12 +150,7 @@ block tsplit2:
     s.add("#")
     s.add(w)
 
-  var errored = false
-  try:
-    discard "hello".split("")
-  except AssertionDefect:
-    errored = true
-  doAssert errored
+  doAssert "true".split("") == @["true"]
 
 block txmlgen:
   var nim = "Nim"
diff --git a/tests/stdlib/tstdlib_various.nim b/tests/stdlib/tstdlib_various.nim
index ce9c9a7c5..bac5018fa 100644
--- a/tests/stdlib/tstdlib_various.nim
+++ b/tests/stdlib/tstdlib_various.nim
@@ -27,7 +27,6 @@ Hi Andreas! How do you feel, Rumpf?
 [2, 3, 4, 5]
 [2, 3, 4, 5, 6]
 [1, 2, 3, 4, 5, 6]
-true
 <h1><a href="http://force7.de/nim">Nim</a></h1>
 '''
 """
@@ -207,11 +206,7 @@ block tsplit2:
     s.add("#")
     s.add(w)
 
-  try:
-    discard "hello".split("")
-    echo "false"
-  except AssertionDefect:
-    echo "true"
+  doAssert "true".split("") == @["true"]
 
 
 
diff --git a/tests/stdlib/tstrutils.nim b/tests/stdlib/tstrutils.nim
index d53e9d8b4..847e22656 100644
--- a/tests/stdlib/tstrutils.nim
+++ b/tests/stdlib/tstrutils.nim
@@ -58,8 +58,10 @@ template main() =
     doAssert "".split(" ") == @[""]
     doAssert "".split({' '}) == @[""]
     # Empty separators:
-    doAssertRaises(AssertionDefect): discard s.split({})
-    doAssertRaises(AssertionDefect): discard s.split("")
+    doAssert "".split({}) == @[""]
+    doAssert "".split("") == @[""]
+    doAssert s.split({}) == @[s]
+    doAssert s.split("") == @[s]
 
   block: # splitLines
     let fixture = "a\nb\rc\r\nd"
@@ -70,8 +72,7 @@ template main() =
   block: # rsplit
     doAssert rsplit("foo bar", seps = Whitespace) == @["foo", "bar"]
     doAssert rsplit(" foo bar", seps = Whitespace, maxsplit = 1) == @[" foo", "bar"]
-    doAssert rsplit(" foo bar ", seps = Whitespace, maxsplit = 1) == @[
-        " foo bar", ""]
+    doAssert rsplit(" foo bar ", seps = Whitespace, maxsplit = 1) == @[" foo bar", ""]
     doAssert rsplit(":foo:bar", sep = ':') == @["", "foo", "bar"]
     doAssert rsplit(":foo:bar", sep = ':', maxsplit = 2) == @["", "foo", "bar"]
     doAssert rsplit(":foo:bar", sep = ':', maxsplit = 3) == @["", "foo", "bar"]
@@ -81,8 +82,11 @@ template main() =
     doAssert "".rsplit(" ") == @[""]
     doAssert "".rsplit({' '}) == @[""]
     # Empty separators:
-    doAssertRaises(AssertionDefect): discard "".rsplit({})
-    doAssertRaises(AssertionDefect): discard "".rsplit("")
+    let s = " this is an example  "
+    doAssert "".rsplit({}) == @[""]
+    doAssert "".rsplit("") == @[""]
+    doAssert s.rsplit({}) == @[s]
+    doAssert s.rsplit("") == @[s]
 
   block: # splitWhitespace
     let s = " this is an example  "