summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- src/nre.nim | 29
-rw-r--r-- test/split.nim | 7
2 files changed, 21 insertions, 15 deletions
diff --git a/src/nre.nim b/src/nre.nim
index d907cae62..d98f55889 100644
--- a/src/nre.nim
+++ b/src/nre.nim
@@ -391,6 +391,7 @@ proc split*(str: string, pattern: Regex, maxSplit = -1): seq[string] =
   result = @[]
   var lastIdx = 0
   var splits = 0
+  var bounds: Slice[int]
 
   for match in str.findIter(pattern):
     # upper bound is exclusive, lower is inclusive:
@@ -398,16 +399,12 @@ proc split*(str: string, pattern: Regex, maxSplit = -1): seq[string] =
     # 0123456
     #  ^^^
     # (1, 4)
-    var bounds = match.matchBounds
-
-    if lastIdx == 0 and
-       lastIdx == bounds.a and
-       bounds.a == bounds.b:
-      # "12".split("") would be @["", "1", "2"], but
-      # if we skip an empty first match, it's the correct
-      # @["1", "2"]
-      discard
-    else:
+    bounds = match.matchBounds
+
+    # "12".split("") would be @["", "1", "2"], but
+    # if we skip an empty first match, it's the correct
+    # @["1", "2"]
+    if bounds.a < bounds.b or bounds.a > 0:
       result.add(str.substr(lastIdx, bounds.a - 1))
       splits += 1
 
@@ -420,10 +417,14 @@ proc split*(str: string, pattern: Regex, maxSplit = -1): seq[string] =
     if splits == maxSplit:
       break
 
-  # last match: Each match takes the previous substring,
-  # but "1 2".split(/ /) needs to return @["1", "2"].
-  # This handles "2"
-  result.add(str.substr(lastIdx, str.len - 1))
+  # "12".split("\b") would be @["1", "2", ""], but
+  # if we skip an empty last match, it's the correct
+  # @["1", "2"]
+  if bounds.a < bounds.b or bounds.b < str.len:
+    # last match: Each match takes the previous substring,
+    # but "1 2".split(/ /) needs to return @["1", "2"].
+    # This handles "2"
+    result.add(str.substr(bounds.b, str.len - 1))
 
 proc replace*(str: string, pattern: Regex,
               subproc: proc (match: RegexMatch): string): string =
diff --git a/test/split.nim b/test/split.nim
index 184b5f9e2..6cc68a41e 100644
--- a/test/split.nim
+++ b/test/split.nim
@@ -3,11 +3,11 @@ include nre
 
 suite "string splitting":
   test "splitting strings":
-    check("12345".split(re("")) == @["1", "2", "3", "4", "5"])
     check("1 2 3 4 5 6 ".split(re" ") == @["1", "2", "3", "4", "5", "6", ""])
     check("1  2  ".split(re(" ")) == @["1", "", "2", "", ""])
     check("1 2".split(re(" ")) == @["1", "2"])
     check("foo".split(re("foo")) == @["", ""])
+    check("".split(re"foo") == newSeq[string]())
 
   test "captured patterns":
     check("12".split(re"(\d)") == @["", "1", "", "2", ""])
@@ -16,3 +16,8 @@ suite "string splitting":
     check("123".split(re"", maxsplit = 1) == @["1", "23"])
     check("123".split(re"", maxsplit = 0) == @["123"])
     check("123".split(re"", maxsplit = -1) == @["1", "2", "3"])
+
+  test "split with 0-length match":
+    check("12345".split(re("")) == @["1", "2", "3", "4", "5"])
+    check("".split(re"") == newSeq[string]())
+    check("word word".split(re"\b") == @["word", " ", "word"])