diff options
-rw-r--r-- | src/nre.nim | 29 | ||||
-rw-r--r-- | test/split.nim | 7 |
2 files changed, 21 insertions, 15 deletions
diff --git a/src/nre.nim b/src/nre.nim index d907cae62..d98f55889 100644 --- a/src/nre.nim +++ b/src/nre.nim @@ -391,6 +391,7 @@ proc split*(str: string, pattern: Regex, maxSplit = -1): seq[string] = result = @[] var lastIdx = 0 var splits = 0 + var bounds: Slice[int] for match in str.findIter(pattern): # upper bound is exclusive, lower is inclusive: @@ -398,16 +399,12 @@ proc split*(str: string, pattern: Regex, maxSplit = -1): seq[string] = # 0123456 # ^^^ # (1, 4) - var bounds = match.matchBounds - - if lastIdx == 0 and - lastIdx == bounds.a and - bounds.a == bounds.b: - # "12".split("") would be @["", "1", "2"], but - # if we skip an empty first match, it's the correct - # @["1", "2"] - discard - else: + bounds = match.matchBounds + + # "12".split("") would be @["", "1", "2"], but + # if we skip an empty first match, it's the correct + # @["1", "2"] + if bounds.a < bounds.b or bounds.a > 0: result.add(str.substr(lastIdx, bounds.a - 1)) splits += 1 @@ -420,10 +417,14 @@ proc split*(str: string, pattern: Regex, maxSplit = -1): seq[string] = if splits == maxSplit: break - # last match: Each match takes the previous substring, - # but "1 2".split(/ /) needs to return @["1", "2"]. - # This handles "2" - result.add(str.substr(lastIdx, str.len - 1)) + # "12".split("\b") would be @["1", "2", ""], but + # if we skip an empty last match, it's the correct + # @["1", "2"] + if bounds.a < bounds.b or bounds.b < str.len: + # last match: Each match takes the previous substring, + # but "1 2".split(/ /) needs to return @["1", "2"]. + # This handles "2" + result.add(str.substr(bounds.b, str.len - 1)) proc replace*(str: string, pattern: Regex, subproc: proc (match: RegexMatch): string): string = diff --git a/test/split.nim b/test/split.nim index 184b5f9e2..6cc68a41e 100644 --- a/test/split.nim +++ b/test/split.nim @@ -3,11 +3,11 @@ include nre suite "string splitting": test "splitting strings": - check("12345".split(re("")) == @["1", "2", "3", "4", "5"]) check("1 2 3 4 5 6 ".split(re" ") == @["1", "2", "3", "4", "5", "6", ""]) check("1 2 ".split(re(" ")) == @["1", "", "2", "", ""]) check("1 2".split(re(" ")) == @["1", "2"]) check("foo".split(re("foo")) == @["", ""]) + check("".split(re"foo") == newSeq[string]()) test "captured patterns": check("12".split(re"(\d)") == @["", "1", "", "2", ""]) @@ -16,3 +16,8 @@ suite "string splitting": check("123".split(re"", maxsplit = 1) == @["1", "23"]) check("123".split(re"", maxsplit = 0) == @["123"]) check("123".split(re"", maxsplit = -1) == @["1", "2", "3"]) + + test "split with 0-length match": + check("12345".split(re("")) == @["1", "2", "3", "4", "5"]) + check("".split(re"") == newSeq[string]()) + check("word word".split(re"\b") == @["word", " ", "word"]) |