diff options
-rw-r--r-- | changelog.md | 6 | ||||
-rw-r--r-- | lib/impure/re.nim | 42 |
2 files changed, 34 insertions, 14 deletions
diff --git a/changelog.md b/changelog.md index a26972372..00c638f1d 100644 --- a/changelog.md +++ b/changelog.md @@ -4,10 +4,16 @@ #### Breaking changes in the standard library +- ``re.split`` for empty regular expressions now yields every character in + the string which is what other programming languages chose to do. + #### Breaking changes in the compiler ### Library additions +- ``re.split`` now also supports the ``maxsplit`` parameter for consistency + with ``strutils.split``. + ### Library changes ### Language additions diff --git a/lib/impure/re.nim b/lib/impure/re.nim index 3c7c39d2b..34d55b7b0 100644 --- a/lib/impure/re.nim +++ b/lib/impure/re.nim @@ -498,7 +498,7 @@ proc transformFile*(infile, outfile: string, var x = readFile(infile).string writeFile(outfile, x.multiReplace(subs)) -iterator split*(s: string, sep: Regex): string = +iterator split*(s: string, sep: Regex; maxsplit = -1): string = ## Splits the string ``s`` into substrings. ## ## Substrings are separated by the regular expression ``sep`` @@ -520,22 +520,28 @@ iterator split*(s: string, sep: Regex): string = ## "example" ## "" ## - var - first = -1 - last = -1 - while last < len(s): - var x = matchLen(s, sep, last) - if x > 0: inc(last, x) - first = last - if x == 0: inc(last) + var last = 0 + var splits = maxsplit + var x: int + while last <= len(s): + var first = last + var sepLen = 1 while last < len(s): x = matchLen(s, sep, last) - if x >= 0: break + if x >= 0: + sepLen = x + break inc(last) - if first <= last: - yield substr(s, first, last-1) - -proc split*(s: string, sep: Regex): seq[string] {.inline.} = + if x == 0: + if last >= len(s): break + inc last + if splits == 0: last = len(s) + yield substr(s, first, last-1) + if splits == 0: break + dec(splits) + inc(last, sepLen) + +proc split*(s: string, sep: Regex, maxsplit = -1): seq[string] {.inline.} = ## Splits the string ``s`` into a seq of substrings. ## ## The portion matched by ``sep`` is not returned. @@ -632,6 +638,14 @@ when isMainModule: accum.add(word) doAssert(accum == @["AAA", "", "BBB"]) + doAssert(split("abc", re"") == @["a", "b", "c"]) + doAssert(split("", re"") == @[]) + + doAssert(split("a;b;c", re";") == @["a", "b", "c"]) + doAssert(split(";a;b;c", re";") == @["", "a", "b", "c"]) + doAssert(split(";a;b;c;", re";") == @["", "a", "b", "c", ""]) + doAssert(split("a;b;c;", re";") == @["a", "b", "c", ""]) + for x in findAll("abcdef", re"^{.}", 3): doAssert x == "d" accum = @[] |