make re.split consistent with strutils.split and other programming languages; refs #7278

author: Andreas Rumpf <rumpf_a@web.de> 2018-03-05 21:39:13 +0100
committer: Andreas Rumpf <rumpf_a@web.de> 2018-03-05 21:39:13 +0100
commit: e2094bc6f4014d0da38e1bf440236b15023f6aa4 (patch)
tree: 4e213fca9477c389b02f625317223a3f0fbcbc03 /lib/impure/re.nim
parent: 5c8332d8715107e9735ef87cd95f2b4d1f95f4e7 (diff)
download: Nim-e2094bc6f4014d0da38e1bf440236b15023f6aa4.tar.gz
1 file changed, 28 insertions, 14 deletions
diff --git a/lib/impure/re.nim b/lib/impure/re.nim
index 3c7c39d2b..34d55b7b0 100644
--- a/lib/impure/re.nim
+++ b/lib/impure/re.nim
@@ -498,7 +498,7 @@ proc transformFile*(infile, outfile: string,
   var x = readFile(infile).string
   writeFile(outfile, x.multiReplace(subs))
 
-iterator split*(s: string, sep: Regex): string =
+iterator split*(s: string, sep: Regex; maxsplit = -1): string =
   ## Splits the string ``s`` into substrings.
   ##
   ## Substrings are separated by the regular expression ``sep``
@@ -520,22 +520,28 @@ iterator split*(s: string, sep: Regex): string =
   ##   "example"
   ##   ""
   ##
-  var
-    first = -1
-    last = -1
-  while last < len(s):
-    var x = matchLen(s, sep, last)
-    if x > 0: inc(last, x)
-    first = last
-    if x == 0: inc(last)
+  var last = 0
+  var splits = maxsplit
+  var x: int
+  while last <= len(s):
+    var first = last
+    var sepLen = 1
     while last < len(s):
       x = matchLen(s, sep, last)
-      if x >= 0: break
+      if x >= 0:
+        sepLen = x
+        break
       inc(last)
-    if first <= last:
-      yield substr(s, first, last-1)
-
-proc split*(s: string, sep: Regex): seq[string] {.inline.} =
+    if x == 0:
+      if last >= len(s): break
+      inc last
+    if splits == 0: last = len(s)
+    yield substr(s, first, last-1)
+    if splits == 0: break
+    dec(splits)
+    inc(last, sepLen)
+
+proc split*(s: string, sep: Regex, maxsplit = -1): seq[string] {.inline.} =
   ## Splits the string ``s`` into a seq of substrings.
   ##
   ## The portion matched by ``sep`` is not returned.
@@ -632,6 +638,14 @@ when isMainModule:
     accum.add(word)
   doAssert(accum == @["AAA", "", "BBB"])
 
+  doAssert(split("abc", re"") == @["a", "b", "c"])
+  doAssert(split("", re"") == @[])
+
+  doAssert(split("a;b;c", re";") == @["a", "b", "c"])
+  doAssert(split(";a;b;c", re";") == @["", "a", "b", "c"])
+  doAssert(split(";a;b;c;", re";") == @["", "a", "b", "c", ""])
+  doAssert(split("a;b;c;", re";") == @["a", "b", "c", ""])
+
   for x in findAll("abcdef", re"^{.}", 3):
     doAssert x == "d"
   accum = @[]
author	Andreas Rumpf <rumpf_a@web.de>	2018-03-05 21:39:13 +0100
committer	Andreas Rumpf <rumpf_a@web.de>	2018-03-05 21:39:13 +0100
commit	e2094bc6f4014d0da38e1bf440236b15023f6aa4 (patch)
tree	4e213fca9477c389b02f625317223a3f0fbcbc03 /lib/impure/re.nim
parent	5c8332d8715107e9735ef87cd95f2b4d1f95f4e7 (diff)
download	Nim-e2094bc6f4014d0da38e1bf440236b15023f6aa4.tar.gz