summary refs log tree commit diff stats
path: root/lib/impure/re.nim
diff options
context:
space:
mode:
Diffstat (limited to 'lib/impure/re.nim')
-rw-r--r-- lib/impure/re.nim 54
1 files changed, 32 insertions, 22 deletions
diff --git a/lib/impure/re.nim b/lib/impure/re.nim
index c7f8f336b..201c490f3 100644
--- a/lib/impure/re.nim
+++ b/lib/impure/re.nim
@@ -7,18 +7,14 @@
 #    distribution, for details about the copyright.
 #
 
-## Regular expression support for Nim. This module still has some
-## obscure bugs and limitations,
-## consider using the ``nre`` or ``pegs`` modules instead.
-## We had to de-deprecate this module since too much code relies on it
-## and many people prefer its API over ``nre``'s.
+## Regular expression support for Nim.
 ##
 ## This module is implemented by providing a wrapper around the
-## `PRCE (Perl-Compatible Regular Expressions) <http://www.pcre.org>`_
-## C library. This means that your application will depend on the PRCE
+## `PCRE (Perl-Compatible Regular Expressions) <http://www.pcre.org>`_
+## C library. This means that your application will depend on the PCRE
 ## library's licence when using this module, which should not be a problem
 ## though.
-## PRCE's licence follows:
+## PCRE's licence follows:
 ##
 ## .. include:: ../../doc/regexprs.txt
 ##
@@ -502,7 +498,7 @@ proc transformFile*(infile, outfile: string,
   var x = readFile(infile).string
   writeFile(outfile, x.multiReplace(subs))
 
-iterator split*(s: string, sep: Regex): string =
+iterator split*(s: string, sep: Regex; maxsplit = -1): string =
   ## Splits the string ``s`` into substrings.
   ##
   ## Substrings are separated by the regular expression ``sep``
@@ -524,22 +520,28 @@ iterator split*(s: string, sep: Regex): string =
   ##   "example"
   ##   ""
   ##
-  var
-    first = -1
-    last = -1
-  while last < len(s):
-    var x = matchLen(s, sep, last)
-    if x > 0: inc(last, x)
-    first = last
-    if x == 0: inc(last)
+  var last = 0
+  var splits = maxsplit
+  var x: int
+  while last <= len(s):
+    var first = last
+    var sepLen = 1
     while last < len(s):
       x = matchLen(s, sep, last)
-      if x >= 0: break
+      if x >= 0:
+        sepLen = x
+        break
       inc(last)
-    if first <= last:
-      yield substr(s, first, last-1)
-
-proc split*(s: string, sep: Regex): seq[string] {.inline.} =
+    if x == 0:
+      if last >= len(s): break
+      inc last
+    if splits == 0: last = len(s)
+    yield substr(s, first, last-1)
+    if splits == 0: break
+    dec(splits)
+    inc(last, sepLen)
+
+proc split*(s: string, sep: Regex, maxsplit = -1): seq[string] {.inline.} =
   ## Splits the string ``s`` into a seq of substrings.
   ##
   ## The portion matched by ``sep`` is not returned.
@@ -636,6 +638,14 @@ when isMainModule:
     accum.add(word)
   doAssert(accum == @["AAA", "", "BBB"])
 
+  doAssert(split("abc", re"") == @["a", "b", "c"])
+  doAssert(split("", re"") == @[])
+
+  doAssert(split("a;b;c", re";") == @["a", "b", "c"])
+  doAssert(split(";a;b;c", re";") == @["", "a", "b", "c"])
+  doAssert(split(";a;b;c;", re";") == @["", "a", "b", "c", ""])
+  doAssert(split("a;b;c;", re";") == @["a", "b", "c", ""])
+
   for x in findAll("abcdef", re"^{.}", 3):
     doAssert x == "d"
   accum = @[]