summary refs log tree commit diff stats
path: root/lib/impure/re.nim
diff options
context:
space:
mode:
authorAndreas Rumpf <rumpf_a@web.de>2018-09-11 17:27:47 +0200
committerAndreas Rumpf <rumpf_a@web.de>2018-09-11 17:27:47 +0200
commitf7d1902043c1bc70ba0bb159a3e8c71b78947ad7 (patch)
tree96adf0fef3b9f1af4b318c1b639797dcca185874 /lib/impure/re.nim
parent0495e6cf3a1cf0f5f71622a8408d24fbc27642a0 (diff)
parentaf94946517d4e07e91b5c5ca21d58645f6da86c4 (diff)
downloadNim-f7d1902043c1bc70ba0bb159a3e8c71b78947ad7.tar.gz
fixes merge conflicts
Diffstat (limited to 'lib/impure/re.nim')
-rw-r--r--lib/impure/re.nim64
1 files changed, 36 insertions, 28 deletions
diff --git a/lib/impure/re.nim b/lib/impure/re.nim
index c7f8f336b..a60f70828 100644
--- a/lib/impure/re.nim
+++ b/lib/impure/re.nim
@@ -7,18 +7,14 @@
 #    distribution, for details about the copyright.
 #
 
-## Regular expression support for Nim. This module still has some
-## obscure bugs and limitations,
-## consider using the ``nre`` or ``pegs`` modules instead.
-## We had to de-deprecate this module since too much code relies on it
-## and many people prefer its API over ``nre``'s.
+## Regular expression support for Nim.
 ##
 ## This module is implemented by providing a wrapper around the
-## `PRCE (Perl-Compatible Regular Expressions) <http://www.pcre.org>`_
-## C library. This means that your application will depend on the PRCE
+## `PCRE (Perl-Compatible Regular Expressions) <http://www.pcre.org>`_
+## C library. This means that your application will depend on the PCRE
 ## library's licence when using this module, which should not be a problem
 ## though.
-## PRCE's licence follows:
+## PCRE's licence follows:
 ##
 ## .. include:: ../../doc/regexprs.txt
 ##
@@ -117,7 +113,7 @@ proc matchOrFind(buf: cstring, pattern: Regex, matches: var openArray[string],
     var b = rawMatches[i * 2 + 1]
     if a >= 0'i32:
       matches[i-1] = bufSubstr(buf, int(a), int(b))
-    else: matches[i-1] = nil
+    else: matches[i-1] = ""
   return rawMatches[1] - rawMatches[0]
 
 proc findBounds*(buf: cstring, pattern: Regex, matches: var openArray[string],
@@ -137,7 +133,7 @@ proc findBounds*(buf: cstring, pattern: Regex, matches: var openArray[string],
     var a = rawMatches[i * 2]
     var b = rawMatches[i * 2 + 1]
     if a >= 0'i32: matches[i-1] = bufSubstr(buf, int(a), int(b))
-    else: matches[i-1] = nil
+    else: matches[i-1] = ""
   return (rawMatches[0].int, rawMatches[1].int - 1)
 
 proc findBounds*(s: string, pattern: Regex, matches: var openArray[string],
@@ -291,7 +287,7 @@ proc find*(buf: cstring, pattern: Regex, matches: var openArray[string],
     var a = rawMatches[i * 2]
     var b = rawMatches[i * 2 + 1]
     if a >= 0'i32: matches[i-1] = bufSubstr(buf, int(a), int(b))
-    else: matches[i-1] = nil
+    else: matches[i-1] = ""
   return rawMatches[0]
 
 proc find*(s: string, pattern: Regex, matches: var openArray[string],
@@ -460,8 +456,6 @@ proc replacef*(s: string, sub: Regex, by: string): string =
   while true:
     var match = findBounds(s, sub, caps, prev)
     if match.first < 0: break
-    assert result != nil
-    assert s != nil
     add(result, substr(s, prev, match.first-1))
     addf(result, by, caps)
     prev = match.last + 1
@@ -502,7 +496,7 @@ proc transformFile*(infile, outfile: string,
   var x = readFile(infile).string
   writeFile(outfile, x.multiReplace(subs))
 
-iterator split*(s: string, sep: Regex): string =
+iterator split*(s: string, sep: Regex; maxsplit = -1): string =
   ## Splits the string ``s`` into substrings.
   ##
   ## Substrings are separated by the regular expression ``sep``
@@ -524,22 +518,28 @@ iterator split*(s: string, sep: Regex): string =
   ##   "example"
   ##   ""
   ##
-  var
-    first = -1
-    last = -1
-  while last < len(s):
-    var x = matchLen(s, sep, last)
-    if x > 0: inc(last, x)
-    first = last
-    if x == 0: inc(last)
+  var last = 0
+  var splits = maxsplit
+  var x: int
+  while last <= len(s):
+    var first = last
+    var sepLen = 1
     while last < len(s):
       x = matchLen(s, sep, last)
-      if x >= 0: break
+      if x >= 0:
+        sepLen = x
+        break
       inc(last)
-    if first <= last:
-      yield substr(s, first, last-1)
-
-proc split*(s: string, sep: Regex): seq[string] {.inline.} =
+    if x == 0:
+      if last >= len(s): break
+      inc last
+    if splits == 0: last = len(s)
+    yield substr(s, first, last-1)
+    if splits == 0: break
+    dec(splits)
+    inc(last, sepLen)
+
+proc split*(s: string, sep: Regex, maxsplit = -1): seq[string] {.inline.} =
   ## Splits the string ``s`` into a seq of substrings.
   ##
   ## The portion matched by ``sep`` is not returned.
@@ -613,7 +613,7 @@ when isMainModule:
     doAssert false
 
   if "abc" =~ re"(cba)?.*":
-    doAssert matches[0] == nil
+    doAssert matches[0] == ""
   else: doAssert false
 
   if "abc" =~ re"().*":
@@ -636,6 +636,14 @@ when isMainModule:
     accum.add(word)
   doAssert(accum == @["AAA", "", "BBB"])
 
+  doAssert(split("abc", re"") == @["a", "b", "c"])
+  doAssert(split("", re"") == @[])
+
+  doAssert(split("a;b;c", re";") == @["a", "b", "c"])
+  doAssert(split(";a;b;c", re";") == @["", "a", "b", "c"])
+  doAssert(split(";a;b;c;", re";") == @["", "a", "b", "c", ""])
+  doAssert(split("a;b;c;", re";") == @["a", "b", "c", ""])
+
   for x in findAll("abcdef", re"^{.}", 3):
     doAssert x == "d"
   accum = @[]