summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Flaviu Tamas <tamasflaviu@gmail.com> 2015-01-17 16:21:25 -0500
committer Flaviu Tamas <tamasflaviu@gmail.com> 2015-01-17 16:21:25 -0500
commit 4d25a89ba990b602fdf19b1174a3eba180482029 (patch)
tree 136f0cfba0bc78e21340cc40bda6f3872852673f
parent a7ce05b42fca227eb28e06a100219e2f07df6098 (diff)
download Nim-4d25a89ba990b602fdf19b1174a3eba180482029.tar.gz
Add replace
Also came across a find bug with 0-length subject strings
-rw-r--r-- src/nre.nim 26
-rw-r--r-- test/find.nim 4
-rw-r--r-- test/replace.nim 8
-rw-r--r-- test/testall.nim 1
4 files changed, 34 insertions, 5 deletions
diff --git a/src/nre.nim b/src/nre.nim
index f41091009..89680ea5d 100644
--- a/src/nre.nim
+++ b/src/nre.nim
@@ -329,11 +329,7 @@ iterator findIter*(str: string, pattern: Regex, start = 0, endpos = -1): RegexMa
 
   var offset = start
   var previousMatch: RegexMatch
-  while offset != endpos:
-    if offset > endpos:
-      # eos occurs in the middle of a unicode char? die.
-      raise newException(AssertionError, "Input string has malformed unicode")
-
+  while true:
     var flags = 0
 
     if previousMatch != nil and
@@ -356,11 +352,16 @@ iterator findIter*(str: string, pattern: Regex, start = 0, endpos = -1): RegexMa
       elif unicode:
         # XXX what about invalid unicode?
         offset += str.runeLenAt(offset)
+        assert(offset <= endpos)
     else:
       offset = currentMatch.matchBounds.b
 
       yield currentMatch
 
+    if offset >= endpos:
+      # do while
+      break
+
 proc find*(str: string, pattern: Regex, start = 0, endpos = -1): RegexMatch =
   ## Returns a `RegexMatch` if there is a match between `start` and `endpos`, otherwise
   ## it returns nil.
@@ -415,3 +416,18 @@ proc split*(str: string, pattern: Regex): seq[string] =
   # but "1 2".split(/ /) needs to return @["1", "2"].
   # This handles "2"
   result.add(str.substr(lastIdx, str.len - 1))
+
+proc replace*(str: string, pattern: Regex,
+              subproc: proc (match: RegexMatch): string): string =
+  # XXX seems very similar to split, maybe I can reduce code duplication
+  # somehow?
+  result = ""
+  var lastIdx = 0
+  for match in str.findIter(pattern):
+    let bounds = match.matchBounds
+    result.add(str.substr(lastIdx, bounds.a - 1))
+    result.add(subproc(match))
+
+    lastIdx = bounds.b
+
+  result.add(str.substr(lastIdx, str.len - 1))
diff --git a/test/find.nim b/test/find.nim
index 6ea4f51e8..1fd91f0d2 100644
--- a/test/find.nim
+++ b/test/find.nim
@@ -16,3 +16,7 @@ suite "find":
   test "overlapping find":
     check("222".findAllStr(re"22") == @["22"])
     check("2222".findAllStr(re"22") == @["22", "22"])
+
+  test "len 0 find":
+    check("".findAllStr(re"\ ") == newSeq[string]())
+    check("".findAllStr(re"") == @[""])
diff --git a/test/replace.nim b/test/replace.nim
new file mode 100644
index 000000000..df7227f0b
--- /dev/null
+++ b/test/replace.nim
@@ -0,0 +1,8 @@
+include nre
+import unittest
+
+suite "replace":
+  test "replace with 0-length strings":
+    check("".replace(re"1", proc (v: RegexMatch): string = "1") == "")
+    check(" ".replace(re"", proc (v: RegexMatch): string = "1") == "1 ")
+    check("".replace(re"", proc (v: RegexMatch): string = "1") == "1")
diff --git a/test/testall.nim b/test/testall.nim
index 6fa3e9bca..89b661a2a 100644
--- a/test/testall.nim
+++ b/test/testall.nim
@@ -4,3 +4,4 @@ import captures
 import find
 import split
 import match
+import replace