summary refs log tree commit diff stats
path: root/src/nre.nim
diff options
context:
space:
mode:
authorFlaviu Tamas <tamasflaviu@gmail.com>2015-01-17 16:21:25 -0500
committerFlaviu Tamas <tamasflaviu@gmail.com>2015-01-17 16:21:25 -0500
commit4d25a89ba990b602fdf19b1174a3eba180482029 (patch)
tree136f0cfba0bc78e21340cc40bda6f3872852673f /src/nre.nim
parenta7ce05b42fca227eb28e06a100219e2f07df6098 (diff)
downloadNim-4d25a89ba990b602fdf19b1174a3eba180482029.tar.gz
Add replace
Also came across a find bug with 0-length subject strings
Diffstat (limited to 'src/nre.nim')
-rw-r--r--src/nre.nim26
1 files changed, 21 insertions, 5 deletions
diff --git a/src/nre.nim b/src/nre.nim
index f41091009..89680ea5d 100644
--- a/src/nre.nim
+++ b/src/nre.nim
@@ -329,11 +329,7 @@ iterator findIter*(str: string, pattern: Regex, start = 0, endpos = -1): RegexMa
 
   var offset = start
   var previousMatch: RegexMatch
-  while offset != endpos:
-    if offset > endpos:
-      # eos occurs in the middle of a unicode char? die.
-      raise newException(AssertionError, "Input string has malformed unicode")
-
+  while true:
     var flags = 0
 
     if previousMatch != nil and
@@ -356,11 +352,16 @@ iterator findIter*(str: string, pattern: Regex, start = 0, endpos = -1): RegexMa
       elif unicode:
         # XXX what about invalid unicode?
         offset += str.runeLenAt(offset)
+        assert(offset <= endpos)
     else:
       offset = currentMatch.matchBounds.b
 
       yield currentMatch
 
+    if offset >= endpos:
+      # do while
+      break
+
 proc find*(str: string, pattern: Regex, start = 0, endpos = -1): RegexMatch =
   ## Returns a `RegexMatch` if there is a match between `start` and `endpos`, otherwise
   ## it returns nil.
@@ -415,3 +416,18 @@ proc split*(str: string, pattern: Regex): seq[string] =
   # but "1 2".split(/ /) needs to return @["1", "2"].
   # This handles "2"
   result.add(str.substr(lastIdx, str.len - 1))
+
+proc replace*(str: string, pattern: Regex,
+              subproc: proc (match: RegexMatch): string): string =
+  # XXX seems very similar to split, maybe I can reduce code duplication
+  # somehow?
+  result = ""
+  var lastIdx = 0
+  for match in str.findIter(pattern):
+    let bounds = match.matchBounds
+    result.add(str.substr(lastIdx, bounds.a - 1))
+    result.add(subproc(match))
+
+    lastIdx = bounds.b
+
+  result.add(str.substr(lastIdx, str.len - 1))