summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--changelog.md19
-rw-r--r--lib/impure/nre.nim131
-rw-r--r--lib/impure/nre/private/util.nim16
-rw-r--r--tests/stdlib/nre/captures.nim31
-rw-r--r--tests/stdlib/nre/match.nim6
-rw-r--r--tests/stdlib/nre/replace.nim6
6 files changed, 126 insertions, 83 deletions
diff --git a/changelog.md b/changelog.md
index 093ea7ae2..9c1b218f5 100644
--- a/changelog.md
+++ b/changelog.md
@@ -31,6 +31,25 @@
 
 - `options.UnpackError` is no longer a ref type and inherits from `System.Defect` instead of `System.ValueError`.
 
+- nre's `RegexMatch.{captureBounds,captures}[]`  no longer return `Option` or
+  `nil`/`""`, respectivly. Use the newly added `n in p.captures` method to
+  check if a group is captured, otherwise you'll recieve an exception.
+
+- nre's `RegexMatch.{captureBounds,captures}.toTable` no longer accept a
+  default parameter. Instead uncaptured entries are left empty. Use
+  `Table.getOrDefault()` if you need defaults.
+
+- nre's `RegexMatch.captures.{items,toSeq}` now returns an `Option[string]`
+  instead of a `string`. With the removal of `nil` strings, this is the only
+  way to indicate a missing match. Inside your loops, instead of `capture ==
+  ""` or `capture == nil`, use `capture.isSome` to check if a capture is
+  present, and `capture.get` to get its value.
+
+- nre's `replace()` no longer throws `ValueError` when the replacement string
+  has missing captures. It instead throws `KeyError` for named captures, and
+  `IndexError` for un-named captures. This is consistant with
+  `RegexMatch.{captureBounds,captures}[]`.
+
 #### Breaking changes in the compiler
 
 - The compiler now implements the "generic symbol prepass" for `when` statements
diff --git a/lib/impure/nre.nim b/lib/impure/nre.nim
index 58594f054..94dd89db5 100644
--- a/lib/impure/nre.nim
+++ b/lib/impure/nre.nim
@@ -65,7 +65,7 @@ runnableExamples:
   let hasVowel = firstVowel.isSome()
   if hasVowel:
     let matchBounds = firstVowel.get().captureBounds[-1]
-    doAssert matchBounds.get().a == 1
+    doAssert matchBounds.a == 1
 
 
 # Type definitions {{{
@@ -167,14 +167,15 @@ type
     ##     -  ``"abc".match(re"(?<letter>\w)").get.captures["letter"] == "a"``
     ##     -  ``"abc".match(re"(\w)\w").get.captures[-1] == "ab"``
     ##
-    ## ``captureBounds[]: Option[HSlice[int, int]]``
+    ## ``captureBounds[]: HSlice[int, int]``
     ##     gets the bounds of the given capture according to the same rules as
     ##     the above. If the capture is not filled, then ``None`` is returned.
     ##     The bounds are both inclusive.
     ##
-    ##     -  ``"abc".match(re"(\w)").get.captureBounds[0].get == 0 .. 0``
-    ##     -  ``"abc".match(re"").get.captureBounds[-1].get == 0 .. -1``
-    ##     -  ``"abc".match(re"abc").get.captureBounds[-1].get == 0 .. 2``
+    ##     -  ``"abc".match(re"(\w)").get.captureBounds[0] == 0 .. 0``
+    ##     -  ``0 in "abc".match(re"(\w)").get.captureBounds == true``
+    ##     -  ``"abc".match(re"").get.captureBounds[-1] == 0 .. -1``
+    ##     -  ``"abc".match(re"abc").get.captureBounds[-1] == 0 .. 2``
     ##
     ## ``match: string``
     ##     the full text of the match.
@@ -227,9 +228,10 @@ runnableExamples:
     doAssert "abc".match(re"(?<letter>\w)").get.captures["letter"] == "a"
     doAssert "abc".match(re"(\w)\w").get.captures[-1] == "ab"
 
-    doAssert "abc".match(re"(\w)").get.captureBounds[0].get == 0 .. 0
-    doAssert "abc".match(re"").get.captureBounds[-1].get == 0 .. -1
-    doAssert "abc".match(re"abc").get.captureBounds[-1].get == 0 .. 2
+    doAssert "abc".match(re"(\w)").get.captureBounds[0] == 0 .. 0
+    doAssert 0 in "abc".match(re"(\w)").get.captureBounds == true
+    doAssert "abc".match(re"").get.captureBounds[-1] == 0 .. -1
+    doAssert "abc".match(re"abc").get.captureBounds[-1] == 0 .. 2
 # }}}
 
 proc getinfo[T](pattern: Regex, opt: cint): T =
@@ -269,78 +271,99 @@ proc matchesCrLf(pattern: Regex): bool =
 # }}}
 
 # Capture accessors {{{
-proc captureBounds*(pattern: RegexMatch): CaptureBounds = return CaptureBounds(pattern)
+func captureBounds*(pattern: RegexMatch): CaptureBounds = return CaptureBounds(pattern)
 
-proc captures*(pattern: RegexMatch): Captures = return Captures(pattern)
+func captures*(pattern: RegexMatch): Captures = return Captures(pattern)
 
-proc `[]`*(pattern: CaptureBounds, i: int): Option[HSlice[int, int]] =
+func contains*(pattern: CaptureBounds, i: int): bool =
   let pattern = RegexMatch(pattern)
-  if pattern.pcreMatchBounds[i + 1].a != -1:
-    let bounds = pattern.pcreMatchBounds[i + 1]
-    return some(int(bounds.a) .. int(bounds.b-1))
-  else:
-    return none(HSlice[int, int])
+  pattern.pcreMatchBounds[i + 1].a != -1
+
+func contains*(pattern: Captures, i: int): bool =
+  i in CaptureBounds(pattern)
 
-proc `[]`*(pattern: Captures, i: int): string =
+func `[]`*(pattern: CaptureBounds, i: int): HSlice[int, int] =
+  let pattern = RegexMatch(pattern)
+  if not (i in pattern.captureBounds):
+    raise newException(IndexError, "Group '" & $i & "' was not captured")
+
+  let bounds = pattern.pcreMatchBounds[i + 1]
+  int(bounds.a)..int(bounds.b-1)
+
+func `[]`*(pattern: Captures, i: int): string =
   let pattern = RegexMatch(pattern)
   let bounds = pattern.captureBounds[i]
 
-  if bounds.isSome:
-    let bounds = bounds.get
-    return pattern.str.substr(bounds.a, bounds.b)
-  else:
-    return ""
+  pattern.str.substr(bounds.a, bounds.b)
 
-proc match*(pattern: RegexMatch): string =
+func match*(pattern: RegexMatch): string =
   return pattern.captures[-1]
 
-proc matchBounds*(pattern: RegexMatch): HSlice[int, int] =
-  return pattern.captureBounds[-1].get
+func matchBounds*(pattern: RegexMatch): HSlice[int, int] =
+  return pattern.captureBounds[-1]
+
+func contains*(pattern: CaptureBounds, name: string): bool =
+  let pattern = RegexMatch(pattern)
+  let nameToId = pattern.pattern.captureNameToId
+  if not (name in nameToId):
+      return false
+  nameToId[name] in pattern.captureBounds
+
+func contains*(pattern: Captures, name: string): bool =
+  name in CaptureBounds(pattern)
 
-proc `[]`*(pattern: CaptureBounds, name: string): Option[HSlice[int, int]] =
+func checkNamedCaptured(pattern: RegexMatch, name: string): void =
+  if not (name in pattern.captureBounds):
+    raise newException(KeyError, "Group '" & name & "' was not captured")
+
+func `[]`*(pattern: CaptureBounds, name: string): HSlice[int, int] =
   let pattern = RegexMatch(pattern)
-  return pattern.captureBounds[pattern.pattern.captureNameToId.fget(name)]
+  checkNamedCaptured(pattern, name)
+  pattern.captureBounds[pattern.pattern.captureNameToId[name]]
 
-proc `[]`*(pattern: Captures, name: string): string =
+func `[]`*(pattern: Captures, name: string): string =
   let pattern = RegexMatch(pattern)
-  return pattern.captures[pattern.pattern.captureNameToId.fget(name)]
+  checkNamedCaptured(pattern, name)
+  return pattern.captures[pattern.pattern.captureNameToId[name]]
 
-template toTableImpl(cond: untyped) {.dirty.} =
+template toTableImpl() {.dirty.} =
   for key in RegexMatch(pattern).pattern.captureNameId.keys:
-    let nextVal = pattern[key]
-    if cond:
-      result[key] = default
-    else:
-      result[key] = nextVal
+    if key in pattern:
+        result[key] = pattern[key]
 
-proc toTable*(pattern: Captures, default: string = ""): Table[string, string] =
+func toTable*(pattern: Captures): Table[string, string] =
   result = initTable[string, string]()
-  toTableImpl(nextVal.len == 0)
+  toTableImpl()
 
-proc toTable*(pattern: CaptureBounds, default = none(HSlice[int, int])):
-    Table[string, Option[HSlice[int, int]]] =
-  result = initTable[string, Option[HSlice[int, int]]]()
-  toTableImpl(nextVal.isNone)
+func toTable*(pattern: CaptureBounds): Table[string, HSlice[int, int]] =
+  result = initTable[string, HSlice[int, int]]()
+  toTableImpl()
 
-template itemsImpl(cond: untyped) {.dirty.} =
+template itemsImpl() {.dirty.} =
   for i in 0 ..< RegexMatch(pattern).pattern.captureCount:
-    let nextVal = pattern[i]
     # done in this roundabout way to avoid multiple yields (potential code
     # bloat)
-    let nextYieldVal = if cond: default else: nextVal
-    yield nextYieldVal
+    let nextYieldVal = if i in pattern:
+      some(pattern[i])
+    else:
+      default
 
+    yield nextYieldVal
 
-iterator items*(pattern: CaptureBounds, default = none(HSlice[int, int])): Option[HSlice[int, int]] =
-  itemsImpl(nextVal.isNone)
+iterator items*(pattern: CaptureBounds,
+                default = none(HSlice[int, int])): Option[HSlice[int, int]] =
+  itemsImpl()
 
-iterator items*(pattern: Captures, default: string = ""): string =
-  itemsImpl(nextVal.len == 0)
+iterator items*(pattern: Captures,
+                default: Option[string] = none(string)): Option[string] =
+  itemsImpl()
 
-proc toSeq*(pattern: CaptureBounds, default = none(HSlice[int, int])): seq[Option[HSlice[int, int]]] =
+proc toSeq*(pattern: CaptureBounds,
+            default = none(HSlice[int, int])): seq[Option[HSlice[int, int]]] =
   accumulateResult(pattern.items(default))
 
-proc toSeq*(pattern: Captures, default: string = ""): seq[string] =
+proc toSeq*(pattern: Captures,
+            default: Option[string] = none(string)): seq[Option[string]] =
   accumulateResult(pattern.items(default))
 
 proc `$`*(pattern: RegexMatch): string =
@@ -652,7 +675,8 @@ proc split*(str: string, pattern: Regex, maxSplit = -1, start = 0): seq[string]
 
     for cap in match.captures:
       # if there are captures, include them in the result
-      result.add(cap)
+      if cap.isSome:
+        result.add(cap.get)
 
     if splits == maxSplit - 1:
       break
@@ -706,7 +730,8 @@ proc replace*(str: string, pattern: Regex,
   ## -  ``$#`` - first capture
   ## -  ``$0`` - full match
   ##
-  ## If a given capture is missing, a ``ValueError`` exception is thrown.
+  ## If a given capture is missing, ``IndexError`` thrown for un-named captures
+  ## and ``KeyError`` for named captures.
   replaceImpl(str, pattern, subproc(match))
 
 proc replace*(str: string, pattern: Regex,
diff --git a/lib/impure/nre/private/util.nim b/lib/impure/nre/private/util.nim
index a3ae84007..f7d8b1d60 100644
--- a/lib/impure/nre/private/util.nim
+++ b/lib/impure/nre/private/util.nim
@@ -1,17 +1,9 @@
 ## INTERNAL FILE FOR USE ONLY BY nre.nim.
 import tables
 
-proc fget*[K, V](self: Table[K, V], key: K): V =
-  if self.hasKey(key):
-    return self[key]
-  else:
-    raise newException(KeyError, "Key does not exist in table: " & $key)
-
 const Ident = {'a'..'z', 'A'..'Z', '0'..'9', '_', '\128'..'\255'}
 const StartIdent = Ident - {'0'..'9'}
 
-template checkNil(arg: string): string = arg
-
 template formatStr*(howExpr, namegetter, idgetter): untyped =
   let how = howExpr
   var val = newStringOfCap(how.len)
@@ -28,7 +20,7 @@ template formatStr*(howExpr, namegetter, idgetter): untyped =
         i += 2
       elif how[i + 1] == '#':
         var id {.inject.} = lastNum
-        val.add(checkNil(idgetter))
+        val.add(idgetter)
         lastNum += 1
         i += 2
       elif how[i + 1] in {'0'..'9'}:
@@ -37,7 +29,7 @@ template formatStr*(howExpr, namegetter, idgetter): untyped =
         while i < how.len and how[i] in {'0'..'9'}:
           id += (id * 10) + (ord(how[i]) - ord('0'))
           i += 1
-        val.add(checkNil(idgetter))
+        val.add(idgetter)
         lastNum = id + 1
       elif how[i + 1] in StartIdent:
         i += 1
@@ -45,7 +37,7 @@ template formatStr*(howExpr, namegetter, idgetter): untyped =
         while i < how.len and how[i] in Ident:
           name.add(how[i])
           i += 1
-        val.add(checkNil(namegetter))
+        val.add(namegetter)
       elif how[i + 1] == '{':
         i += 2
         var name {.inject.} = ""
@@ -53,7 +45,7 @@ template formatStr*(howExpr, namegetter, idgetter): untyped =
           name.add(how[i])
           i += 1
         i += 1
-        val.add(checkNil(namegetter))
+        val.add(namegetter)
       else:
         raise newException(Exception, "Syntax error in format string at " & $i)
   val
diff --git a/tests/stdlib/nre/captures.nim b/tests/stdlib/nre/captures.nim
index 31de71154..bd5e83ecc 100644
--- a/tests/stdlib/nre/captures.nim
+++ b/tests/stdlib/nre/captures.nim
@@ -9,16 +9,16 @@ suite "captures":
   test "capture bounds are correct":
     let ex1 = re("([0-9])")
     check("1 23".find(ex1).matchBounds == 0 .. 0)
-    check("1 23".find(ex1).captureBounds[0].get == 0 .. 0)
+    check("1 23".find(ex1).captureBounds[0] == 0 .. 0)
     check("1 23".find(ex1, 1).matchBounds == 2 .. 2)
     check("1 23".find(ex1, 3).matchBounds == 3 .. 3)
 
     let ex2 = re("()()()()()()()()()()([0-9])")
-    check("824".find(ex2).captureBounds[0].get == 0 .. -1)
-    check("824".find(ex2).captureBounds[10].get == 0 .. 0)
+    check("824".find(ex2).captureBounds[0] == 0 .. -1)
+    check("824".find(ex2).captureBounds[10] == 0 .. 0)
 
     let ex3 = re("([0-9]+)")
-    check("824".find(ex3).captureBounds[0].get == 0 .. 2)
+    check("824".find(ex3).captureBounds[0] == 0 .. 2)
 
   test "named captures":
     let ex1 = "foobar".find(re("(?<foo>foo)(?<bar>bar)"))
@@ -26,13 +26,19 @@ suite "captures":
     check(ex1.captures["bar"] == "bar")
 
     let ex2 = "foo".find(re("(?<foo>foo)(?<bar>bar)?"))
+    check("foo" in ex2.captureBounds)
     check(ex2.captures["foo"] == "foo")
-    check(ex2.captures["bar"] == "")
+    check(not ("bar" in ex2.captures))
+    expect KeyError:
+        discard ex2.captures["bar"]
 
   test "named capture bounds":
     let ex1 = "foo".find(re("(?<foo>foo)(?<bar>bar)?"))
-    check(ex1.captureBounds["foo"] == some(0..2))
-    check(ex1.captureBounds["bar"] == none(Slice[int]))
+    check("foo" in ex1.captureBounds)
+    check(ex1.captureBounds["foo"] == 0..2)
+    check(not ("bar" in ex1.captures))
+    expect KeyError:
+        discard ex1.captures["bar"]
 
   test "capture count":
     let ex1 = re("(?<foo>foo)(?<bar>bar)?")
@@ -41,19 +47,18 @@ suite "captures":
 
   test "named capture table":
     let ex1 = "foo".find(re("(?<foo>foo)(?<bar>bar)?"))
-    check(ex1.captures.toTable == {"foo" : "foo", "bar" : ""}.toTable())
-    check(ex1.captureBounds.toTable == {"foo" : some(0..2), "bar" : none(Slice[int])}.toTable())
-    check(ex1.captures.toTable("") == {"foo" : "foo", "bar" : ""}.toTable())
+    check(ex1.captures.toTable == {"foo" : "foo"}.toTable())
+    check(ex1.captureBounds.toTable == {"foo" : 0..2}.toTable())
 
     let ex2 = "foobar".find(re("(?<foo>foo)(?<bar>bar)?"))
     check(ex2.captures.toTable == {"foo" : "foo", "bar" : "bar"}.toTable())
 
   test "capture sequence":
     let ex1 = "foo".find(re("(?<foo>foo)(?<bar>bar)?"))
-    check(ex1.captures.toSeq == @["foo", ""])
+    check(ex1.captures.toSeq == @[some("foo"), none(string)])
     check(ex1.captureBounds.toSeq == @[some(0..2), none(Slice[int])])
-    check(ex1.captures.toSeq("") == @["foo", ""])
+    check(ex1.captures.toSeq(some("")) == @[some("foo"), some("")])
 
     let ex2 = "foobar".find(re("(?<foo>foo)(?<bar>bar)?"))
-    check(ex2.captures.toSeq == @["foo", "bar"])
+    check(ex2.captures.toSeq == @[some("foo"), some("bar")])
 
diff --git a/tests/stdlib/nre/match.nim b/tests/stdlib/nre/match.nim
index 38ee5214b..06b69fd04 100644
--- a/tests/stdlib/nre/match.nim
+++ b/tests/stdlib/nre/match.nim
@@ -10,9 +10,9 @@ suite "match":
     check("abc".match(re"(\w)").captures[0] == "a")
     check("abc".match(re"(?<letter>\w)").captures["letter"] == "a")
     check("abc".match(re"(\w)\w").captures[-1] == "ab")
-    check("abc".match(re"(\w)").captureBounds[0].get == 0 .. 0)
-    check("abc".match(re"").captureBounds[-1].get == 0 .. -1)
-    check("abc".match(re"abc").captureBounds[-1].get == 0 .. 2)
+    check("abc".match(re"(\w)").captureBounds[0] == 0 .. 0)
+    check("abc".match(re"").captureBounds[-1] == 0 .. -1)
+    check("abc".match(re"abc").captureBounds[-1] == 0 .. 2)
 
   test "match test cases":
     check("123".match(re"").matchBounds == 0 .. -1)
diff --git a/tests/stdlib/nre/replace.nim b/tests/stdlib/nre/replace.nim
index b762271a2..812a7f384 100644
--- a/tests/stdlib/nre/replace.nim
+++ b/tests/stdlib/nre/replace.nim
@@ -16,5 +16,7 @@ suite "replace":
     check("123".replace(re"(?<foo>\d)(\d)", "${foo}$#$#") == "1123")
 
   test "replacing missing captures should throw instead of segfaulting":
-    discard "ab".replace(re"(a)|(b)", "$1$2")
-    discard "b".replace(re"(a)?(b)", "$1$2")
+    expect IndexError: discard "ab".replace(re"(a)|(b)", "$1$2")
+    expect IndexError: discard "b".replace(re"(a)?(b)", "$1$2")
+    expect KeyError: discard "b".replace(re"(a)?", "${foo}")
+    expect KeyError: discard "b".replace(re"(?<foo>a)?", "${foo}")