summary refs log tree commit diff stats
path: root/lib/impure/nre.nim
diff options
context:
space:
mode:
Diffstat (limited to 'lib/impure/nre.nim')
-rw-r--r--lib/impure/nre.nim178
1 files changed, 82 insertions, 96 deletions
diff --git a/lib/impure/nre.nim b/lib/impure/nre.nim
index 7b2d7d3ee..39d238055 100644
--- a/lib/impure/nre.nim
+++ b/lib/impure/nre.nim
@@ -20,15 +20,17 @@ when defined(js):
 ## search the internet for a wide variety of third-party documentation and
 ## tools.
 ##
-## **Note**: If you love `sequtils.toSeq` we have bad news for you. This
-## library doesn't work with it due to documented compiler limitations. As
-## a workaround, use this:
-##
-## .. code-block:: nim
-##
-##    import std/nre except toSeq
-##
-##
+## .. warning:: If you love `sequtils.toSeq` we have bad news for you. This
+##   library doesn't work with it due to documented compiler limitations. As
+##   a workaround, use this:
+runnableExamples:
+  # either `import std/nre except toSeq` or fully qualify `sequtils.toSeq`:
+  import std/sequtils
+  iterator iota(n: int): int =
+    for i in 0..<n: yield i
+  assert sequtils.toSeq(iota(3)) == @[0, 1, 2]
+## .. note:: There are also alternative nimble packages such as [tinyre](https://github.com/khchen/tinyre)
+##   and [regex](https://github.com/nitely/nim-regex).
 ## Licencing
 ## ---------
 ##
@@ -36,40 +38,38 @@ when defined(js):
 ## this module.
 ##
 ## .. _`some additional terms`: http://pcre.sourceforge.net/license.txt
-##
 runnableExamples:
+  import std/sugar
   let vowels = re"[aeoui]"
-
-  let expectedResults = [
-    1 .. 1,
-    2 .. 2,
-    4 .. 4,
-    6 .. 6,
-    7 .. 7,
-  ]
-  var i = 0
-  for match in "moigagoo".findIter(vowels):
-    doAssert match.matchBounds == expectedResults[i]
-    inc i
+  let bounds = collect:
+    for match in "moiga".findIter(vowels): match.matchBounds
+  assert bounds == @[1 .. 1, 2 .. 2, 4 .. 4]
+  from std/sequtils import toSeq
+  let s = sequtils.toSeq("moiga".findIter(vowels))
+    # fully qualified to avoid confusion with nre.toSeq
+  assert s.len == 3
 
   let firstVowel = "foo".find(vowels)
   let hasVowel = firstVowel.isSome()
-  if hasVowel:
-    let matchBounds = firstVowel.get().captureBounds[-1]
-    doAssert matchBounds.a == 1
+  assert hasVowel
+  let matchBounds = firstVowel.get().captureBounds[-1]
+  assert matchBounds.a == 1
 
-  ## as with module `re`, unless specified otherwise, `start` parameter in each
-  ## proc indicates where the scan starts, but outputs are relative to the start
-  ## of the input string, not to `start`:
-  doAssert find("uxabc", re"(?<=x|y)ab", start = 1).get.captures[-1] == "ab"
-  doAssert find("uxabc", re"ab", start = 3).isNone
+  # as with module `re`, unless specified otherwise, `start` parameter in each
+  # proc indicates where the scan starts, but outputs are relative to the start
+  # of the input string, not to `start`:
+  assert find("uxabc", re"(?<=x|y)ab", start = 1).get.captures[-1] == "ab"
+  assert find("uxabc", re"ab", start = 3).isNone
 
-from pcre import nil
+from std/pcre import nil
 import nre/private/util
-import tables
-from strutils import `%`
-import options
-from unicode import runeLenAt
+import std/tables
+from std/strutils import `%`
+import std/options
+from std/unicode import runeLenAt
+
+when defined(nimPreviewSlimSystem):
+  import std/assertions
 
 export options
 
@@ -80,16 +80,16 @@ type
     ## comment".`
     ##
     ## `pattern: string`
-    ##     the string that was used to create the pattern. For details on how
+    ## :   the string that was used to create the pattern. For details on how
     ##     to write a pattern, please see `the official PCRE pattern
     ##     documentation.
     ##     <https://www.pcre.org/original/doc/html/pcrepattern.html>`_
     ##
     ## `captureCount: int`
-    ##     the number of captures that the pattern has.
+    ## :   the number of captures that the pattern has.
     ##
     ## `captureNameId: Table[string, int]`
-    ##     a table from the capture names to their numeric id.
+    ## :   a table from the capture names to their numeric id.
     ##
     ##
     ## Options
@@ -145,7 +145,7 @@ type
     ## `DOLLAR_ENDONLY`, `FIRSTLINE`, `NO_AUTO_CAPTURE`,
     ## `JAVASCRIPT_COMPAT`, `U`, `NO_STUDY`. In other PCRE wrappers, you
     ## will need to pass these as separate flags to PCRE.
-    pattern*: string  ## not nil
+    pattern*: string
     pcreObj: ptr pcre.Pcre  ## not nil
     pcreExtra: ptr pcre.ExtraData  ## nil
 
@@ -156,49 +156,39 @@ type
     ## execution. On failure, it is none, on success, it is some.
     ##
     ## `pattern: Regex`
-    ##     the pattern that is being matched
+    ## :   the pattern that is being matched
     ##
     ## `str: string`
-    ##     the string that was matched against
+    ## :   the string that was matched against
     ##
     ## `captures[]: string`
-    ##     the string value of whatever was captured at that id. If the value
+    ## :   the string value of whatever was captured at that id. If the value
     ##     is invalid, then behavior is undefined. If the id is `-1`, then
     ##     the whole match is returned. If the given capture was not matched,
-    ##     `nil` is returned.
-    ##
-    ##     -  `"abc".match(re"(\w)").get.captures[0] == "a"`
-    ##     -  `"abc".match(re"(?<letter>\w)").get.captures["letter"] == "a"`
-    ##     -  `"abc".match(re"(\w)\w").get.captures[-1] == "ab"`
+    ##     `nil` is returned. See examples for `match`.
     ##
     ## `captureBounds[]: HSlice[int, int]`
-    ##     gets the bounds of the given capture according to the same rules as
+    ## :   gets the bounds of the given capture according to the same rules as
     ##     the above. If the capture is not filled, then `None` is returned.
-    ##     The bounds are both inclusive.
-    ##
-    ##     -  `"abc".match(re"(\w)").get.captureBounds[0] == 0 .. 0`
-    ##     -  `0 in "abc".match(re"(\w)").get.captureBounds == true`
-    ##     -  `"abc".match(re"").get.captureBounds[-1] == 0 .. -1`
-    ##     -  `"abc".match(re"abc").get.captureBounds[-1] == 0 .. 2`
+    ##     The bounds are both inclusive.  See examples for `match`.
     ##
     ## `match: string`
-    ##     the full text of the match.
+    ## :   the full text of the match.
     ##
     ## `matchBounds: HSlice[int, int]`
-    ##     the bounds of the match, as in `captureBounds[]`
+    ## :   the bounds of the match, as in `captureBounds[]`
     ##
     ## `(captureBounds|captures).toTable`
-    ##     returns a table with each named capture as a key.
+    ## :   returns a table with each named capture as a key.
     ##
     ## `(captureBounds|captures).toSeq`
-    ##     returns all the captures by their number.
+    ## :   returns all the captures by their number.
     ##
     ## `$: string`
-    ##     same as `match`
+    ## :   same as `match`
     pattern*: Regex  ## The regex doing the matching.
                      ## Not nil.
     str*: string  ## The string that was matched against.
-                  ## Not nil.
     pcreMatchBounds: seq[HSlice[cint, cint]] ## First item is the bounds of the match
                                             ## Other items are the captures
                                             ## `a` is inclusive start, `b` is exclusive end
@@ -226,22 +216,12 @@ type
     ## for whatever reason. The message contains the error
     ## code.
 
-runnableExamples:
-  # This MUST be kept in sync with the examples in RegexMatch
-  doAssert "abc".match(re"(\w)").get.captures[0] == "a"
-  doAssert "abc".match(re"(?<letter>\w)").get.captures["letter"] == "a"
-  doAssert "abc".match(re"(\w)\w").get.captures[-1] == "ab"
-
-  doAssert "abc".match(re"(\w)").get.captureBounds[0] == 0 .. 0
-  doAssert 0 in "abc".match(re"(\w)").get.captureBounds == true
-  doAssert "abc".match(re"").get.captureBounds[-1] == 0 .. -1
-  doAssert "abc".match(re"abc").get.captureBounds[-1] == 0 .. 2
-
-
 proc destroyRegex(pattern: Regex) =
+  `=destroy`(pattern.pattern)
   pcre.free_substring(cast[cstring](pattern.pcreObj))
   if pattern.pcreExtra != nil:
     pcre.free_study(pattern.pcreExtra)
+  `=destroy`(pattern.captureNameToId)
 
 proc getinfo[T](pattern: Regex, opt: cint): T =
   let retcode = pcre.fullinfo(pattern.pcreObj, pattern.pcreExtra, opt, addr result)
@@ -308,7 +288,7 @@ proc matchesCrLf(pattern: Regex): bool =
   let newlineFlags = flags and (pcre.NEWLINE_CRLF or
                                 pcre.NEWLINE_ANY or
                                 pcre.NEWLINE_ANYCRLF)
-  if newLineFlags > 0u32:
+  if newlineFlags > 0u32:
     return true
 
   # get flags from build config
@@ -540,24 +520,33 @@ proc matchImpl(str: string, pattern: Regex, start, endpos: int, flags: int): Opt
 proc match*(str: string, pattern: Regex, start = 0, endpos = int.high): Option[RegexMatch] =
   ## Like `find(...)<#find,string,Regex,int>`_, but anchored to the start of the
   ## string.
-  ##
   runnableExamples:
-    doAssert "foo".match(re"f").isSome
-    doAssert "foo".match(re"o").isNone
+    assert "foo".match(re"f").isSome
+    assert "foo".match(re"o").isNone
 
+    assert "abc".match(re"(\w)").get.captures[0] == "a"
+    assert "abc".match(re"(?<letter>\w)").get.captures["letter"] == "a"
+    assert "abc".match(re"(\w)\w").get.captures[-1] == "ab"
+
+    assert "abc".match(re"(\w)").get.captureBounds[0] == 0 .. 0
+    assert 0 in "abc".match(re"(\w)").get.captureBounds
+    assert "abc".match(re"").get.captureBounds[-1] == 0 .. -1
+    assert "abc".match(re"abc").get.captureBounds[-1] == 0 .. 2
   return str.matchImpl(pattern, start, endpos, pcre.ANCHORED)
 
 iterator findIter*(str: string, pattern: Regex, start = 0, endpos = int.high): RegexMatch =
   ## Works the same as `find(...)<#find,string,Regex,int>`_, but finds every
-  ## non-overlapping match. `"2222".find(re"22")` is `"22", "22"`, not
-  ## `"22", "22", "22"`.
-  ##
+  ## non-overlapping match:
+  runnableExamples:
+    import std/sugar
+    assert collect(for a in "2222".findIter(re"22"): a.match) == @["22", "22"]
+     # not @["22", "22", "22"]
   ## Arguments are the same as `find(...)<#find,string,Regex,int>`_
   ##
   ## Variants:
   ##
   ## -  `proc findAll(...)` returns a `seq[string]`
-  # see pcredemo for explanation
+  # see pcredemo for explanation => https://www.pcre.org/original/doc/html/pcredemo.html
   let matchesCrLf = pattern.matchesCrLf()
   let unicode = uint32(getinfo[culong](pattern, pcre.INFO_OPTIONS) and
     pcre.UTF8) > 0u32
@@ -578,7 +567,7 @@ iterator findIter*(str: string, pattern: Regex, start = 0, endpos = int.high): R
       # either the end of the input or the string
       # cannot be split here - we also need to bail
       # if we've never matched and we've already tried to...
-      if offset >= strlen or neverMatched:
+      if flags == 0 or offset >= strlen or neverMatched: # All matches found
         break
 
       if matchesCrLf and offset < (str.len - 1) and
@@ -594,7 +583,6 @@ iterator findIter*(str: string, pattern: Regex, start = 0, endpos = int.high): R
     else:
       neverMatched = false
       offset = match.get.matchBounds.b + 1
-
       yield match.get
 
 proc find*(str: string, pattern: Regex, start = 0, endpos = int.high): Option[RegexMatch] =
@@ -602,11 +590,11 @@ proc find*(str: string, pattern: Regex, start = 0, endpos = int.high): Option[Re
   ## positions.
   ##
   ## `start`
-  ##     The start point at which to start matching. `|abc` is `0`;
+  ## :   The start point at which to start matching. `|abc` is `0`;
   ##     `a|bc` is `1`
   ##
   ## `endpos`
-  ##     The maximum index for a match; `int.high` means the end of the
+  ## :   The maximum index for a match; `int.high` means the end of the
   ##     string, otherwise it’s an inclusive upper bound.
   return str.matchImpl(pattern, start, endpos, 0)
 
@@ -619,11 +607,10 @@ proc contains*(str: string, pattern: Regex, start = 0, endpos = int.high): bool
   ## Determine if the string contains the given pattern between the end and
   ## start positions:
   ## This function is equivalent to `isSome(str.find(pattern, start, endpos))`.
-  ##
   runnableExamples:
-    doAssert "abc".contains(re"bc")
-    doAssert not "abc".contains(re"cd")
-    doAssert not "abc".contains(re"a", start = 1)
+    assert "abc".contains(re"bc")
+    assert not "abc".contains(re"cd")
+    assert not "abc".contains(re"a", start = 1)
 
   return isSome(str.find(pattern, start, endpos))
 
@@ -635,16 +622,16 @@ proc split*(str: string, pattern: Regex, maxSplit = -1, start = 0): seq[string]
   ##
   runnableExamples:
     # -  If the match is zero-width, then the string is still split:
-    doAssert "123".split(re"") == @["1", "2", "3"]
+    assert "123".split(re"") == @["1", "2", "3"]
 
     # -  If the pattern has a capture in it, it is added after the string
     #    split:
-    doAssert "12".split(re"(\d)") == @["", "1", "", "2", ""]
+    assert "12".split(re"(\d)") == @["", "1", "", "2", ""]
 
     # -  If `maxsplit != -1`, then the string will only be split
     #    `maxsplit - 1` times. This means that there will be `maxsplit`
     #    strings in the output seq.
-    doAssert "1.2.3".split(re"\.", maxsplit = 2) == @["1", "2.3"]
+    assert "1.2.3".split(re"\.", maxsplit = 2) == @["1", "2.3"]
 
   result = @[]
   var lastIdx = start
@@ -715,8 +702,7 @@ proc replace*(str: string, pattern: Regex,
   ## each match and the return value is the replacement value.
   ##
   ## If `subproc` is a `proc (string): string`, then it is executed with the
-  ## full text of the match and and the return value is the replacement
-  ## value.
+  ## full text of the match and the return value is the replacement value.
   ##
   ## If `subproc` is a string, the syntax is as follows:
   ##
@@ -747,9 +733,9 @@ proc escapeRe*(str: string): string {.gcsafe.} =
   ##
   ## Escaped char: `\ + * ? [ ^ ] $ ( ) { } = ! < > | : -`
   runnableExamples:
-    doAssert escapeRe("fly+wind") == "fly\\+wind"
-    doAssert escapeRe("!") == "\\!"
-    doAssert escapeRe("nim*") == "nim\\*"
+    assert escapeRe("fly+wind") == "fly\\+wind"
+    assert escapeRe("!") == "\\!"
+    assert escapeRe("nim*") == "nim\\*"
 
   #([\\+*?[^\]$(){}=!<>|:-])
   const SpecialCharMatcher = {'\\', '+', '*', '?', '[', '^', ']', '$', '(',