summary refs log tree commit diff stats
path: root/lib/pure/pegs.nim
diff options
context:
space:
mode:
Diffstat (limited to 'lib/pure/pegs.nim')
-rw-r--r--lib/pure/pegs.nim356
1 files changed, 181 insertions, 175 deletions
diff --git a/lib/pure/pegs.nim b/lib/pure/pegs.nim
index 7cef0a00d..39f0bfa95 100644
--- a/lib/pure/pegs.nim
+++ b/lib/pure/pegs.nim
@@ -29,7 +29,7 @@ when useUnicode:
 const
   InlineThreshold = 5  ## number of leaves; -1 to disable inlining
   MaxSubpatterns* = 20 ## defines the maximum number of subpatterns that
-                       ## can be captured. More subpatterns cannot be captured! 
+                       ## can be captured. More subpatterns cannot be captured!
 
 type
   PegKind = enum
@@ -85,14 +85,14 @@ type
     of pkBackRef..pkBackRefIgnoreStyle: index: range[0..MaxSubpatterns]
     else: sons: seq[TNode]
   NonTerminal* = ref NonTerminalObj
-  
+
   Peg* = TNode ## type that represents a PEG
 
 {.deprecated: [TPeg: Peg].}
 
 proc term*(t: string): Peg {.nosideEffect, rtl, extern: "npegs$1Str".} =
   ## constructs a PEG from a terminal string
-  if t.len != 1:  
+  if t.len != 1:
     result.kind = pkTerminal
     result.term = t
   else:
@@ -116,7 +116,7 @@ proc term*(t: char): Peg {.nosideEffect, rtl, extern: "npegs$1Char".} =
   assert t != '\0'
   result.kind = pkChar
   result.ch = t
-  
+
 proc charSet*(s: set[char]): Peg {.nosideEffect, rtl, extern: "npegs$1".} =
   ## constructs a PEG from a character set `s`
   assert '\0' notin s
@@ -129,12 +129,12 @@ proc add(d: var Peg, s: Peg) {.inline.} = add(d.sons, s)
 
 proc addChoice(dest: var Peg, elem: Peg) =
   var L = dest.len-1
-  if L >= 0 and dest.sons[L].kind == pkCharChoice: 
+  if L >= 0 and dest.sons[L].kind == pkCharChoice:
     # caution! Do not introduce false aliasing here!
     case elem.kind
     of pkCharChoice:
       dest.sons[L] = charSet(dest.sons[L].charChoice[] + elem.charChoice[])
-    of pkChar: 
+    of pkChar:
       dest.sons[L] = charSet(dest.sons[L].charChoice[] + {elem.ch})
     else: add(dest, elem)
   else: add(dest, elem)
@@ -158,12 +158,12 @@ proc `/`*(a: varargs[Peg]): Peg {.
 
 proc addSequence(dest: var Peg, elem: Peg) =
   var L = dest.len-1
-  if L >= 0 and dest.sons[L].kind == pkTerminal: 
+  if L >= 0 and dest.sons[L].kind == pkTerminal:
     # caution! Do not introduce false aliasing here!
     case elem.kind
-    of pkTerminal: 
+    of pkTerminal:
       dest.sons[L] = term(dest.sons[L].term & elem.term)
-    of pkChar: 
+    of pkChar:
       dest.sons[L] = term(dest.sons[L].term & elem.ch)
     else: add(dest, elem)
   else: add(dest, elem)
@@ -172,7 +172,7 @@ proc sequence*(a: varargs[Peg]): Peg {.
   nosideEffect, rtl, extern: "npegs$1".} =
   ## constructs a sequence with all the PEGs from `a`
   multipleOp(pkSequence, addSequence)
- 
+
 proc `?`*(a: Peg): Peg {.nosideEffect, rtl, extern: "npegsOptional".} =
   ## constructs an optional for the PEG `a`
   if a.kind in {pkOption, pkGreedyRep, pkGreedyAny, pkGreedyRepChar,
@@ -207,7 +207,7 @@ proc `!*`*(a: Peg): Peg {.nosideEffect, rtl, extern: "npegsSearch".} =
   result.kind = pkSearch
   result.sons = @[a]
 
-proc `!*\`*(a: Peg): Peg {.noSideEffect, rtl, 
+proc `!*\`*(a: Peg): Peg {.noSideEffect, rtl,
                              extern: "npgegsCapturedSearch".} =
   ## constructs a "captured search" for the PEG `a`
   result.kind = pkCapturedSearch
@@ -216,7 +216,7 @@ proc `!*\`*(a: Peg): Peg {.noSideEffect, rtl,
 proc `+`*(a: Peg): Peg {.nosideEffect, rtl, extern: "npegsGreedyPosRep".} =
   ## constructs a "greedy positive repetition" with the PEG `a`
   return sequence(a, *a)
-  
+
 proc `&`*(a: Peg): Peg {.nosideEffect, rtl, extern: "npegsAndPredicate".} =
   ## constructs an "and predicate" with the PEG `a`
   result.kind = pkAndPredicate
@@ -239,33 +239,33 @@ proc newLine*: Peg {.inline.} =
   ## constructs the PEG `newline`:idx: (``\n``)
   result.kind = pkNewLine
 
-proc unicodeLetter*: Peg {.inline.} = 
+proc unicodeLetter*: Peg {.inline.} =
   ## constructs the PEG ``\letter`` which matches any Unicode letter.
   result.kind = pkLetter
-  
-proc unicodeLower*: Peg {.inline.} = 
+
+proc unicodeLower*: Peg {.inline.} =
   ## constructs the PEG ``\lower`` which matches any Unicode lowercase letter.
-  result.kind = pkLower 
+  result.kind = pkLower
 
-proc unicodeUpper*: Peg {.inline.} = 
+proc unicodeUpper*: Peg {.inline.} =
   ## constructs the PEG ``\upper`` which matches any Unicode uppercase letter.
   result.kind = pkUpper
-  
-proc unicodeTitle*: Peg {.inline.} = 
+
+proc unicodeTitle*: Peg {.inline.} =
   ## constructs the PEG ``\title`` which matches any Unicode title letter.
   result.kind = pkTitle
 
-proc unicodeWhitespace*: Peg {.inline.} = 
-  ## constructs the PEG ``\white`` which matches any Unicode 
+proc unicodeWhitespace*: Peg {.inline.} =
+  ## constructs the PEG ``\white`` which matches any Unicode
   ## whitespace character.
   result.kind = pkWhitespace
 
-proc startAnchor*: Peg {.inline.} = 
-  ## constructs the PEG ``^`` which matches the start of the input.  
+proc startAnchor*: Peg {.inline.} =
+  ## constructs the PEG ``^`` which matches the start of the input.
   result.kind = pkStartAnchor
 
-proc endAnchor*: Peg {.inline.} = 
-  ## constructs the PEG ``$`` which matches the end of the input.  
+proc endAnchor*: Peg {.inline.} =
+  ## constructs the PEG ``$`` which matches the end of the input.
   result = !any()
 
 proc capture*(a: Peg): Peg {.nosideEffect, rtl, extern: "npegsCapture".} =
@@ -274,21 +274,21 @@ proc capture*(a: Peg): Peg {.nosideEffect, rtl, extern: "npegsCapture".} =
   result.sons = @[a]
 
 proc backref*(index: range[1..MaxSubpatterns]): Peg {.
-  nosideEffect, rtl, extern: "npegs$1".} = 
+  nosideEffect, rtl, extern: "npegs$1".} =
   ## constructs a back reference of the given `index`. `index` starts counting
   ## from 1.
   result.kind = pkBackRef
   result.index = index-1
 
 proc backrefIgnoreCase*(index: range[1..MaxSubpatterns]): Peg {.
-  nosideEffect, rtl, extern: "npegs$1".} = 
+  nosideEffect, rtl, extern: "npegs$1".} =
   ## constructs a back reference of the given `index`. `index` starts counting
   ## from 1. Ignores case for matching.
   result.kind = pkBackRefIgnoreCase
   result.index = index-1
 
 proc backrefIgnoreStyle*(index: range[1..MaxSubpatterns]): Peg {.
-  nosideEffect, rtl, extern: "npegs$1".}= 
+  nosideEffect, rtl, extern: "npegs$1".}=
   ## constructs a back reference of the given `index`. `index` starts counting
   ## from 1. Ignores style for matching.
   result.kind = pkBackRefIgnoreStyle
@@ -298,7 +298,7 @@ proc spaceCost(n: Peg): int =
   case n.kind
   of pkEmpty: discard
   of pkTerminal, pkTerminalIgnoreCase, pkTerminalIgnoreStyle, pkChar,
-     pkGreedyRepChar, pkCharChoice, pkGreedyRepSet, 
+     pkGreedyRepChar, pkCharChoice, pkGreedyRepSet,
      pkAny..pkWhitespace, pkGreedyAny:
     result = 1
   of pkNonTerminal:
@@ -310,7 +310,7 @@ proc spaceCost(n: Peg): int =
       if result >= InlineThreshold: break
 
 proc nonterminal*(n: NonTerminal): Peg {.
-  nosideEffect, rtl, extern: "npegs$1".} = 
+  nosideEffect, rtl, extern: "npegs$1".} =
   ## constructs a PEG that consists of the nonterminal symbol
   assert n != nil
   if ntDeclared in n.flags and spaceCost(n.rule) < InlineThreshold:
@@ -331,7 +331,7 @@ proc newNonTerminal*(name: string, line, column: int): NonTerminal {.
 template letters*: expr =
   ## expands to ``charset({'A'..'Z', 'a'..'z'})``
   charSet({'A'..'Z', 'a'..'z'})
-  
+
 template digits*: expr =
   ## expands to ``charset({'0'..'9'})``
   charSet({'0'..'9'})
@@ -339,11 +339,11 @@ template digits*: expr =
 template whitespace*: expr =
   ## expands to ``charset({' ', '\9'..'\13'})``
   charSet({' ', '\9'..'\13'})
-  
+
 template identChars*: expr =
   ## expands to ``charset({'a'..'z', 'A'..'Z', '0'..'9', '_'})``
   charSet({'a'..'z', 'A'..'Z', '0'..'9', '_'})
-  
+
 template identStartChars*: expr =
   ## expands to ``charset({'A'..'Z', 'a'..'z', '_'})``
   charSet({'a'..'z', 'A'..'Z', '_'})
@@ -352,14 +352,14 @@ template ident*: expr =
   ## same as ``[a-zA-Z_][a-zA-z_0-9]*``; standard identifier
   sequence(charSet({'a'..'z', 'A'..'Z', '_'}),
            *charSet({'a'..'z', 'A'..'Z', '0'..'9', '_'}))
-  
+
 template natural*: expr =
   ## same as ``\d+``
   +digits
 
 # ------------------------- debugging -----------------------------------------
 
-proc esc(c: char, reserved = {'\0'..'\255'}): string = 
+proc esc(c: char, reserved = {'\0'..'\255'}): string =
   case c
   of '\b': result = "\\b"
   of '\t': result = "\\t"
@@ -374,38 +374,38 @@ proc esc(c: char, reserved = {'\0'..'\255'}): string =
   elif c < ' ' or c >= '\128': result = '\\' & $ord(c)
   elif c in reserved: result = '\\' & c
   else: result = $c
-  
+
 proc singleQuoteEsc(c: char): string = return "'" & esc(c, {'\''}) & "'"
 
-proc singleQuoteEsc(str: string): string = 
+proc singleQuoteEsc(str: string): string =
   result = "'"
   for c in items(str): add result, esc(c, {'\''})
   add result, '\''
-  
-proc charSetEscAux(cc: set[char]): string = 
+
+proc charSetEscAux(cc: set[char]): string =
   const reserved = {'^', '-', ']'}
   result = ""
   var c1 = 0
-  while c1 <= 0xff: 
-    if chr(c1) in cc: 
+  while c1 <= 0xff:
+    if chr(c1) in cc:
       var c2 = c1
       while c2 < 0xff and chr(succ(c2)) in cc: inc(c2)
-      if c1 == c2: 
+      if c1 == c2:
         add result, esc(chr(c1), reserved)
-      elif c2 == succ(c1): 
+      elif c2 == succ(c1):
         add result, esc(chr(c1), reserved) & esc(chr(c2), reserved)
-      else: 
+      else:
         add result, esc(chr(c1), reserved) & '-' & esc(chr(c2), reserved)
       c1 = c2
     inc(c1)
-  
+
 proc charSetEsc(cc: set[char]): string =
-  if card(cc) >= 128+64: 
+  if card(cc) >= 128+64:
     result = "[^" & charSetEscAux({'\1'..'\xFF'} - cc) & ']'
-  else: 
+  else:
     result = '[' & charSetEscAux(cc) & ']'
-  
-proc toStrAux(r: Peg, res: var string) = 
+
+proc toStrAux(r: Peg, res: var string) =
   case r.kind
   of pkEmpty: add(res, "()")
   of pkAny: add(res, '.')
@@ -469,25 +469,25 @@ proc toStrAux(r: Peg, res: var string) =
     toStrAux(r.sons[0], res)
   of pkCapture:
     add(res, '{')
-    toStrAux(r.sons[0], res)    
+    toStrAux(r.sons[0], res)
     add(res, '}')
-  of pkBackRef: 
+  of pkBackRef:
     add(res, '$')
     add(res, $r.index)
-  of pkBackRefIgnoreCase: 
+  of pkBackRefIgnoreCase:
     add(res, "i$")
     add(res, $r.index)
-  of pkBackRefIgnoreStyle: 
+  of pkBackRefIgnoreStyle:
     add(res, "y$")
     add(res, $r.index)
   of pkRule:
-    toStrAux(r.sons[0], res)    
+    toStrAux(r.sons[0], res)
     add(res, " <- ")
     toStrAux(r.sons[1], res)
   of pkList:
     for i in 0 .. high(r.sons):
       toStrAux(r.sons[i], res)
-      add(res, "\n")  
+      add(res, "\n")
   of pkStartAnchor:
     add(res, '^')
 
@@ -506,14 +506,14 @@ type
 
 {.deprecated: [TCaptures: Captures].}
 
-proc bounds*(c: Captures, 
-             i: range[0..MaxSubpatterns-1]): tuple[first, last: int] = 
+proc bounds*(c: Captures,
+             i: range[0..MaxSubpatterns-1]): tuple[first, last: int] =
   ## returns the bounds ``[first..last]`` of the `i`'th capture.
   result = c.matches[i]
 
 when not useUnicode:
   type
-    TRune = char
+    Rune = char
   template fastRuneAt(s, i, ch: expr) =
     ch = s[i]
     inc(i)
@@ -527,7 +527,7 @@ when not useUnicode:
 
 proc rawMatch*(s: string, p: Peg, start: int, c: var Captures): int {.
                nosideEffect, rtl, extern: "npegs$1".} =
-  ## low-level matching proc that implements the PEG interpreter. Use this 
+  ## low-level matching proc that implements the PEG interpreter. Use this
   ## for maximum efficiency (every other PEG operation ends up calling this
   ## proc).
   ## Returns -1 if it does not match, else the length of the match
@@ -541,7 +541,7 @@ proc rawMatch*(s: string, p: Peg, start: int, c: var Captures): int {.
       result = runeLenAt(s, start)
     else:
       result = -1
-  of pkLetter: 
+  of pkLetter:
     if s[start] != '\0':
       var a: Rune
       result = start
@@ -550,7 +550,7 @@ proc rawMatch*(s: string, p: Peg, start: int, c: var Captures): int {.
       else: result = -1
     else:
       result = -1
-  of pkLower: 
+  of pkLower:
     if s[start] != '\0':
       var a: Rune
       result = start
@@ -559,7 +559,7 @@ proc rawMatch*(s: string, p: Peg, start: int, c: var Captures): int {.
       else: result = -1
     else:
       result = -1
-  of pkUpper: 
+  of pkUpper:
     if s[start] != '\0':
       var a: Rune
       result = start
@@ -568,16 +568,16 @@ proc rawMatch*(s: string, p: Peg, start: int, c: var Captures): int {.
       else: result = -1
     else:
       result = -1
-  of pkTitle: 
+  of pkTitle:
     if s[start] != '\0':
       var a: Rune
       result = start
       fastRuneAt(s, result, a)
-      if isTitle(a): dec(result, start) 
+      if isTitle(a): dec(result, start)
       else: result = -1
     else:
       result = -1
-  of pkWhitespace: 
+  of pkWhitespace:
     if s[start] != '\0':
       var a: Rune
       result = start
@@ -641,7 +641,7 @@ proc rawMatch*(s: string, p: Peg, start: int, c: var Captures): int {.
     when false: echo "leave: ", p.nt.name
     if result < 0: c.ml = oldMl
   of pkSequence:
-    var oldMl = c.ml  
+    var oldMl = c.ml
     result = 0
     for i in 0..high(p.sons):
       var x = rawMatch(s, p.sons[i], start+result, c)
@@ -723,11 +723,11 @@ proc rawMatch*(s: string, p: Peg, start: int, c: var Captures): int {.
       #else: silently ignore the capture
     else:
       c.ml = idx
-  of pkBackRef..pkBackRefIgnoreStyle: 
+  of pkBackRef..pkBackRefIgnoreStyle:
     if p.index >= c.ml: return -1
     var (a, b) = c.matches[p.index]
     var n: Peg
-    n.kind = succ(pkTerminal, ord(p.kind)-ord(pkBackRef)) 
+    n.kind = succ(pkTerminal, ord(p.kind)-ord(pkBackRef))
     n.term = s.substr(a, b)
     result = rawMatch(s, n, start, c)
   of pkStartAnchor:
@@ -744,24 +744,6 @@ template fillMatches(s, caps, c: expr) =
     else:
       caps[k] = nil
 
-proc match*(s: string, pattern: Peg, matches: var openArray[string],
-            start = 0): bool {.nosideEffect, rtl, extern: "npegs$1Capture".} =
-  ## returns ``true`` if ``s[start..]`` matches the ``pattern`` and
-  ## the captured substrings in the array ``matches``. If it does not
-  ## match, nothing is written into ``matches`` and ``false`` is
-  ## returned.
-  var c: Captures
-  c.origStart = start
-  result = rawMatch(s, pattern, start, c) == len(s) - start
-  if result: fillMatches(s, matches, c)
-
-proc match*(s: string, pattern: Peg, 
-            start = 0): bool {.nosideEffect, rtl, extern: "npegs$1".} =
-  ## returns ``true`` if ``s`` matches the ``pattern`` beginning from ``start``.
-  var c: Captures
-  c.origStart = start
-  result = rawMatch(s, pattern, start, c) == len(s)-start
-
 proc matchLen*(s: string, pattern: Peg, matches: var openArray[string],
                start = 0): int {.nosideEffect, rtl, extern: "npegs$1Capture".} =
   ## the same as ``match``, but it returns the length of the match,
@@ -773,7 +755,7 @@ proc matchLen*(s: string, pattern: Peg, matches: var openArray[string],
   result = rawMatch(s, pattern, start, c)
   if result >= 0: fillMatches(s, matches, c)
 
-proc matchLen*(s: string, pattern: Peg, 
+proc matchLen*(s: string, pattern: Peg,
                start = 0): int {.nosideEffect, rtl, extern: "npegs$1".} =
   ## the same as ``match``, but it returns the length of the match,
   ## if there is no match, -1 is returned. Note that a match length
@@ -783,6 +765,20 @@ proc matchLen*(s: string, pattern: Peg,
   c.origStart = start
   result = rawMatch(s, pattern, start, c)
 
+proc match*(s: string, pattern: Peg, matches: var openArray[string],
+            start = 0): bool {.nosideEffect, rtl, extern: "npegs$1Capture".} =
+  ## returns ``true`` if ``s[start..]`` matches the ``pattern`` and
+  ## the captured substrings in the array ``matches``. If it does not
+  ## match, nothing is written into ``matches`` and ``false`` is
+  ## returned.
+  result = matchLen(s, pattern, matches, start) != -1
+
+proc match*(s: string, pattern: Peg,
+            start = 0): bool {.nosideEffect, rtl, extern: "npegs$1".} =
+  ## returns ``true`` if ``s`` matches the ``pattern`` beginning from ``start``.
+  result = matchLen(s, pattern, start) != -1
+
+
 proc find*(s: string, pattern: Peg, matches: var openArray[string],
            start = 0): int {.nosideEffect, rtl, extern: "npegs$1Capture".} =
   ## returns the starting position of ``pattern`` in ``s`` and the captured
@@ -797,11 +793,11 @@ proc find*(s: string, pattern: Peg, matches: var openArray[string],
       return i
   return -1
   # could also use the pattern here: (!P .)* P
-  
+
 proc findBounds*(s: string, pattern: Peg, matches: var openArray[string],
                  start = 0): tuple[first, last: int] {.
                  nosideEffect, rtl, extern: "npegs$1Capture".} =
-  ## returns the starting position and end position of ``pattern`` in ``s`` 
+  ## returns the starting position and end position of ``pattern`` in ``s``
   ## and the captured
   ## substrings in the array ``matches``. If it does not match, nothing
   ## is written into ``matches`` and (-1,0) is returned.
@@ -814,8 +810,8 @@ proc findBounds*(s: string, pattern: Peg, matches: var openArray[string],
       fillMatches(s, matches, c)
       return (i, i+L-1)
   return (-1, 0)
-  
-proc find*(s: string, pattern: Peg, 
+
+proc find*(s: string, pattern: Peg,
            start = 0): int {.nosideEffect, rtl, extern: "npegs$1".} =
   ## returns the starting position of ``pattern`` in ``s``. If it does not
   ## match, -1 is returned.
@@ -824,8 +820,8 @@ proc find*(s: string, pattern: Peg,
   for i in start .. s.len-1:
     if rawMatch(s, pattern, i, c) >= 0: return i
   return -1
-  
-iterator findAll*(s: string, pattern: Peg, start = 0): string = 
+
+iterator findAll*(s: string, pattern: Peg, start = 0): string =
   ## yields all matching *substrings* of `s` that match `pattern`.
   var c: Captures
   c.origStart = start
@@ -838,23 +834,23 @@ iterator findAll*(s: string, pattern: Peg, start = 0): string =
     else:
       yield substr(s, i, i+L-1)
       inc(i, L)
-    
+
 proc findAll*(s: string, pattern: Peg, start = 0): seq[string] {.
-  nosideEffect, rtl, extern: "npegs$1".} = 
+  nosideEffect, rtl, extern: "npegs$1".} =
   ## returns all matching *substrings* of `s` that match `pattern`.
   ## If it does not match, @[] is returned.
   accumulateResult(findAll(s, pattern, start))
 
 when not defined(nimhygiene):
   {.pragma: inject.}
-  
+
 template `=~`*(s: string, pattern: Peg): bool =
-  ## This calls ``match`` with an implicit declared ``matches`` array that 
-  ## can be used in the scope of the ``=~`` call: 
-  ## 
+  ## This calls ``match`` with an implicit declared ``matches`` array that
+  ## can be used in the scope of the ``=~`` call:
+  ##
   ## .. code-block:: nim
   ##
-  ##   if line =~ peg"\s* {\w+} \s* '=' \s* {\w+}": 
+  ##   if line =~ peg"\s* {\w+} \s* '=' \s* {\w+}":
   ##     # matches a key=value pair:
   ##     echo("Key: ", matches[0])
   ##     echo("Value: ", matches[1])
@@ -865,7 +861,7 @@ template `=~`*(s: string, pattern: Peg): bool =
   ##     echo("comment: ", matches[0])
   ##   else:
   ##     echo("syntax error")
-  ##  
+  ##
   bind MaxSubpatterns
   when not declaredInScope(matches):
     var matches {.inject.}: array[0..MaxSubpatterns-1, string]
@@ -902,7 +898,7 @@ proc replacef*(s: string, sub: Peg, by: string): string {.
   ## with the notation ``$i`` and ``$#`` (see strutils.`%`). Examples:
   ##
   ## .. code-block:: nim
-  ##   "var1=key; var2=key2".replace(peg"{\ident}'='{\ident}", "$1<-$2$2")
+  ##   "var1=key; var2=key2".replacef(peg"{\ident}'='{\ident}", "$1<-$2$2")
   ##
   ## Results in:
   ##
@@ -941,10 +937,10 @@ proc replace*(s: string, sub: Peg, by = ""): string {.
       add(result, by)
       inc(i, x)
   add(result, substr(s, i))
-  
+
 proc parallelReplace*(s: string, subs: varargs[
                       tuple[pattern: Peg, repl: string]]): string {.
-                      nosideEffect, rtl, extern: "npegs$1".} = 
+                      nosideEffect, rtl, extern: "npegs$1".} =
   ## Returns a modified copy of `s` with the substitutions in `subs`
   ## applied in parallel.
   result = ""
@@ -964,8 +960,8 @@ proc parallelReplace*(s: string, subs: varargs[
       add(result, s[i])
       inc(i)
   # copy the rest:
-  add(result, substr(s, i))  
-  
+  add(result, substr(s, i))
+
 proc transformFile*(infile, outfile: string,
                     subs: varargs[tuple[pattern: Peg, repl: string]]) {.
                     rtl, extern: "npegs$1".} =
@@ -974,7 +970,7 @@ proc transformFile*(infile, outfile: string,
   ## error occurs. This is supposed to be used for quick scripting.
   var x = readFile(infile).string
   writeFile(outfile, x.parallelReplace(subs))
-  
+
 iterator split*(s: string, sep: Peg): string =
   ## Splits the string `s` into substrings.
   ##
@@ -1049,14 +1045,14 @@ type
     tkBackref,          ## '$'
     tkDollar,           ## '$'
     tkHat               ## '^'
-  
+
   TToken {.final.} = object  ## a token
     kind: TTokKind           ## the type of the token
     modifier: TModifier
     literal: string          ## the parsed (string) literal
     charset: set[char]       ## if kind == tkCharSet
     index: int               ## if kind == tkBackref
-  
+
   PegLexer {.inheritable.} = object          ## the lexer object.
     bufpos: int               ## the current position within the buffer
     buf: cstring              ## the buffer itself
@@ -1086,7 +1082,7 @@ proc handleLF(L: var PegLexer, pos: int): int =
   result = pos+1
   L.lineStart = result
 
-proc init(L: var PegLexer, input, filename: string, line = 1, col = 0) = 
+proc init(L: var PegLexer, input, filename: string, line = 1, col = 0) =
   L.buf = input
   L.bufpos = 0
   L.lineNumber = line
@@ -1094,69 +1090,69 @@ proc init(L: var PegLexer, input, filename: string, line = 1, col = 0) =
   L.lineStart = 0
   L.filename = filename
 
-proc getColumn(L: PegLexer): int {.inline.} = 
+proc getColumn(L: PegLexer): int {.inline.} =
   result = abs(L.bufpos - L.lineStart) + L.colOffset
 
-proc getLine(L: PegLexer): int {.inline.} = 
+proc getLine(L: PegLexer): int {.inline.} =
   result = L.lineNumber
-  
+
 proc errorStr(L: PegLexer, msg: string, line = -1, col = -1): string =
   var line = if line < 0: getLine(L) else: line
   var col = if col < 0: getColumn(L) else: col
   result = "$1($2, $3) Error: $4" % [L.filename, $line, $col, msg]
 
-proc handleHexChar(c: var PegLexer, xi: var int) = 
+proc handleHexChar(c: var PegLexer, xi: var int) =
   case c.buf[c.bufpos]
-  of '0'..'9': 
+  of '0'..'9':
     xi = (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('0'))
     inc(c.bufpos)
-  of 'a'..'f': 
+  of 'a'..'f':
     xi = (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('a') + 10)
     inc(c.bufpos)
-  of 'A'..'F': 
+  of 'A'..'F':
     xi = (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('A') + 10)
     inc(c.bufpos)
   else: discard
 
-proc getEscapedChar(c: var PegLexer, tok: var TToken) = 
+proc getEscapedChar(c: var PegLexer, tok: var TToken) =
   inc(c.bufpos)
   case c.buf[c.bufpos]
-  of 'r', 'R', 'c', 'C': 
+  of 'r', 'R', 'c', 'C':
     add(tok.literal, '\c')
     inc(c.bufpos)
-  of 'l', 'L': 
+  of 'l', 'L':
     add(tok.literal, '\L')
     inc(c.bufpos)
-  of 'f', 'F': 
+  of 'f', 'F':
     add(tok.literal, '\f')
     inc(c.bufpos)
-  of 'e', 'E': 
+  of 'e', 'E':
     add(tok.literal, '\e')
     inc(c.bufpos)
-  of 'a', 'A': 
+  of 'a', 'A':
     add(tok.literal, '\a')
     inc(c.bufpos)
-  of 'b', 'B': 
+  of 'b', 'B':
     add(tok.literal, '\b')
     inc(c.bufpos)
-  of 'v', 'V': 
+  of 'v', 'V':
     add(tok.literal, '\v')
     inc(c.bufpos)
-  of 't', 'T': 
+  of 't', 'T':
     add(tok.literal, '\t')
     inc(c.bufpos)
-  of 'x', 'X': 
+  of 'x', 'X':
     inc(c.bufpos)
     var xi = 0
     handleHexChar(c, xi)
     handleHexChar(c, xi)
     if xi == 0: tok.kind = tkInvalid
     else: add(tok.literal, chr(xi))
-  of '0'..'9': 
+  of '0'..'9':
     var val = ord(c.buf[c.bufpos]) - ord('0')
     inc(c.bufpos)
     var i = 1
-    while (i <= 3) and (c.buf[c.bufpos] in {'0'..'9'}): 
+    while (i <= 3) and (c.buf[c.bufpos] in {'0'..'9'}):
       val = val * 10 + ord(c.buf[c.bufpos]) - ord('0')
       inc(c.bufpos)
       inc(i)
@@ -1169,32 +1165,32 @@ proc getEscapedChar(c: var PegLexer, tok: var TToken) =
   else:
     add(tok.literal, c.buf[c.bufpos])
     inc(c.bufpos)
-  
-proc skip(c: var PegLexer) = 
+
+proc skip(c: var PegLexer) =
   var pos = c.bufpos
   var buf = c.buf
-  while true: 
+  while true:
     case buf[pos]
-    of ' ', '\t': 
+    of ' ', '\t':
       inc(pos)
     of '#':
       while not (buf[pos] in {'\c', '\L', '\0'}): inc(pos)
     of '\c':
       pos = handleCR(c, pos)
       buf = c.buf
-    of '\L': 
+    of '\L':
       pos = handleLF(c, pos)
       buf = c.buf
-    else: 
+    else:
       break                   # EndOfFile also leaves the loop
   c.bufpos = pos
-  
-proc getString(c: var PegLexer, tok: var TToken) = 
+
+proc getString(c: var PegLexer, tok: var TToken) =
   tok.kind = tkStringLit
   var pos = c.bufpos + 1
   var buf = c.buf
   var quote = buf[pos-1]
-  while true: 
+  while true:
     case buf[pos]
     of '\\':
       c.bufpos = pos
@@ -1205,13 +1201,13 @@ proc getString(c: var PegLexer, tok: var TToken) =
       break
     elif buf[pos] == quote:
       inc(pos)
-      break      
+      break
     else:
       add(tok.literal, buf[pos])
       inc(pos)
   c.bufpos = pos
-  
-proc getDollar(c: var PegLexer, tok: var TToken) = 
+
+proc getDollar(c: var PegLexer, tok: var TToken) =
   var pos = c.bufpos + 1
   var buf = c.buf
   if buf[pos] in {'0'..'9'}:
@@ -1223,8 +1219,8 @@ proc getDollar(c: var PegLexer, tok: var TToken) =
   else:
     tok.kind = tkDollar
   c.bufpos = pos
-  
-proc getCharSet(c: var PegLexer, tok: var TToken) = 
+
+proc getCharSet(c: var PegLexer, tok: var TToken) =
   tok.kind = tkCharSet
   tok.charset = {}
   var pos = c.bufpos + 1
@@ -1247,7 +1243,7 @@ proc getCharSet(c: var PegLexer, tok: var TToken) =
     of '\C', '\L', '\0':
       tok.kind = tkInvalid
       break
-    else: 
+    else:
       ch = buf[pos]
       inc(pos)
     incl(tok.charset, ch)
@@ -1267,18 +1263,18 @@ proc getCharSet(c: var PegLexer, tok: var TToken) =
         of '\C', '\L', '\0':
           tok.kind = tkInvalid
           break
-        else: 
+        else:
           ch2 = buf[pos]
           inc(pos)
         for i in ord(ch)+1 .. ord(ch2):
           incl(tok.charset, chr(i))
   c.bufpos = pos
   if caret: tok.charset = {'\1'..'\xFF'} - tok.charset
-  
-proc getSymbol(c: var PegLexer, tok: var TToken) = 
+
+proc getSymbol(c: var PegLexer, tok: var TToken) =
   var pos = c.bufpos
   var buf = c.buf
-  while true: 
+  while true:
     add(tok.literal, buf[pos])
     inc(pos)
     if buf[pos] notin strutils.IdentChars: break
@@ -1294,7 +1290,7 @@ proc getBuiltin(c: var PegLexer, tok: var TToken) =
     tok.kind = tkEscaped
     getEscapedChar(c, tok) # may set tok.kind to tkInvalid
 
-proc getTok(c: var PegLexer, tok: var TToken) = 
+proc getTok(c: var PegLexer, tok: var TToken) =
   tok.kind = tkInvalid
   tok.modifier = modNone
   setLen(tok.literal, 0)
@@ -1309,11 +1305,11 @@ proc getTok(c: var PegLexer, tok: var TToken) =
     else:
       tok.kind = tkCurlyLe
       add(tok.literal, '{')
-  of '}': 
+  of '}':
     tok.kind = tkCurlyRi
     inc(c.bufpos)
     add(tok.literal, '}')
-  of '[': 
+  of '[':
     getCharSet(c, tok)
   of '(':
     tok.kind = tkParLe
@@ -1323,7 +1319,7 @@ proc getTok(c: var PegLexer, tok: var TToken) =
     tok.kind = tkParRi
     inc(c.bufpos)
     add(tok.literal, ')')
-  of '.': 
+  of '.':
     tok.kind = tkAny
     inc(c.bufpos)
     add(tok.literal, '.')
@@ -1331,16 +1327,16 @@ proc getTok(c: var PegLexer, tok: var TToken) =
     tok.kind = tkAnyRune
     inc(c.bufpos)
     add(tok.literal, '_')
-  of '\\': 
+  of '\\':
     getBuiltin(c, tok)
   of '\'', '"': getString(c, tok)
   of '$': getDollar(c, tok)
-  of '\0': 
+  of '\0':
     tok.kind = tkEof
     tok.literal = "[EOF]"
   of 'a'..'z', 'A'..'Z', '\128'..'\255':
     getSymbol(c, tok)
-    if c.buf[c.bufpos] in {'\'', '"'} or 
+    if c.buf[c.bufpos] in {'\'', '"'} or
         c.buf[c.bufpos] == '$' and c.buf[c.bufpos+1] in {'0'..'9'}:
       case tok.literal
       of "i": tok.modifier = modIgnoreCase
@@ -1388,7 +1384,7 @@ proc getTok(c: var PegLexer, tok: var TToken) =
     tok.kind = tkAt
     inc(c.bufpos)
     add(tok.literal, '@')
-    if c.buf[c.bufpos] == '@': 
+    if c.buf[c.bufpos] == '@':
       tok.kind = tkCurlyAt
       inc(c.bufpos)
       add(tok.literal, '@')
@@ -1407,7 +1403,7 @@ proc arrowIsNextTok(c: PegLexer): bool =
   result = c.buf[pos] == '<' and c.buf[pos+1] == '-'
 
 # ----------------------------- parser ----------------------------------------
-    
+
 type
   EInvalidPeg* = object of ValueError ## raised if an invalid
                                          ## PEG has been detected
@@ -1425,7 +1421,7 @@ proc pegError(p: PegParser, msg: string, line = -1, col = -1) =
   e.msg = errorStr(p, msg, line, col)
   raise e
 
-proc getTok(p: var PegParser) = 
+proc getTok(p: var PegParser) =
   getTok(p, p.tok)
   if p.tok.kind == tkInvalid: pegError(p, "invalid token")
 
@@ -1475,7 +1471,7 @@ proc builtin(p: var PegParser): Peg =
   of "white": result = unicodeWhitespace()
   else: pegError(p, "unknown built-in: " & p.tok.literal)
 
-proc token(terminal: Peg, p: PegParser): Peg = 
+proc token(terminal: Peg, p: PegParser): Peg =
   if p.skip.kind == pkEmpty: result = terminal
   else: result = sequence(p.skip, terminal)
 
@@ -1496,7 +1492,7 @@ proc primary(p: var PegParser): Peg =
   else: discard
   case p.tok.kind
   of tkIdentifier:
-    if p.identIsVerbatim: 
+    if p.identIsVerbatim:
       var m = p.tok.modifier
       if m == modNone: m = p.modifier
       result = modifiedTerm(p.tok.literal, m).token(p)
@@ -1539,17 +1535,17 @@ proc primary(p: var PegParser): Peg =
   of tkEscaped:
     result = term(p.tok.literal[0]).token(p)
     getTok(p)
-  of tkDollar: 
+  of tkDollar:
     result = endAnchor()
     getTok(p)
-  of tkHat: 
+  of tkHat:
     result = startAnchor()
     getTok(p)
   of tkBackref:
     var m = p.tok.modifier
     if m == modNone: m = p.modifier
     result = modifiedBackref(p.tok.index, m).token(p)
-    if p.tok.index < 0 or p.tok.index > p.captures: 
+    if p.tok.index < 0 or p.tok.index > p.captures:
       pegError(p, "invalid back reference index: " & $p.tok.index)
     getTok(p)
   else:
@@ -1573,7 +1569,7 @@ proc seqExpr(p: var PegParser): Peg =
   while true:
     case p.tok.kind
     of tkAmp, tkNot, tkAt, tkStringLit, tkCharSet, tkParLe, tkCurlyLe,
-       tkAny, tkAnyRune, tkBuiltin, tkEscaped, tkDollar, tkBackref, 
+       tkAny, tkAnyRune, tkBuiltin, tkEscaped, tkDollar, tkBackref,
        tkHat, tkCurlyAt:
       result = sequence(result, primary(p))
     of tkIdentifier:
@@ -1587,7 +1583,7 @@ proc parseExpr(p: var PegParser): Peg =
   while p.tok.kind == tkBar:
     getTok(p)
     result = result / seqExpr(p)
-  
+
 proc parseRule(p: var PegParser): NonTerminal =
   if p.tok.kind == tkIdentifier and arrowIsNextTok(p):
     result = getNonTerminal(p, p.tok.literal)
@@ -1601,7 +1597,7 @@ proc parseRule(p: var PegParser): NonTerminal =
     incl(result.flags, ntDeclared) # NOW inlining may be attempted
   else:
     pegError(p, "rule expected, but found: " & p.tok.literal)
-  
+
 proc rawParse(p: var PegParser): Peg =
   ## parses a rule or a PEG expression
   while p.tok.kind == tkBuiltin:
@@ -1680,27 +1676,29 @@ when isMainModule:
   assert(not match("W_HI_L", peg"\y 'while'"))
   assert(not match("W_HI_Le", peg"\y v'while'"))
   assert match("W_HI_Le", peg"y'while'")
-  
+
   assert($ +digits == $peg"\d+")
   assert "0158787".match(peg"\d+")
   assert "ABC 0232".match(peg"\w+\s+\d+")
   assert "ABC".match(peg"\d+ / \w+")
 
+  var accum: seq[string] = @[]
   for word in split("00232this02939is39an22example111", peg"\d+"):
-    writeln(stdout, word)
+    accum.add(word)
+  assert(accum == @["this", "is", "an", "example"])
 
   assert matchLen("key", ident) == 3
 
   var pattern = sequence(ident, *whitespace, term('='), *whitespace, ident)
   assert matchLen("key1=  cal9", pattern) == 11
-  
+
   var ws = newNonTerminal("ws", 1, 1)
   ws.rule = *whitespace
-  
+
   var expr = newNonTerminal("expr", 1, 1)
   expr.rule = sequence(capture(ident), *sequence(
                 nonterminal(ws), term('+'), nonterminal(ws), nonterminal(expr)))
-  
+
   var c: Captures
   var s = "a+b +  c +d+e+f"
   assert rawMatch(s, expr.rule, 0, c) == len(s)
@@ -1722,7 +1720,7 @@ when isMainModule:
     assert matches[0] == "abc"
   else:
     assert false
-  
+
   var g2 = peg"""S <- A B / C D
                  A <- 'a'+
                  B <- 'b'+
@@ -1748,18 +1746,20 @@ when isMainModule:
   else:
     assert false
 
+  accum = @[]
   for x in findAll("abcdef", peg".", 3):
-    echo x
+    accum.add(x)
+  assert(accum == @["d", "e", "f"])
 
   for x in findAll("abcdef", peg"^{.}", 3):
     assert x == "d"
-    
+
   if "f(a, b)" =~ peg"{[0-9]+} / ({\ident} '(' {@} ')')":
     assert matches[0] == "f"
     assert matches[1] == "a, b"
   else:
     assert false
-  
+
   assert match("eine übersicht und außerdem", peg"(\letter \white*)+")
   # ß is not a lower cased letter?!
   assert match("eine übersicht und auerdem", peg"(\lower \white*)+")
@@ -1783,3 +1783,9 @@ when isMainModule:
   if "foo" =~ peg"{'foo'}":
     assert matches[0] == "foo"
   else: assert false
+
+  let empty_test = peg"^\d*"
+  let str = "XYZ"
+
+  assert(str.find(empty_test) == 0)
+  assert(str.match(empty_test))