summary refs log tree commit diff stats
path: root/lib/pure/pegs.nim
diff options
context:
space:
mode:
author Araq <rumpf_a@web.de> 2010-11-07 23:52:41 +0100
committer Araq <rumpf_a@web.de> 2010-11-07 23:52:41 +0100
commit 8ee63f98364259b2d1b6c02d050e0efccecbcf9b (patch)
tree c567fdd150083a5924c1182fb5377d7870828b81 /lib/pure/pegs.nim
parent 7659739caf08e56fabf986932c7cd01151a5f40d (diff)
download Nim-8ee63f98364259b2d1b6c02d050e0efccecbcf9b.tar.gz
pegs: captured search loop
Diffstat (limited to 'lib/pure/pegs.nim')
-rwxr-xr-x lib/pure/pegs.nim 53
1 files changed, 49 insertions, 4 deletions
diff --git a/lib/pure/pegs.nim b/lib/pure/pegs.nim
index 70d8d8d37..0a373125d 100755
--- a/lib/pure/pegs.nim
+++ b/lib/pure/pegs.nim
@@ -58,6 +58,7 @@ type
     pkBackRefIgnoreCase,
     pkBackRefIgnoreStyle,
     pkSearch,           ## @a     --> Internal DSL: @a
+    pkCapturedSearch,   ## {@} a  --> Internal DSL: @@a
     pkRule,             ## a <- b
     pkList              ## a, b
   TNonTerminalFlag = enum
@@ -192,6 +193,11 @@ proc `@`*(a: TPeg): TPeg {.nosideEffect, rtl, extern: "npegsSearch".} =
   ## constructs a "search" for the PEG `a`
   result.kind = pkSearch
   result.sons = @[a]
+
+proc `@@`*(a: TPeg): TPeg {.noSideEffect, rtl, 
+                            extern: "npgegsCapturedSearch".} =
+  result.kind = pkCapturedSearch
+  result.sons = @[a]
   
 when false:
   proc contains(a: TPeg, k: TPegKind): bool =
@@ -421,6 +427,9 @@ proc toStrAux(r: TPeg, res: var string) =
   of pkSearch:
     add(res, '@')
     toStrAux(r.sons[0], res)
+  of pkCapturedSearch:
+    add(res, "{@}")
+    toStrAux(r.sons[0], res)
   of pkCapture:
     add(res, '{')
     toStrAux(r.sons[0], res)    
@@ -558,6 +567,21 @@ proc m(s: string, p: TPeg, start: int, c: var TMatchClosure): int =
       inc(result)
     result = -1
     c.ml = oldMl
+  of pkCapturedSearch:
+    var idx = c.ml # reserve a slot for the subpattern
+    inc(c.ml)
+    result = 0
+    while start+result < s.len:
+      var x = m(s, p.sons[0], start+result, c)
+      if x >= 0:
+        if idx < maxSubpatterns:
+          c.matches[idx] = (start, start+result-1)
+        #else: silently ignore the capture
+        inc(result, x)
+        return
+      inc(result)
+    result = -1
+    c.ml = idx
   of pkGreedyRep:
     result = 0
     while true:
@@ -850,6 +874,7 @@ type
     tkParRi,            ## ')'
     tkCurlyLe,          ## '{'
     tkCurlyRi,          ## '}'
+    tkCurlyAt,          ## '{@}'
     tkArrow,            ## '<-'
     tkBar,              ## '/'
     tkStar,             ## '*'
@@ -880,7 +905,8 @@ type
 const
   tokKindToStr: array[TTokKind, string] = [
     "invalid", "[EOF]", ".", "_", "identifier", "string literal",
-    "character set", "(", ")", "{", "}", "<-", "/", "*", "+", "&", "!", "?",
+    "character set", "(", ")", "{", "}", "{@}",
+    "<-", "/", "*", "+", "&", "!", "?",
     "@", "built-in", "escaped", "$"
   ]
 
@@ -1112,9 +1138,14 @@ proc getTok(c: var TPegLexer, tok: var TToken) =
   skip(c)
   case c.buf[c.bufpos]
   of '{':
-    tok.kind = tkCurlyLe
     inc(c.bufpos)
-    add(tok.literal, '{')
+    if c.buf[c.bufpos] == '@' and c.buf[c.bufpos+1] == '}':
+      tok.kind = tkCurlyAt
+      inc(c.bufpos, 2)
+      add(tok.literal, "{@}")
+    else:
+      tok.kind = tkCurlyLe
+      add(tok.literal, '{')
   of '}': 
     tok.kind = tkCurlyRi
     inc(c.bufpos)
@@ -1193,6 +1224,10 @@ proc getTok(c: var TPegLexer, tok: var TToken) =
     tok.kind = tkAt
     inc(c.bufpos)
     add(tok.literal, '@')
+    if c.buf[c.bufpos] == '@': 
+      tok.kind = tkCurlyAt
+      inc(c.bufpos)
+      add(tok.literal, '@')
   else:
     add(tok.literal, c.buf[c.bufpos])
     inc(c.bufpos)
@@ -1261,6 +1296,9 @@ proc primary(p: var TPegParser): TPeg =
   of tkAt:
     getTok(p)
     return @primary(p)
+  of tkCurlyAt:
+    getTok(p)
+    return @@primary(p)
   else: nil
   case p.tok.kind
   of tkIdentifier:
@@ -1346,7 +1384,7 @@ proc seqExpr(p: var TPegParser): TPeg =
   while true:
     case p.tok.kind
     of tkAmp, tkNot, tkAt, tkStringLit, tkCharset, tkParLe, tkCurlyLe,
-       tkAny, tkAnyRune, tkBuiltin, tkEscaped, tkDollar:
+       tkAny, tkAnyRune, tkBuiltin, tkEscaped, tkDollar, tkCurlyAt:
       result = sequence(result, primary(p))
     of tkIdentifier:
       if not arrowIsNextTok(p):
@@ -1514,4 +1552,11 @@ when isMainModule:
 
   for x in findAll("abcdef", peg"{.}", 3):
     echo x
+    
+  if "f(a, b)" =~ peg"{[0-9]+} / ({\ident} '(' {@} ')')":
+    assert matches[0] == "f"
+    assert matches[1] == "a, b"
+  else:
+    assert false
+