summary refs log tree commit diff stats
path: root/lib/pure
diff options
context:
space:
mode:
Diffstat (limited to 'lib/pure')
-rwxr-xr-x[-rw-r--r--]lib/pure/browsers.nim0
-rwxr-xr-x[-rw-r--r--]lib/pure/cgi.nim0
-rwxr-xr-x[-rw-r--r--]lib/pure/colors.nim0
-rwxr-xr-x[-rw-r--r--]lib/pure/complex.nim0
-rwxr-xr-x[-rw-r--r--]lib/pure/dynlib.nim0
-rwxr-xr-x[-rw-r--r--]lib/pure/hashes.nim0
-rwxr-xr-x[-rw-r--r--]lib/pure/hashtabs.nim0
-rwxr-xr-x[-rw-r--r--]lib/pure/htmlparser.nim0
-rwxr-xr-x[-rw-r--r--]lib/pure/httpclient.nim0
-rwxr-xr-x[-rw-r--r--]lib/pure/httpserver.nim0
-rwxr-xr-x[-rw-r--r--]lib/pure/lexbase.nim0
-rwxr-xr-x[-rw-r--r--]lib/pure/logging.nim0
-rwxr-xr-x[-rw-r--r--]lib/pure/macros.nim0
-rwxr-xr-x[-rw-r--r--]lib/pure/math.nim0
-rwxr-xr-x[-rw-r--r--]lib/pure/md5.nim0
-rwxr-xr-x[-rw-r--r--]lib/pure/os.nim0
-rwxr-xr-x[-rw-r--r--]lib/pure/osproc.nim0
-rwxr-xr-x[-rw-r--r--]lib/pure/parsecfg.nim0
-rwxr-xr-x[-rw-r--r--]lib/pure/parsecsv.nim0
-rwxr-xr-x[-rw-r--r--]lib/pure/parseopt.nim0
-rwxr-xr-x[-rw-r--r--]lib/pure/parsesql.nim0
-rwxr-xr-x[-rw-r--r--]lib/pure/parseurl.nim0
-rwxr-xr-x[-rw-r--r--]lib/pure/parseutils.nim0
-rwxr-xr-x[-rw-r--r--]lib/pure/parsexml.nim0
-rwxr-xr-x[-rw-r--r--]lib/pure/pegs.nim109
-rwxr-xr-x[-rw-r--r--]lib/pure/re.nim0
-rwxr-xr-x[-rw-r--r--]lib/pure/regexprs.nim0
-rwxr-xr-x[-rw-r--r--]lib/pure/ropes.nim0
-rwxr-xr-x[-rw-r--r--]lib/pure/sockets.nim0
-rwxr-xr-x[-rw-r--r--]lib/pure/streams.nim0
-rwxr-xr-x[-rw-r--r--]lib/pure/strtabs.nim0
-rwxr-xr-x[-rw-r--r--]lib/pure/strutils.nim0
-rwxr-xr-x[-rw-r--r--]lib/pure/terminal.nim0
-rwxr-xr-x[-rw-r--r--]lib/pure/times.nim0
-rwxr-xr-x[-rw-r--r--]lib/pure/unicode.nim0
-rwxr-xr-x[-rw-r--r--]lib/pure/unidecode/gen.py0
-rwxr-xr-x[-rw-r--r--]lib/pure/unidecode/unidecode.dat0
-rwxr-xr-x[-rw-r--r--]lib/pure/unidecode/unidecode.nim0
-rwxr-xr-x[-rw-r--r--]lib/pure/variants.nim0
-rwxr-xr-x[-rw-r--r--]lib/pure/xmldom.nim0
-rwxr-xr-x[-rw-r--r--]lib/pure/xmldomparser.nim0
-rwxr-xr-x[-rw-r--r--]lib/pure/xmlgen.nim0
-rwxr-xr-x[-rw-r--r--]lib/pure/xmlparser.nim0
-rwxr-xr-x[-rw-r--r--]lib/pure/xmltree.nim0
-rwxr-xr-x[-rw-r--r--]lib/pure/yamllexer.nim0
45 files changed, 93 insertions, 16 deletions
diff --git a/lib/pure/browsers.nim b/lib/pure/browsers.nim
index 243c07dad..243c07dad 100644..100755
--- a/lib/pure/browsers.nim
+++ b/lib/pure/browsers.nim
diff --git a/lib/pure/cgi.nim b/lib/pure/cgi.nim
index 490ae926d..490ae926d 100644..100755
--- a/lib/pure/cgi.nim
+++ b/lib/pure/cgi.nim
diff --git a/lib/pure/colors.nim b/lib/pure/colors.nim
index 548f07381..548f07381 100644..100755
--- a/lib/pure/colors.nim
+++ b/lib/pure/colors.nim
diff --git a/lib/pure/complex.nim b/lib/pure/complex.nim
index f50ff4bd0..f50ff4bd0 100644..100755
--- a/lib/pure/complex.nim
+++ b/lib/pure/complex.nim
diff --git a/lib/pure/dynlib.nim b/lib/pure/dynlib.nim
index 592073e3d..592073e3d 100644..100755
--- a/lib/pure/dynlib.nim
+++ b/lib/pure/dynlib.nim
diff --git a/lib/pure/hashes.nim b/lib/pure/hashes.nim
index 1593119bd..1593119bd 100644..100755
--- a/lib/pure/hashes.nim
+++ b/lib/pure/hashes.nim
diff --git a/lib/pure/hashtabs.nim b/lib/pure/hashtabs.nim
index 68d19d63b..68d19d63b 100644..100755
--- a/lib/pure/hashtabs.nim
+++ b/lib/pure/hashtabs.nim
diff --git a/lib/pure/htmlparser.nim b/lib/pure/htmlparser.nim
index 278bf9b90..278bf9b90 100644..100755
--- a/lib/pure/htmlparser.nim
+++ b/lib/pure/htmlparser.nim
diff --git a/lib/pure/httpclient.nim b/lib/pure/httpclient.nim
index 0f9054873..0f9054873 100644..100755
--- a/lib/pure/httpclient.nim
+++ b/lib/pure/httpclient.nim
diff --git a/lib/pure/httpserver.nim b/lib/pure/httpserver.nim
index 2c85d8137..2c85d8137 100644..100755
--- a/lib/pure/httpserver.nim
+++ b/lib/pure/httpserver.nim
diff --git a/lib/pure/lexbase.nim b/lib/pure/lexbase.nim
index bb207e92a..bb207e92a 100644..100755
--- a/lib/pure/lexbase.nim
+++ b/lib/pure/lexbase.nim
diff --git a/lib/pure/logging.nim b/lib/pure/logging.nim
index 6df39f50b..6df39f50b 100644..100755
--- a/lib/pure/logging.nim
+++ b/lib/pure/logging.nim
diff --git a/lib/pure/macros.nim b/lib/pure/macros.nim
index 677469ed2..677469ed2 100644..100755
--- a/lib/pure/macros.nim
+++ b/lib/pure/macros.nim
diff --git a/lib/pure/math.nim b/lib/pure/math.nim
index cf4b6d95c..cf4b6d95c 100644..100755
--- a/lib/pure/math.nim
+++ b/lib/pure/math.nim
diff --git a/lib/pure/md5.nim b/lib/pure/md5.nim
index e75f80b4c..e75f80b4c 100644..100755
--- a/lib/pure/md5.nim
+++ b/lib/pure/md5.nim
diff --git a/lib/pure/os.nim b/lib/pure/os.nim
index 4bb25098d..4bb25098d 100644..100755
--- a/lib/pure/os.nim
+++ b/lib/pure/os.nim
diff --git a/lib/pure/osproc.nim b/lib/pure/osproc.nim
index bbdea1eee..bbdea1eee 100644..100755
--- a/lib/pure/osproc.nim
+++ b/lib/pure/osproc.nim
diff --git a/lib/pure/parsecfg.nim b/lib/pure/parsecfg.nim
index c26dab099..c26dab099 100644..100755
--- a/lib/pure/parsecfg.nim
+++ b/lib/pure/parsecfg.nim
diff --git a/lib/pure/parsecsv.nim b/lib/pure/parsecsv.nim
index 5970f2090..5970f2090 100644..100755
--- a/lib/pure/parsecsv.nim
+++ b/lib/pure/parsecsv.nim
diff --git a/lib/pure/parseopt.nim b/lib/pure/parseopt.nim
index 8f4be98f4..8f4be98f4 100644..100755
--- a/lib/pure/parseopt.nim
+++ b/lib/pure/parseopt.nim
diff --git a/lib/pure/parsesql.nim b/lib/pure/parsesql.nim
index 2109c273a..2109c273a 100644..100755
--- a/lib/pure/parsesql.nim
+++ b/lib/pure/parsesql.nim
diff --git a/lib/pure/parseurl.nim b/lib/pure/parseurl.nim
index cd3bc621a..cd3bc621a 100644..100755
--- a/lib/pure/parseurl.nim
+++ b/lib/pure/parseurl.nim
diff --git a/lib/pure/parseutils.nim b/lib/pure/parseutils.nim
index 0f107793c..0f107793c 100644..100755
--- a/lib/pure/parseutils.nim
+++ b/lib/pure/parseutils.nim
diff --git a/lib/pure/parsexml.nim b/lib/pure/parsexml.nim
index 598ae6c68..598ae6c68 100644..100755
--- a/lib/pure/parsexml.nim
+++ b/lib/pure/parsexml.nim
diff --git a/lib/pure/pegs.nim b/lib/pure/pegs.nim
index 5ba0351ad..4f55a1883 100644..100755
--- a/lib/pure/pegs.nim
+++ b/lib/pure/pegs.nim
@@ -26,6 +26,8 @@ when useUnicode:
 
 const
   InlineThreshold = 5   ## number of leaves; -1 to disable inlining
+  MaxSubpatterns* = 10 ## defines the maximum number of subpatterns that
+                       ## can be captured. More subpatterns cannot be captured! 
 
 type
   TPegKind = enum
@@ -50,17 +52,20 @@ type
     pkAndPredicate,     ## &a     --> Internal DSL: &a
     pkNotPredicate,     ## !a     --> Internal DSL: !a
     pkCapture,          ## {a}    --> Internal DSL: capture(a)
+    pkBackRef,          ## $i     --> Internal DSL: backref(i)
+    pkBackRefIgnoreCase,
+    pkBackRefIgnoreStyle,
     pkSearch,           ## @a     --> Internal DSL: @a
     pkRule,             ## a <- b
     pkList              ## a, b
   TNonTerminalFlag = enum
     ntDeclared, ntUsed
   TNonTerminal {.final.} = object ## represents a non terminal symbol
-    name: string        ## the name of the symbol
-    line: int           ## the line the symbol has been declared/used in
-    col: int            ## the column the symbol has been declared/used in
-    flags: set[TNonTerminalFlag] ## the nonterminal's flags
-    rule: TNode         ## the rule that the symbol refers to
+    name: string                  ## the name of the symbol
+    line: int                     ## line the symbol has been declared/used in
+    col: int                      ## column the symbol has been declared/used in
+    flags: set[TNonTerminalFlag]  ## the nonterminal's flags
+    rule: TNode                   ## the rule that the symbol refers to
   TNode {.final.} = object
     case kind: TPegKind
     of pkEmpty, pkAny, pkAnyRune, pkGreedyAny, pkNewLine: nil
@@ -68,6 +73,7 @@ type
     of pkChar, pkGreedyRepChar: ch: char
     of pkCharChoice, pkGreedyRepSet: charChoice: ref set[char]
     of pkNonTerminal: nt: PNonTerminal
+    of pkBackRef..pkBackRefIgnoreStyle: index: range[0..MaxSubpatterns-1] ## zero-based capture slot
     else: sons: seq[TNode]
   PNonTerminal* = ref TNonTerminal
   
@@ -224,6 +230,24 @@ proc capture*(a: TPeg): TPeg =
   result.kind = pkCapture
   result.sons = @[a]
 
+proc backref*(index: range[1..MaxSubPatterns]): TPeg =
+  ## Creates a back reference to the capture numbered `index`. Captures are
+  ## numbered starting at 1.
+  # NOTE(review): the node stores `index-1`, i.e. 0 for the first capture;
+  # confirm that the declared range of the `index` field admits 0.
+  var slot: int = index - 1
+  result.kind = pkBackRef
+  result.index = slot
+
+proc backrefIgnoreCase*(index: range[1..MaxSubPatterns]): TPeg =
+  ## Creates a back reference to the capture numbered `index` (numbering
+  ## starts at 1) that is compared without regard to case.
+  # NOTE(review): the node stores `index-1`, i.e. 0 for the first capture;
+  # confirm that the declared range of the `index` field admits 0.
+  var slot: int = index - 1
+  result.kind = pkBackRefIgnoreCase
+  result.index = slot
+
+proc backrefIgnoreStyle*(index: range[1..MaxSubPatterns]): TPeg =
+  ## Creates a back reference to the capture numbered `index` (numbering
+  ## starts at 1) that is compared without regard to style.
+  # NOTE(review): the node stores `index-1`, i.e. 0 for the first capture;
+  # confirm that the declared range of the `index` field admits 0.
+  var slot: int = index - 1
+  result.kind = pkBackRefIgnoreStyle
+  result.index = slot
+
 proc spaceCost(n: TPeg): int =
   case n.kind
   of pkEmpty: nil
@@ -285,10 +309,6 @@ template natural*: expr =
   ## same as ``\d+``
   +digits
 
-const
-  MaxSubpatterns* = 10 ## defines the maximum number of subpatterns that
-                       ## can be captured. More subpatterns cannot be captured! 
-
 # ------------------------- debugging -----------------------------------------
 
 proc esc(c: char, reserved = {'\0'..'\255'}): string = 
@@ -394,6 +414,15 @@ proc toStrAux(r: TPeg, res: var string) =
     add(res, '{')
     toStrAux(r.sons[0], res)    
     add(res, '}')
+  of pkBackRef: 
+    add(res, '$')
+    add(res, $r.index)
+  of pkBackRefIgnoreCase: 
+    add(res, "i$")
+    add(res, $r.index)
+  of pkBackRefIgnoreStyle: 
+    add(res, "y$")
+    add(res, $r.index)
   of pkRule:
     toStrAux(r.sons[0], res)    
     add(res, " <- ")
@@ -559,6 +588,18 @@ proc m(s: string, p: TPeg, start: int, c: var TMatchClosure): int =
       #else: silently ignore the capture
     else:
       c.ml = idx
+  of pkBackRef: 
+    if p.index >= c.ml: return -1
+    var (a, b) = c.matches[p.index]
+    result = m(s, term(s.copy(a, b)), start, c)
+  of pkBackRefIgnoreCase:
+    if p.index >= c.ml: return -1
+    var (a, b) = c.matches[p.index]
+    result = m(s, termIgnoreCase(s.copy(a, b)), start, c)
+  of pkBackRefIgnoreStyle:
+    if p.index >= c.ml: return -1
+    var (a, b) = c.matches[p.index]
+    result = m(s, termIgnoreStyle(s.copy(a, b)), start, c)
   of pkRule, pkList: assert false
 
 proc match*(s: string, pattern: TPeg, matches: var openarray[string],
@@ -784,13 +825,15 @@ type
     tkOption,           ## '?'
     tkAt,               ## '@'
     tkBuiltin,          ## \identifier
-    tkEscaped           ## \\
+    tkEscaped,          ## \\
+    tkDollar            ## '$'
   
   TToken {.final.} = object  ## a token
     kind: TTokKind           ## the type of the token
     modifier: TModifier
     literal: string          ## the parsed (string) literal
     charset: set[char]       ## if kind == tkCharSet
+    index: int               ## if kind == tkDollar
   
   TPegLexer = object          ## the lexer object.
     bufpos: int               ## the current position within the buffer
@@ -804,7 +847,7 @@ const
   tokKindToStr: array[TTokKind, string] = [
     "invalid", "[EOF]", ".", "_", "identifier", "string literal",
     "character set", "(", ")", "{", "}", "<-", "/", "*", "+", "&", "!", "?",
-    "@", "built-in", "escaped"
+    "@", "built-in", "escaped", "$"
   ]
 
 proc HandleCR(L: var TPegLexer, pos: int): int =
@@ -945,6 +988,19 @@ proc getString(c: var TPegLexer, tok: var TToken) =
       Inc(pos)
   c.bufpos = pos
   
+proc getDollar(c: var TPegLexer, tok: var TToken) =
+  ## Lexes a back reference token: the character at the current buffer
+  ## position followed by one or more decimal digits. On success `tok.kind`
+  ## is ``tkDollar`` and `tok.index` holds the decimal value of the digits;
+  ## if no digit follows, `tok.kind` is ``tkInvalid``. The buffer position is
+  ## moved past everything that was consumed in both cases.
+  var cur = c.bufPos + 1        # step over the introducing character
+  var buf = c.buf
+  if not (buf[cur] in {'0'..'9'}):
+    tok.kind = tkInvalid
+  else:
+    tok.kind = tkDollar
+    tok.index = 0
+    # accumulate the digits as a base-10 number
+    while buf[cur] in {'0'..'9'}:
+      tok.index = tok.index * 10 + ord(buf[cur]) - ord('0')
+      inc(cur)
+  c.bufpos = cur
+  
+  
 proc getCharSet(c: var TPegLexer, tok: var TToken) = 
   tok.kind = tkCharSet
   tok.charset = {}
@@ -1050,19 +1106,23 @@ proc getTok(c: var TPegLexer, tok: var TToken) =
   of '\\': 
     getBuiltin(c, tok)
   of '\'', '"': getString(c, tok)
+  of '$': getDollar(c, tok)
   of '\0': 
     tok.kind = tkEof
     tok.literal = "[EOF]"
   of 'a'..'z', 'A'..'Z', '\128'..'\255':
     getSymbol(c, tok)
-    if c.buf[c.bufpos] in {'\'', '"'}:
+    if c.buf[c.bufpos] in {'\'', '"', '$'}:
       case tok.literal
       of "i": tok.modifier = modIgnoreCase
       of "y": tok.modifier = modIgnoreStyle
       of "v": tok.modifier = modVerbatim
       else: nil
       setLen(tok.literal, 0)
-      getString(c, tok)
+      if c.buf[c.bufpos] == '$':
+        getDollar(c, tok)
+      else:
+        getString(c, tok)
       if tok.modifier == modNone: tok.kind = tkInvalid
   of '+':
     tok.kind = tkPlus
@@ -1117,8 +1177,7 @@ type
     tok: TToken
     nonterms: seq[PNonTerminal]
     modifier: TModifier
-
-proc getTok(p: var TPegParser) = getTok(p, p.tok)
+    captures: int
 
 proc pegError(p: TPegParser, msg: string, line = -1, col = -1) =
   var e: ref EInvalidPeg
@@ -1126,6 +1185,10 @@ proc pegError(p: TPegParser, msg: string, line = -1, col = -1) =
   e.msg = errorStr(p, msg, line, col)
   raise e
 
+proc getTok(p: var TPegParser) =
+  ## Reads the next token into `p.tok`. A token of kind ``tkInvalid`` is
+  ## reported through `pegError` right away.
+  getTok(p, p.tok)
+  case p.tok.kind
+  of tkInvalid: pegError(p, "invalid token")
+  else: nil
+
 proc eat(p: var TPegParser, kind: TTokKind) =
   if p.tok.kind == kind: getTok(p)
   else: pegError(p, tokKindToStr[kind] & " expected")
@@ -1146,6 +1209,12 @@ proc modifiedTerm(s: string, m: TModifier): TPeg =
   of modIgnoreCase: result = termIgnoreCase(s)
   of modIgnoreStyle: result = termIgnoreStyle(s)
 
+proc modifiedBackref(s: int, m: TModifier): TPeg =
+  ## Builds the back reference node for capture number `s` that corresponds
+  ## to modifier `m`: case-insensitive, style-insensitive, or (for
+  ## ``modNone`` and ``modVerbatim``) an exact back reference.
+  # NOTE(review): `s` is a plain int while the constructors take
+  # range[1..MaxSubPatterns]; callers presumably validate `s` beforehand.
+  if m == modIgnoreCase:
+    result = backRefIgnoreCase(s)
+  elif m == modIgnoreStyle:
+    result = backRefIgnoreStyle(s)
+  else:
+    result = backRef(s)
+
 proc primary(p: var TPegParser): TPeg =
   case p.tok.kind
   of tkAmp:
@@ -1185,6 +1254,7 @@ proc primary(p: var TPegParser): TPeg =
     getTok(p)
     result = capture(parseExpr(p))
     eat(p, tkCurlyRi)
+    inc(p.captures)
   of tkAny:
     result = any()
     getTok(p)
@@ -1206,6 +1276,13 @@ proc primary(p: var TPegParser): TPeg =
   of tkEscaped:
     result = term(p.tok.literal[0])
     getTok(p)
+  of tkDollar:
+    # Validate before building the node: the backref constructors take a
+    # range[1..MaxSubpatterns] argument, so an index of 0, one beyond the
+    # captures counted so far, or one beyond MaxSubpatterns has to be
+    # reported as a peg error here instead of reaching them.
+    if p.tok.index < 1 or p.tok.index > min(p.captures, MaxSubpatterns):
+      pegError(p, "invalid back reference index: " & $p.tok.index)
+    var m = p.tok.modifier
+    if m == modNone: m = p.modifier   # fall back to the parser-wide modifier
+    result = modifiedBackRef(p.tok.index, m)
+    getTok(p)
   else:
     pegError(p, "expression expected, but found: " & p.tok.literal)
     getTok(p) # we must consume a token here to prevent endless loops!
@@ -1227,7 +1304,7 @@ proc seqExpr(p: var TPegParser): TPeg =
   while true:
     case p.tok.kind
     of tkAmp, tkNot, tkAt, tkStringLit, tkCharset, tkParLe, tkCurlyLe,
-       tkAny, tkAnyRune, tkBuiltin, tkEscaped:
+       tkAny, tkAnyRune, tkBuiltin, tkEscaped, tkDollar:
       result = sequence(result, primary(p))
     of tkIdentifier:
       if not arrowIsNextTok(p):
diff --git a/lib/pure/re.nim b/lib/pure/re.nim
index 953f9c744..953f9c744 100644..100755
--- a/lib/pure/re.nim
+++ b/lib/pure/re.nim
diff --git a/lib/pure/regexprs.nim b/lib/pure/regexprs.nim
index 43c7f05be..43c7f05be 100644..100755
--- a/lib/pure/regexprs.nim
+++ b/lib/pure/regexprs.nim
diff --git a/lib/pure/ropes.nim b/lib/pure/ropes.nim
index df85baf92..df85baf92 100644..100755
--- a/lib/pure/ropes.nim
+++ b/lib/pure/ropes.nim
diff --git a/lib/pure/sockets.nim b/lib/pure/sockets.nim
index 85628db78..85628db78 100644..100755
--- a/lib/pure/sockets.nim
+++ b/lib/pure/sockets.nim
diff --git a/lib/pure/streams.nim b/lib/pure/streams.nim
index f4d2911fc..f4d2911fc 100644..100755
--- a/lib/pure/streams.nim
+++ b/lib/pure/streams.nim
diff --git a/lib/pure/strtabs.nim b/lib/pure/strtabs.nim
index 8ea59637a..8ea59637a 100644..100755
--- a/lib/pure/strtabs.nim
+++ b/lib/pure/strtabs.nim
diff --git a/lib/pure/strutils.nim b/lib/pure/strutils.nim
index 724d00ee9..724d00ee9 100644..100755
--- a/lib/pure/strutils.nim
+++ b/lib/pure/strutils.nim
diff --git a/lib/pure/terminal.nim b/lib/pure/terminal.nim
index 42bd80cb4..42bd80cb4 100644..100755
--- a/lib/pure/terminal.nim
+++ b/lib/pure/terminal.nim
diff --git a/lib/pure/times.nim b/lib/pure/times.nim
index 70cb038a7..70cb038a7 100644..100755
--- a/lib/pure/times.nim
+++ b/lib/pure/times.nim
diff --git a/lib/pure/unicode.nim b/lib/pure/unicode.nim
index 099509afe..099509afe 100644..100755
--- a/lib/pure/unicode.nim
+++ b/lib/pure/unicode.nim
diff --git a/lib/pure/unidecode/gen.py b/lib/pure/unidecode/gen.py
index 8da0136ff..8da0136ff 100644..100755
--- a/lib/pure/unidecode/gen.py
+++ b/lib/pure/unidecode/gen.py
diff --git a/lib/pure/unidecode/unidecode.dat b/lib/pure/unidecode/unidecode.dat
index 9dff0a4a9..9dff0a4a9 100644..100755
--- a/lib/pure/unidecode/unidecode.dat
+++ b/lib/pure/unidecode/unidecode.dat
diff --git a/lib/pure/unidecode/unidecode.nim b/lib/pure/unidecode/unidecode.nim
index a665dd73e..a665dd73e 100644..100755
--- a/lib/pure/unidecode/unidecode.nim
+++ b/lib/pure/unidecode/unidecode.nim
diff --git a/lib/pure/variants.nim b/lib/pure/variants.nim
index 0b4f078e7..0b4f078e7 100644..100755
--- a/lib/pure/variants.nim
+++ b/lib/pure/variants.nim
diff --git a/lib/pure/xmldom.nim b/lib/pure/xmldom.nim
index babf60108..babf60108 100644..100755
--- a/lib/pure/xmldom.nim
+++ b/lib/pure/xmldom.nim
diff --git a/lib/pure/xmldomparser.nim b/lib/pure/xmldomparser.nim
index f338ca2e5..f338ca2e5 100644..100755
--- a/lib/pure/xmldomparser.nim
+++ b/lib/pure/xmldomparser.nim
diff --git a/lib/pure/xmlgen.nim b/lib/pure/xmlgen.nim
index 29f2700f2..29f2700f2 100644..100755
--- a/lib/pure/xmlgen.nim
+++ b/lib/pure/xmlgen.nim
diff --git a/lib/pure/xmlparser.nim b/lib/pure/xmlparser.nim
index 635497fa8..635497fa8 100644..100755
--- a/lib/pure/xmlparser.nim
+++ b/lib/pure/xmlparser.nim
diff --git a/lib/pure/xmltree.nim b/lib/pure/xmltree.nim
index 7b77fe156..7b77fe156 100644..100755
--- a/lib/pure/xmltree.nim
+++ b/lib/pure/xmltree.nim
diff --git a/lib/pure/yamllexer.nim b/lib/pure/yamllexer.nim
index 4640179c1..4640179c1 100644..100755
--- a/lib/pure/yamllexer.nim
+++ b/lib/pure/yamllexer.nim