diff options
Diffstat (limited to 'lib/pure')
-rwxr-xr-x[-rw-r--r--] | lib/pure/browsers.nim | 0 | ||||
-rwxr-xr-x[-rw-r--r--] | lib/pure/cgi.nim | 0 | ||||
-rwxr-xr-x[-rw-r--r--] | lib/pure/colors.nim | 0 | ||||
-rwxr-xr-x[-rw-r--r--] | lib/pure/complex.nim | 0 | ||||
-rwxr-xr-x[-rw-r--r--] | lib/pure/dynlib.nim | 0 | ||||
-rwxr-xr-x[-rw-r--r--] | lib/pure/hashes.nim | 0 | ||||
-rwxr-xr-x[-rw-r--r--] | lib/pure/hashtabs.nim | 0 | ||||
-rwxr-xr-x[-rw-r--r--] | lib/pure/htmlparser.nim | 0 | ||||
-rwxr-xr-x[-rw-r--r--] | lib/pure/httpclient.nim | 0 | ||||
-rwxr-xr-x[-rw-r--r--] | lib/pure/httpserver.nim | 0 | ||||
-rwxr-xr-x[-rw-r--r--] | lib/pure/lexbase.nim | 0 | ||||
-rwxr-xr-x[-rw-r--r--] | lib/pure/logging.nim | 0 | ||||
-rwxr-xr-x[-rw-r--r--] | lib/pure/macros.nim | 0 | ||||
-rwxr-xr-x[-rw-r--r--] | lib/pure/math.nim | 0 | ||||
-rwxr-xr-x[-rw-r--r--] | lib/pure/md5.nim | 0 | ||||
-rwxr-xr-x[-rw-r--r--] | lib/pure/os.nim | 0 | ||||
-rwxr-xr-x[-rw-r--r--] | lib/pure/osproc.nim | 0 | ||||
-rwxr-xr-x[-rw-r--r--] | lib/pure/parsecfg.nim | 0 | ||||
-rwxr-xr-x[-rw-r--r--] | lib/pure/parsecsv.nim | 0 | ||||
-rwxr-xr-x[-rw-r--r--] | lib/pure/parseopt.nim | 0 | ||||
-rwxr-xr-x[-rw-r--r--] | lib/pure/parsesql.nim | 0 | ||||
-rwxr-xr-x[-rw-r--r--] | lib/pure/parseurl.nim | 0 | ||||
-rwxr-xr-x[-rw-r--r--] | lib/pure/parseutils.nim | 0 | ||||
-rwxr-xr-x[-rw-r--r--] | lib/pure/parsexml.nim | 0 | ||||
-rwxr-xr-x[-rw-r--r--] | lib/pure/pegs.nim | 109 | ||||
-rwxr-xr-x[-rw-r--r--] | lib/pure/re.nim | 0 | ||||
-rwxr-xr-x[-rw-r--r--] | lib/pure/regexprs.nim | 0 | ||||
-rwxr-xr-x[-rw-r--r--] | lib/pure/ropes.nim | 0 | ||||
-rwxr-xr-x[-rw-r--r--] | lib/pure/sockets.nim | 0 | ||||
-rwxr-xr-x[-rw-r--r--] | lib/pure/streams.nim | 0 | ||||
-rwxr-xr-x[-rw-r--r--] | lib/pure/strtabs.nim | 0 | ||||
-rwxr-xr-x[-rw-r--r--] | lib/pure/strutils.nim | 0 | ||||
-rwxr-xr-x[-rw-r--r--] | lib/pure/terminal.nim | 0 | ||||
-rwxr-xr-x[-rw-r--r--] | lib/pure/times.nim | 0 | ||||
-rwxr-xr-x[-rw-r--r--] | lib/pure/unicode.nim | 0 | ||||
-rwxr-xr-x[-rw-r--r--] | lib/pure/unidecode/gen.py | 0 | ||||
-rwxr-xr-x[-rw-r--r--] | lib/pure/unidecode/unidecode.dat | 0 | ||||
-rwxr-xr-x[-rw-r--r--] | lib/pure/unidecode/unidecode.nim | 0 | ||||
-rwxr-xr-x[-rw-r--r--] | lib/pure/variants.nim | 0 | ||||
-rwxr-xr-x[-rw-r--r--] | lib/pure/xmldom.nim | 0 | ||||
-rwxr-xr-x[-rw-r--r--] | lib/pure/xmldomparser.nim | 0 | ||||
-rwxr-xr-x[-rw-r--r--] | lib/pure/xmlgen.nim | 0 | ||||
-rwxr-xr-x[-rw-r--r--] | lib/pure/xmlparser.nim | 0 | ||||
-rwxr-xr-x[-rw-r--r--] | lib/pure/xmltree.nim | 0 | ||||
-rwxr-xr-x[-rw-r--r--] | lib/pure/yamllexer.nim | 0 |
45 files changed, 93 insertions, 16 deletions
diff --git a/lib/pure/browsers.nim b/lib/pure/browsers.nim index 243c07dad..243c07dad 100644..100755 --- a/lib/pure/browsers.nim +++ b/lib/pure/browsers.nim diff --git a/lib/pure/cgi.nim b/lib/pure/cgi.nim index 490ae926d..490ae926d 100644..100755 --- a/lib/pure/cgi.nim +++ b/lib/pure/cgi.nim diff --git a/lib/pure/colors.nim b/lib/pure/colors.nim index 548f07381..548f07381 100644..100755 --- a/lib/pure/colors.nim +++ b/lib/pure/colors.nim diff --git a/lib/pure/complex.nim b/lib/pure/complex.nim index f50ff4bd0..f50ff4bd0 100644..100755 --- a/lib/pure/complex.nim +++ b/lib/pure/complex.nim diff --git a/lib/pure/dynlib.nim b/lib/pure/dynlib.nim index 592073e3d..592073e3d 100644..100755 --- a/lib/pure/dynlib.nim +++ b/lib/pure/dynlib.nim diff --git a/lib/pure/hashes.nim b/lib/pure/hashes.nim index 1593119bd..1593119bd 100644..100755 --- a/lib/pure/hashes.nim +++ b/lib/pure/hashes.nim diff --git a/lib/pure/hashtabs.nim b/lib/pure/hashtabs.nim index 68d19d63b..68d19d63b 100644..100755 --- a/lib/pure/hashtabs.nim +++ b/lib/pure/hashtabs.nim diff --git a/lib/pure/htmlparser.nim b/lib/pure/htmlparser.nim index 278bf9b90..278bf9b90 100644..100755 --- a/lib/pure/htmlparser.nim +++ b/lib/pure/htmlparser.nim diff --git a/lib/pure/httpclient.nim b/lib/pure/httpclient.nim index 0f9054873..0f9054873 100644..100755 --- a/lib/pure/httpclient.nim +++ b/lib/pure/httpclient.nim diff --git a/lib/pure/httpserver.nim b/lib/pure/httpserver.nim index 2c85d8137..2c85d8137 100644..100755 --- a/lib/pure/httpserver.nim +++ b/lib/pure/httpserver.nim diff --git a/lib/pure/lexbase.nim b/lib/pure/lexbase.nim index bb207e92a..bb207e92a 100644..100755 --- a/lib/pure/lexbase.nim +++ b/lib/pure/lexbase.nim diff --git a/lib/pure/logging.nim b/lib/pure/logging.nim index 6df39f50b..6df39f50b 100644..100755 --- a/lib/pure/logging.nim +++ b/lib/pure/logging.nim diff --git a/lib/pure/macros.nim b/lib/pure/macros.nim index 677469ed2..677469ed2 100644..100755 --- a/lib/pure/macros.nim +++ b/lib/pure/macros.nim diff --git a/lib/pure/math.nim b/lib/pure/math.nim index cf4b6d95c..cf4b6d95c 100644..100755 --- a/lib/pure/math.nim +++ b/lib/pure/math.nim diff --git a/lib/pure/md5.nim b/lib/pure/md5.nim index e75f80b4c..e75f80b4c 100644..100755 --- a/lib/pure/md5.nim +++ b/lib/pure/md5.nim diff --git a/lib/pure/os.nim b/lib/pure/os.nim index 4bb25098d..4bb25098d 100644..100755 --- a/lib/pure/os.nim +++ b/lib/pure/os.nim diff --git a/lib/pure/osproc.nim b/lib/pure/osproc.nim index bbdea1eee..bbdea1eee 100644..100755 --- a/lib/pure/osproc.nim +++ b/lib/pure/osproc.nim diff --git a/lib/pure/parsecfg.nim b/lib/pure/parsecfg.nim index c26dab099..c26dab099 100644..100755 --- a/lib/pure/parsecfg.nim +++ b/lib/pure/parsecfg.nim diff --git a/lib/pure/parsecsv.nim b/lib/pure/parsecsv.nim index 5970f2090..5970f2090 100644..100755 --- a/lib/pure/parsecsv.nim +++ b/lib/pure/parsecsv.nim diff --git a/lib/pure/parseopt.nim b/lib/pure/parseopt.nim index 8f4be98f4..8f4be98f4 100644..100755 --- a/lib/pure/parseopt.nim +++ b/lib/pure/parseopt.nim diff --git a/lib/pure/parsesql.nim b/lib/pure/parsesql.nim index 2109c273a..2109c273a 100644..100755 --- a/lib/pure/parsesql.nim +++ b/lib/pure/parsesql.nim diff --git a/lib/pure/parseurl.nim b/lib/pure/parseurl.nim index cd3bc621a..cd3bc621a 100644..100755 --- a/lib/pure/parseurl.nim +++ b/lib/pure/parseurl.nim diff --git a/lib/pure/parseutils.nim b/lib/pure/parseutils.nim index 0f107793c..0f107793c 100644..100755 --- a/lib/pure/parseutils.nim +++ b/lib/pure/parseutils.nim diff --git a/lib/pure/parsexml.nim b/lib/pure/parsexml.nim index 598ae6c68..598ae6c68 100644..100755 --- a/lib/pure/parsexml.nim +++ b/lib/pure/parsexml.nim diff --git a/lib/pure/pegs.nim b/lib/pure/pegs.nim index 5ba0351ad..4f55a1883 100644..100755 --- a/lib/pure/pegs.nim +++ b/lib/pure/pegs.nim @@ -26,6 +26,8 @@ when useUnicode: const InlineThreshold = 5 ## number of leaves; -1 to disable inlining + MaxSubpatterns* = 10 ## defines the maximum number of subpatterns that + ## can be captured. More subpatterns cannot be captured! type TPegKind = enum @@ -50,17 +52,20 @@ type pkAndPredicate, ## &a --> Internal DSL: &a pkNotPredicate, ## !a --> Internal DSL: !a pkCapture, ## {a} --> Internal DSL: capture(a) + pkBackRef, ## $i --> Internal DSL: backref(i) + pkBackRefIgnoreCase, + pkBackRefIgnoreStyle, pkSearch, ## @a --> Internal DSL: @a pkRule, ## a <- b pkList ## a, b TNonTerminalFlag = enum ntDeclared, ntUsed TNonTerminal {.final.} = object ## represents a non terminal symbol - name: string ## the name of the symbol - line: int ## the line the symbol has been declared/used in - col: int ## the column the symbol has been declared/used in - flags: set[TNonTerminalFlag] ## the nonterminal's flags - rule: TNode ## the rule that the symbol refers to + name: string ## the name of the symbol + line: int ## line the symbol has been declared/used in + col: int ## column the symbol has been declared/used in + flags: set[TNonTerminalFlag] ## the nonterminal's flags + rule: TNode ## the rule that the symbol refers to TNode {.final.} = object case kind: TPegKind of pkEmpty, pkAny, pkAnyRune, pkGreedyAny, pkNewLine: nil @@ -68,6 +73,7 @@ type of pkChar, pkGreedyRepChar: ch: char of pkCharChoice, pkGreedyRepSet: charChoice: ref set[char] of pkNonTerminal: nt: PNonTerminal + of pkBackRef..pkBackRefIgnoreStyle: index: range[1..MaxSubpatterns] else: sons: seq[TNode] PNonTerminal* = ref TNonTerminal @@ -224,6 +230,24 @@ proc capture*(a: TPeg): TPeg = result.kind = pkCapture result.sons = @[a] +proc backref*(index: range[1..MaxSubPatterns]): TPeg = + ## constructs a back reference of the given `index`. `index` starts counting + ## from 1. + result.kind = pkBackRef + result.index = index-1 + +proc backrefIgnoreCase*(index: range[1..MaxSubPatterns]): TPeg = + ## constructs a back reference of the given `index`. `index` starts counting + ## from 1. Ignores case for matching. + result.kind = pkBackRefIgnoreCase + result.index = index-1 + +proc backrefIgnoreStyle*(index: range[1..MaxSubPatterns]): TPeg = + ## constructs a back reference of the given `index`. `index` starts counting + ## from 1. Ignores style for matching. + result.kind = pkBackRefIgnoreStyle + result.index = index-1 + proc spaceCost(n: TPeg): int = case n.kind of pkEmpty: nil @@ -285,10 +309,6 @@ template natural*: expr = ## same as ``\d+`` +digits -const - MaxSubpatterns* = 10 ## defines the maximum number of subpatterns that - ## can be captured. More subpatterns cannot be captured! - # ------------------------- debugging ----------------------------------------- proc esc(c: char, reserved = {'\0'..'\255'}): string = @@ -394,6 +414,15 @@ proc toStrAux(r: TPeg, res: var string) = add(res, '{') toStrAux(r.sons[0], res) add(res, '}') + of pkBackRef: + add(res, '$') + add(res, $r.index) + of pkBackRefIgnoreCase: + add(res, "i$") + add(res, $r.index) + of pkBackRefIgnoreStyle: + add(res, "y$") + add(res, $r.index) of pkRule: toStrAux(r.sons[0], res) add(res, " <- ") @@ -559,6 +588,18 @@ proc m(s: string, p: TPeg, start: int, c: var TMatchClosure): int = #else: silently ignore the capture else: c.ml = idx + of pkBackRef: + if p.index >= c.ml: return -1 + var (a, b) = c.matches[p.index] + result = m(s, term(s.copy(a, b)), start, c) + of pkBackRefIgnoreCase: + if p.index >= c.ml: return -1 + var (a, b) = c.matches[p.index] + result = m(s, termIgnoreCase(s.copy(a, b)), start, c) + of pkBackRefIgnoreStyle: + if p.index >= c.ml: return -1 + var (a, b) = c.matches[p.index] + result = m(s, termIgnoreStyle(s.copy(a, b)), start, c) of pkRule, pkList: assert false proc match*(s: string, pattern: TPeg, matches: var openarray[string], @@ -784,13 +825,15 @@ type tkOption, ## '?' tkAt, ## '@' tkBuiltin, ## \identifier - tkEscaped ## \\ + tkEscaped, ## \\ + tkDollar ## '$' TToken {.final.} = object ## a token kind: TTokKind ## the type of the token modifier: TModifier literal: string ## the parsed (string) literal charset: set[char] ## if kind == tkCharSet + index: int ## if kind == tkDollar TPegLexer = object ## the lexer object. bufpos: int ## the current position within the buffer @@ -804,7 +847,7 @@ const tokKindToStr: array[TTokKind, string] = [ "invalid", "[EOF]", ".", "_", "identifier", "string literal", "character set", "(", ")", "{", "}", "<-", "/", "*", "+", "&", "!", "?", - "@", "built-in", "escaped" + "@", "built-in", "escaped", "$" ] proc HandleCR(L: var TPegLexer, pos: int): int = @@ -945,6 +988,19 @@ proc getString(c: var TPegLexer, tok: var TToken) = Inc(pos) c.bufpos = pos +proc getDollar(c: var TPegLexer, tok: var TToken) = + var pos = c.bufPos + 1 + var buf = c.buf + if buf[pos] in {'0'..'9'}: + tok.kind = tkDollar + tok.index = 0 + while buf[pos] in {'0'..'9'}: + tok.index = tok.index * 10 + ord(buf[pos]) - ord('0') + inc(pos) + else: + tok.kind = tkInvalid + c.bufpos = pos + proc getCharSet(c: var TPegLexer, tok: var TToken) = tok.kind = tkCharSet tok.charset = {} @@ -1050,19 +1106,23 @@ proc getTok(c: var TPegLexer, tok: var TToken) = of '\\': getBuiltin(c, tok) of '\'', '"': getString(c, tok) + of '$': getDollar(c, tok) of '\0': tok.kind = tkEof tok.literal = "[EOF]" of 'a'..'z', 'A'..'Z', '\128'..'\255': getSymbol(c, tok) - if c.buf[c.bufpos] in {'\'', '"'}: + if c.buf[c.bufpos] in {'\'', '"', '$'}: case tok.literal of "i": tok.modifier = modIgnoreCase of "y": tok.modifier = modIgnoreStyle of "v": tok.modifier = modVerbatim else: nil setLen(tok.literal, 0) - getString(c, tok) + if c.buf[c.bufpos] == '$': + getDollar(c, tok) + else: + getString(c, tok) if tok.modifier == modNone: tok.kind = tkInvalid of '+': tok.kind = tkPlus @@ -1117,8 +1177,7 @@ type tok: TToken nonterms: seq[PNonTerminal] modifier: TModifier - -proc getTok(p: var TPegParser) = getTok(p, p.tok) + captures: int proc pegError(p: TPegParser, msg: string, line = -1, col = -1) = var e: ref EInvalidPeg @@ -1126,6 +1185,10 @@ proc pegError(p: TPegParser, msg: string, line = -1, col = -1) = e.msg = errorStr(p, msg, line, col) raise e +proc getTok(p: var TPegParser) = + getTok(p, p.tok) + if p.tok.kind == tkInvalid: pegError(p, "invalid token") + proc eat(p: var TPegParser, kind: TTokKind) = if p.tok.kind == kind: getTok(p) else: pegError(p, tokKindToStr[kind] & " expected") @@ -1146,6 +1209,12 @@ proc modifiedTerm(s: string, m: TModifier): TPeg = of modIgnoreCase: result = termIgnoreCase(s) of modIgnoreStyle: result = termIgnoreStyle(s) +proc modifiedBackref(s: int, m: TModifier): TPeg = + case m + of modNone, modVerbatim: result = backRef(s) + of modIgnoreCase: result = backRefIgnoreCase(s) + of modIgnoreStyle: result = backRefIgnoreStyle(s) + proc primary(p: var TPegParser): TPeg = case p.tok.kind of tkAmp: @@ -1185,6 +1254,7 @@ proc primary(p: var TPegParser): TPeg = getTok(p) result = capture(parseExpr(p)) eat(p, tkCurlyRi) + inc(p.captures) of tkAny: result = any() getTok(p) @@ -1206,6 +1276,13 @@ proc primary(p: var TPegParser): TPeg = of tkEscaped: result = term(p.tok.literal[0]) getTok(p) + of tkDollar: + var m = p.tok.modifier + if m == modNone: m = p.modifier + result = modifiedBackRef(p.tok.index, m) + if p.tok.index < 0 or p.tok.index > p.captures: + pegError(p, "invalid back reference index: " & $p.tok.index) + getTok(p) else: pegError(p, "expression expected, but found: " & p.tok.literal) getTok(p) # we must consume a token here to prevent endless loops! @@ -1227,7 +1304,7 @@ proc seqExpr(p: var TPegParser): TPeg = while true: case p.tok.kind of tkAmp, tkNot, tkAt, tkStringLit, tkCharset, tkParLe, tkCurlyLe, - tkAny, tkAnyRune, tkBuiltin, tkEscaped: + tkAny, tkAnyRune, tkBuiltin, tkEscaped, tkDollar: result = sequence(result, primary(p)) of tkIdentifier: if not arrowIsNextTok(p): diff --git a/lib/pure/re.nim b/lib/pure/re.nim index 953f9c744..953f9c744 100644..100755 --- a/lib/pure/re.nim +++ b/lib/pure/re.nim diff --git a/lib/pure/regexprs.nim b/lib/pure/regexprs.nim index 43c7f05be..43c7f05be 100644..100755 --- a/lib/pure/regexprs.nim +++ b/lib/pure/regexprs.nim diff --git a/lib/pure/ropes.nim b/lib/pure/ropes.nim index df85baf92..df85baf92 100644..100755 --- a/lib/pure/ropes.nim +++ b/lib/pure/ropes.nim diff --git a/lib/pure/sockets.nim b/lib/pure/sockets.nim index 85628db78..85628db78 100644..100755 --- a/lib/pure/sockets.nim +++ b/lib/pure/sockets.nim diff --git a/lib/pure/streams.nim b/lib/pure/streams.nim index f4d2911fc..f4d2911fc 100644..100755 --- a/lib/pure/streams.nim +++ b/lib/pure/streams.nim diff --git a/lib/pure/strtabs.nim b/lib/pure/strtabs.nim index 8ea59637a..8ea59637a 100644..100755 --- a/lib/pure/strtabs.nim +++ b/lib/pure/strtabs.nim diff --git a/lib/pure/strutils.nim b/lib/pure/strutils.nim index 724d00ee9..724d00ee9 100644..100755 --- a/lib/pure/strutils.nim +++ b/lib/pure/strutils.nim diff --git a/lib/pure/terminal.nim b/lib/pure/terminal.nim index 42bd80cb4..42bd80cb4 100644..100755 --- a/lib/pure/terminal.nim +++ b/lib/pure/terminal.nim diff --git a/lib/pure/times.nim b/lib/pure/times.nim index 70cb038a7..70cb038a7 100644..100755 --- a/lib/pure/times.nim +++ b/lib/pure/times.nim diff --git a/lib/pure/unicode.nim b/lib/pure/unicode.nim index 099509afe..099509afe 100644..100755 --- a/lib/pure/unicode.nim +++ b/lib/pure/unicode.nim diff --git a/lib/pure/unidecode/gen.py b/lib/pure/unidecode/gen.py index 8da0136ff..8da0136ff 100644..100755 --- a/lib/pure/unidecode/gen.py +++ b/lib/pure/unidecode/gen.py diff --git a/lib/pure/unidecode/unidecode.dat b/lib/pure/unidecode/unidecode.dat index 9dff0a4a9..9dff0a4a9 100644..100755 --- a/lib/pure/unidecode/unidecode.dat +++ b/lib/pure/unidecode/unidecode.dat diff --git a/lib/pure/unidecode/unidecode.nim b/lib/pure/unidecode/unidecode.nim index a665dd73e..a665dd73e 100644..100755 --- a/lib/pure/unidecode/unidecode.nim +++ b/lib/pure/unidecode/unidecode.nim diff --git a/lib/pure/variants.nim b/lib/pure/variants.nim index 0b4f078e7..0b4f078e7 100644..100755 --- a/lib/pure/variants.nim +++ b/lib/pure/variants.nim diff --git a/lib/pure/xmldom.nim b/lib/pure/xmldom.nim index babf60108..babf60108 100644..100755 --- a/lib/pure/xmldom.nim +++ b/lib/pure/xmldom.nim diff --git a/lib/pure/xmldomparser.nim b/lib/pure/xmldomparser.nim index f338ca2e5..f338ca2e5 100644..100755 --- a/lib/pure/xmldomparser.nim +++ b/lib/pure/xmldomparser.nim diff --git a/lib/pure/xmlgen.nim b/lib/pure/xmlgen.nim index 29f2700f2..29f2700f2 100644..100755 --- a/lib/pure/xmlgen.nim +++ b/lib/pure/xmlgen.nim diff --git a/lib/pure/xmlparser.nim b/lib/pure/xmlparser.nim index 635497fa8..635497fa8 100644..100755 --- a/lib/pure/xmlparser.nim +++ b/lib/pure/xmlparser.nim diff --git a/lib/pure/xmltree.nim b/lib/pure/xmltree.nim index 7b77fe156..7b77fe156 100644..100755 --- a/lib/pure/xmltree.nim +++ b/lib/pure/xmltree.nim diff --git a/lib/pure/yamllexer.nim b/lib/pure/yamllexer.nim index 4640179c1..4640179c1 100644..100755 --- a/lib/pure/yamllexer.nim +++ b/lib/pure/yamllexer.nim |