diff options
author | Araq <rumpf_a@web.de> | 2010-10-31 00:16:01 +0200 |
---|---|---|
committer | Araq <rumpf_a@web.de> | 2010-10-31 00:16:01 +0200 |
commit | ec67d98286fcc8c550c6c747c937c5a8fceac749 (patch) | |
tree | caa1e86a4d5de7fd7299d7cbdc8117e9c619607d /lib/pure/pegs.nim | |
parent | 11d3d94438056dd98cf4a49411b18c61bfc0cc56 (diff) | |
download | Nim-ec67d98286fcc8c550c6c747c937c5a8fceac749.tar.gz |
pegs: bugfixes and extensions
Diffstat (limited to 'lib/pure/pegs.nim')
-rwxr-xr-x | lib/pure/pegs.nim | 49 |
1 files changed, 39 insertions, 10 deletions
diff --git a/lib/pure/pegs.nim b/lib/pure/pegs.nim index 6942f97ef..70d8d8d37 100755 --- a/lib/pure/pegs.nim +++ b/lib/pure/pegs.nim @@ -27,7 +27,7 @@ when useUnicode: import unicode const - InlineThreshold = 5 ## number of leaves; -1 to disable inlining + InlineThreshold = 5 ## number of leaves; -1 to disable inlining MaxSubpatterns* = 10 ## defines the maximum number of subpatterns that ## can be captured. More subpatterns cannot be captured! @@ -652,7 +652,7 @@ proc find*(s: string, pattern: TPeg, matches: var openarray[string], ## returns the starting position of ``pattern`` in ``s`` and the captured ## substrings in the array ``matches``. If it does not match, nothing ## is written into ``matches`` and -1 is returned. - for i in 0 .. s.len-1: + for i in start .. s.len-1: if matchLen(s, pattern, matches, i) >= 0: return i return -1 # could also use the pattern here: (!P .)* P @@ -661,10 +661,28 @@ proc find*(s: string, pattern: TPeg, start = 0): int {.nosideEffect, rtl, extern: "npegs$1".} = ## returns the starting position of ``pattern`` in ``s``. If it does not ## match, -1 is returned. - for i in 0 .. s.len-1: + for i in start .. s.len-1: if matchLen(s, pattern, i) >= 0: return i return -1 +iterator findAll*(s: string, pattern: TPeg, start = 0): string = + ## yields all matching captures of pattern in `s`. + var matches: array[0..MaxSubpatterns-1, string] + var i = start + while i < s.len: + var L = matchLen(s, pattern, matches, i) + if L < 0: break + for k in 0..maxSubPatterns-1: + if isNil(matches[k]): break + yield matches[k] + inc(i, L) + +proc findAll*(s: string, pattern: TPeg, start = 0): seq[string] {. + nosideEffect, rtl, extern: "npegs$1".} = + ## returns all matching captures of pattern in `s`. + ## If it does not match, @[] is returned. + accumulateResult(findAll(s, pattern, start)) + template `=~`*(s: string, pattern: TPeg): expr = ## This calls ``match`` with an implicit declared ``matches`` array that ## can be used in the scope of the ``=~`` call: @@ -699,15 +717,15 @@ proc contains*(s: string, pattern: TPeg, matches: var openArray[string], ## same as ``find(s, pattern, matches, start) >= 0`` return find(s, pattern, matches, start) >= 0 -proc startsWith*(s: string, prefix: TPeg): bool {. +proc startsWith*(s: string, prefix: TPeg, start = 0): bool {. nosideEffect, rtl, extern: "npegs$1".} = ## returns true if `s` starts with the pattern `prefix` - result = matchLen(s, prefix) >= 0 + result = matchLen(s, prefix, start) >= 0 -proc endsWith*(s: string, suffix: TPeg): bool {. +proc endsWith*(s: string, suffix: TPeg, start = 0): bool {. nosideEffect, rtl, extern: "npegs$1".} = ## returns true if `s` ends with the pattern `prefix` - for i in 0 .. s.len-1: + for i in start .. s.len-1: if matchLen(s, suffix, i) == s.len - i: return true proc replace*(s: string, sub: TPeg, by: string): string {. @@ -1194,6 +1212,7 @@ type nonterms: seq[PNonTerminal] modifier: TModifier captures: int + identIsVerbatim: bool proc pegError(p: TPegParser, msg: string, line = -1, col = -1) = var e: ref EInvalidPeg @@ -1245,7 +1264,12 @@ proc primary(p: var TPegParser): TPeg = else: nil case p.tok.kind of tkIdentifier: - if not arrowIsNextTok(p): + if p.identIsVerbatim: + var m = p.tok.modifier + if m == modNone: m = p.modifier + result = modifiedTerm(p.tok.literal, m) + getTok(p) + elif not arrowIsNextTok(p): var nt = getNonTerminal(p, p.tok.literal) incl(nt.flags, ntUsed) result = nonTerminal(nt) @@ -1366,6 +1390,7 @@ proc rawParse(p: var TPegParser): TPeg = while p.tok.kind != tkEof: discard parseRule(p) else: + p.identIsVerbatim = true result = parseExpr(p) if p.tok.kind != tkEof: pegError(p, "EOF expected, but found: " & p.tok.literal) @@ -1384,6 +1409,7 @@ proc parsePeg*(input: string, filename = "pattern", line = 1, col = 0): TPeg = p.tok.literal = "" p.tok.charset = {} p.nonterms = @[] + p.identIsVerbatim = false getTok(p) result = rawParse(p) @@ -1453,7 +1479,7 @@ when isMainModule: #const filename = "lib/devel/peg/grammar.txt" #var grammar = parsePeg(newFileStream(filename, fmRead), filename) #echo "a <- [abc]*?".match(grammar) - assert find("_____abc_______", term("abc")) == 5 + assert find("_____abc_______", term("abc"), 2) == 5 assert match("_______ana", peg"A <- 'ana' / . A") assert match("abcs%%%", peg"A <- ..A / .A / '%'") @@ -1480,9 +1506,12 @@ when isMainModule: assert false var matches: array[0..5, string] - if match("abcdefg", peg"'c' {'d'} 'ef' {'g'}", matches, 2): + if match("abcdefg", peg"c {d} ef {g}", matches, 2): assert matches[0] == "d" assert matches[1] == "g" else: assert false + for x in findAll("abcdef", peg"{.}", 3): + echo x + |