diff options
Diffstat (limited to 'lib/pure/pegs.nim')
-rw-r--r-- | lib/pure/pegs.nim | 76 |
1 files changed, 45 insertions, 31 deletions
diff --git a/lib/pure/pegs.nim b/lib/pure/pegs.nim index 830429842..39c5790ed 100644 --- a/lib/pure/pegs.nim +++ b/lib/pure/pegs.nim @@ -1010,14 +1010,18 @@ proc replace*(s: string, sub: Peg, cb: proc( inc(m) add(result, substr(s, i)) -proc transformFile*(infile, outfile: string, - subs: varargs[tuple[pattern: Peg, repl: string]]) {. - rtl, extern: "npegs$1".} = - ## reads in the file `infile`, performs a parallel replacement (calls - ## `parallelReplace`) and writes back to `outfile`. Raises ``EIO`` if an - ## error occurs. This is supposed to be used for quick scripting. - var x = readFile(infile).string - writeFile(outfile, x.parallelReplace(subs)) +when not defined(js): + proc transformFile*(infile, outfile: string, + subs: varargs[tuple[pattern: Peg, repl: string]]) {. + rtl, extern: "npegs$1".} = + ## reads in the file `infile`, performs a parallel replacement (calls + ## `parallelReplace`) and writes back to `outfile`. Raises ``EIO`` if an + ## error occurs. This is supposed to be used for quick scripting. + ## + ## **Note**: this proc does not exist while using the JS backend. + var x = readFile(infile).string + writeFile(outfile, x.parallelReplace(subs)) + iterator split*(s: string, sep: Peg): string = ## Splits the string `s` into substrings. @@ -1121,7 +1125,7 @@ proc handleCR(L: var PegLexer, pos: int): int = assert(L.buf[pos] == '\c') inc(L.lineNumber) result = pos+1 - if L.buf[result] == '\L': inc(result) + if result < L.buf.len and L.buf[result] == '\L': inc(result) L.lineStart = result proc handleLF(L: var PegLexer, pos: int): int = @@ -1217,12 +1221,13 @@ proc getEscapedChar(c: var PegLexer, tok: var Token) = proc skip(c: var PegLexer) = var pos = c.bufpos var buf = c.buf - while true: + while pos < c.buf.len: case buf[pos] of ' ', '\t': inc(pos) of '#': - while not (buf[pos] in {'\c', '\L', '\0'}): inc(pos) + while (pos < c.buf.len) and + not (buf[pos] in {'\c', '\L', '\0'}): inc(pos) of '\c': pos = handleCR(c, pos) buf = c.buf @@ -1238,7 +1243,7 @@ proc getString(c: var PegLexer, tok: var Token) = var pos = c.bufpos + 1 var buf = c.buf var quote = buf[pos-1] - while true: + while pos < c.buf.len: case buf[pos] of '\\': c.bufpos = pos @@ -1261,7 +1266,7 @@ proc getDollar(c: var PegLexer, tok: var Token) = if buf[pos] in {'0'..'9'}: tok.kind = tkBackref tok.index = 0 - while buf[pos] in {'0'..'9'}: + while pos < c.buf.len and buf[pos] in {'0'..'9'}: tok.index = tok.index * 10 + ord(buf[pos]) - ord('0') inc(pos) else: @@ -1277,11 +1282,11 @@ proc getCharSet(c: var PegLexer, tok: var Token) = if buf[pos] == '^': inc(pos) caret = true - while true: + while pos < c.buf.len: var ch: char case buf[pos] of ']': - inc(pos) + if pos < c.buf.len: inc(pos) break of '\\': c.bufpos = pos @@ -1296,11 +1301,14 @@ proc getCharSet(c: var PegLexer, tok: var Token) = inc(pos) incl(tok.charset, ch) if buf[pos] == '-': - if buf[pos+1] == ']': + if pos+1 < c.buf.len and buf[pos+1] == ']': incl(tok.charset, '-') inc(pos) else: - inc(pos) + if pos+1 < c.buf.len: + inc(pos) + else: + break var ch2: char case buf[pos] of '\\': @@ -1312,8 +1320,11 @@ proc getCharSet(c: var PegLexer, tok: var Token) = tok.kind = tkInvalid break else: - ch2 = buf[pos] - inc(pos) + if pos+1 < c.buf.len: + ch2 = buf[pos] + inc(pos) + else: + break for i in ord(ch)+1 .. ord(ch2): incl(tok.charset, chr(i)) c.bufpos = pos @@ -1322,15 +1333,15 @@ proc getCharSet(c: var PegLexer, tok: var Token) = proc getSymbol(c: var PegLexer, tok: var Token) = var pos = c.bufpos var buf = c.buf - while true: + while pos < c.buf.len: add(tok.literal, buf[pos]) inc(pos) - if buf[pos] notin strutils.IdentChars: break + if pos < buf.len and buf[pos] notin strutils.IdentChars: break c.bufpos = pos tok.kind = tkIdentifier proc getBuiltin(c: var PegLexer, tok: var Token) = - if c.buf[c.bufpos+1] in strutils.Letters: + if c.bufpos+1 < c.buf.len and c.buf[c.bufpos+1] in strutils.Letters: inc(c.bufpos) getSymbol(c, tok) tok.kind = tkBuiltin @@ -1343,10 +1354,12 @@ proc getTok(c: var PegLexer, tok: var Token) = tok.modifier = modNone setLen(tok.literal, 0) skip(c) + case c.buf[c.bufpos] of '{': inc(c.bufpos) - if c.buf[c.bufpos] == '@' and c.buf[c.bufpos+1] == '}': + if c.buf[c.bufpos] == '@' and c.bufpos+2 < c.buf.len and + c.buf[c.bufpos+1] == '}': tok.kind = tkCurlyAt inc(c.bufpos, 2) add(tok.literal, "{@}") @@ -1379,13 +1392,11 @@ proc getTok(c: var PegLexer, tok: var Token) = getBuiltin(c, tok) of '\'', '"': getString(c, tok) of '$': getDollar(c, tok) - of '\0': - tok.kind = tkEof - tok.literal = "[EOF]" of 'a'..'z', 'A'..'Z', '\128'..'\255': getSymbol(c, tok) if c.buf[c.bufpos] in {'\'', '"'} or - c.buf[c.bufpos] == '$' and c.buf[c.bufpos+1] in {'0'..'9'}: + c.buf[c.bufpos] == '$' and c.bufpos+1 < c.buf.len and + c.buf[c.bufpos+1] in {'0'..'9'}: case tok.literal of "i": tok.modifier = modIgnoreCase of "y": tok.modifier = modIgnoreStyle @@ -1406,7 +1417,7 @@ proc getTok(c: var PegLexer, tok: var Token) = inc(c.bufpos) add(tok.literal, '+') of '<': - if c.buf[c.bufpos+1] == '-': + if c.bufpos+2 < c.buf.len and c.buf[c.bufpos+1] == '-': inc(c.bufpos, 2) tok.kind = tkArrow add(tok.literal, "<-") @@ -1441,14 +1452,17 @@ proc getTok(c: var PegLexer, tok: var Token) = inc(c.bufpos) add(tok.literal, '^') else: + if c.bufpos >= c.buf.len: + tok.kind = tkEof + tok.literal = "[EOF]" add(tok.literal, c.buf[c.bufpos]) inc(c.bufpos) proc arrowIsNextTok(c: PegLexer): bool = # the only look ahead we need var pos = c.bufpos - while c.buf[pos] in {'\t', ' '}: inc(pos) - result = c.buf[pos] == '<' and c.buf[pos+1] == '-' + while pos < c.buf.len and c.buf[pos] in {'\t', ' '}: inc(pos) + result = c.buf[pos] == '<' and (pos+1 < c.buf.len) and c.buf[pos+1] == '-' # ----------------------------- parser ---------------------------------------- @@ -1471,7 +1485,7 @@ proc pegError(p: PegParser, msg: string, line = -1, col = -1) = proc getTok(p: var PegParser) = getTok(p, p.tok) - if p.tok.kind == tkInvalid: pegError(p, "invalid token") + if p.tok.kind == tkInvalid: pegError(p, "'" & p.tok.literal & "' is invalid token") proc eat(p: var PegParser, kind: TokKind) = if p.tok.kind == kind: getTok(p) |