diff options
author | Araq <rumpf_a@web.de> | 2018-10-18 17:28:00 +0200 |
---|---|---|
committer | Araq <rumpf_a@web.de> | 2018-10-18 17:28:00 +0200 |
commit | c64391e30b22b7ca2c97adde22e6258e5d2463fb (patch) | |
tree | 7cc2bb7968fcbf1c13e552a3df791e184f51c316 | |
parent | 68c6d709d36beafd3bf44c15de399adb9c4f79fe (diff) | |
download | Nim-c64391e30b22b7ca2c97adde22e6258e5d2463fb.tar.gz |
closes #6220
-rw-r--r-- | tests/generics/tparser_generator.nim | 415 |
1 files changed, 415 insertions, 0 deletions
diff --git a/tests/generics/tparser_generator.nim b/tests/generics/tparser_generator.nim new file mode 100644 index 000000000..01ddd29b8 --- /dev/null +++ b/tests/generics/tparser_generator.nim @@ -0,0 +1,415 @@ +discard """ + output: '''Match failed: spam +Match failed: ham''' +""" + +# bug #6220 + +import nre +import options +import strutils except isAlpha, isLower, isUpper, isSpace +from unicode import isAlpha, isLower, isUpper, isTitle, isWhiteSpace +import os + +const debugLex = false + +template debug(enable: bool, text: string): typed = + when enable: + echo(text) + +type + Parser[N, T] = proc(text: T, start: int, nodes: var seq[Node[N]]): int {.closure.} + + RuleObj[N, T] = object + parser: Parser[N, T] + kind: N + + Rule[N, T] = ref RuleObj[N, T] + + NodeKind = enum + terminal, + nonterminal + + Node*[N] = object of RootObj + # Uncomment the following lines and the compiler crashes + # case nodeKind: NodeKind + # of nonterminal: + # kids: Node[N] + # of terminal: + # discard + start*: int + length*: int + kind*: N + + + NonTerminal[N] = object of Node + children: seq[Node[N]] + +proc newRule[N, T](parser: Parser, kind: N): Rule[N, T] = + new(result) + result.parser = parser + result.kind = kind + +proc newRule[N, T](kind: N): Rule[N, T] = + new(result) + result.kind = kind + +proc initNode[N](start: int, length: int, kind: N): Node[N] = + result.start = start + result.length = length + result.kind = kind + +proc initNode[N](start: int, length: int, children: seq[Node[N]], kind: N): NonTerminal[N] = + result.start = start + result.length = length + result.kind = kind + result.children = children + +proc substr[T](text: T, first, last: int): T = + text[first .. last] + +proc continuesWith[N](text: seq[Node[N]], subtext: seq[N], start: Natural): bool = + let length = len(text) + var pos = 0 + while pos < len(subtext): + let textpos = start + pos + if textpos == len(text): + return false + if text[textpos].kind != subtext[pos].kind: + return false + pos+=1 + return true + + +proc render*[N, T](text: T, nodes: seq[Node[N]]): string = + ## Uses a sequence of Nodes to render a given text string + result = "" + for node in nodes: + result.add("<" & node.value(text) & ">") + +proc render*[N, T](rule: Rule[N, T], text: string): string = + ## Uses a rule to render a given text string + render(text, rule.parse(text)) + +proc render*[N, T](text: T, nodes: seq[Node[N]], source: string): string = + result = "" + for node in nodes: + result.add("[" & node.value(text, source) & "]") + +proc render*[N, T, X](rule: Rule[N, T], text: seq[Node[X]], source: string): string = + ## Uses a rule to render a given series of nodes, providing the source string + text.render(rule.parse(text, source = source), source) + +proc annotate*[N, T](node: Node[N], text: T): string = + result = "<" & node.value(text) & ":" & $node.kind & ">" + +proc annotate*[N, T](nodes: seq[Node[N]], text: T): string = + result = "" + for node in nodes: + result.add(node.annotate(text)) + +proc annotate*[N, T](rule: Rule[N, T], text: T): string = + annotate(rule.parse(text), text) + +proc value*[N, T](node: Node[N], text: T): string = + result = $text.substr(node.start, node.start + node.length - 1) + +proc value*[N, X](node: Node[N], text: seq[Node[X]], source: string): string = + result = "" + for n in node.start ..< node.start + node.length: + result &= text[n].annotate(source) + +proc parse*[N, T](rule: Rule[N, T], text: T, start = 0, source: string = ""): seq[Node[N]] = + result = newSeq[Node[N]]() + debug(debugLex, "Parsing: " & $text) + let length = rule.parser(text, start, result) + + when T is string: + if length == -1: + echo("Match failed: " & $text) + result = @[] + elif length == len(text): + debug(debugLex, "Matched: " & $text & " => " & $len(result) & " tokens: " & text.render(result)) + else: + echo("Matched first " & $length & " symbols: " & $text & " => " & $len(result) & " tokens: " & text.render(result)) + else: + if length == -1: + echo("Match failed: " & $text) + result = @[] + elif length == len(text): + debug(debugLex, "Matched: " & $text & " => " & $len(result) & " tokens: " & text.render(result, source)) + else: + echo("Matched first " & $length & " symbols: " & $text & " => " & $len(result) & " tokens: " & text.render(result, source)) + + +proc literal*[N, T, P](pattern: P, kind: N): Rule[N, T] = + let parser = proc (text: T, start: int, nodes: var seq[Node[N]]): int = + if start == len(text): + return -1 + assert(len(text)>start, "Attempting to match at $#, string length is $# " % [$start, $len(text)]) + when P is string or P is seq[N]: + debug(debugLex, "Literal[" & $kind & "]: testing " & $pattern & " at " & $start & ": " & $text[start..start+len(pattern)-1]) + if text.continuesWith(pattern, start): + let node = initNode(start, len(pattern), kind) + nodes.add(node) + debug(debugLex, "Literal: matched <" & $text[start ..< start+node.length] & ":" & $node.length & ">" ) + return node.length + elif P is char: + debug(debugLex, "Literal[" & $kind & "]: testing " & $pattern & " at " & $start & ": " & $text[start]) + if text[start] == pattern: + let node = initNode(start, 1, kind) + nodes.add(node) + return 1 + else: + debug(debugLex, "Literal[" & $kind & "]: testing " & $pattern & " at " & $start & ": " & $text[start]) + if text[start].kind == pattern: + let node = initNode(start, 1, kind) + nodes.add(node) + return 1 + return -1 + result = newRule[N, T](parser, kind) + +proc token[N, T](pattern: T, kind: N): Rule[N, T] = + when T is not string: + {.fatal: "Token is only supported for strings".} + let parser = proc (text: T, start: int, nodes: var seq[Node[N]]): int = + debug(debugLex, "Token[" & $kind & "]: testing " & pattern & " at " & $start) + if start == len(text): + return -1 + assert(len(text)>start, "Attempting to match at $#, string length is $# " % [$start, $len(text)]) + let m = text.match(re(pattern), start) + if m.isSome: + let node = initNode(start, len(m.get.match), kind) + nodes.add(node) + result = node.length + debug(debugLex, "Token: matched <" & text[start ..< start+node.length] & ":" & $node.length & ">" ) + else: + result = -1 + result = newRule[N, T](parser, kind) + +proc chartest[N, T, S](testfunc: proc(s: S): bool, kind: N): Rule[N, T] = + let parser = proc (text: T, start: int, nodes: var seq[Node[N]]): int = + if start == len(text): + return -1 + assert(len(text)>start, "Attempting to match at $#, string length is $# " % [$start, $len(text)]) + if testfunc(text[start]): + nodes.add(initNode(start, 1, kind)) + result = 1 + else: + result = -1 + result = newRule[N, T](parser, kind) + +proc any*[N, T, S](symbols: T, kind: N): Rule[N, T] = + let test = proc(s: S): bool = + when S is string: + debug(debugLex, "Any[" & $kind & "]: testing for " & symbols.replace("\n", "\\n").replace("\r", "\\r")) + else: + debug(debugLex, "Any[" & $kind & "]: testing for " & $symbols) + result = s in symbols + result = chartest[N, T, S](test, kind) + +proc ignore*[N, T](rule: Rule[N, T]): Rule[N, T] = + let parser = proc (text: T, start: int, nodes: var seq[Node[N]]): int = + var mynodes = newSeq[Node[N]]() + result = rule.parser(text, start, mynodes) + result = newRule[N, T](parser, rule.kind) + +proc combine*[N, T](rule: Rule[N, T], kind: N): Rule[N, T] = + let parser = proc (text: T, start: int, nodes: var seq[Node[N]]): int = + var mynodes = newSeq[Node[N]]() + result = rule.parser(text, start, mynodes) + nodes.add(initNode(start, result, kind)) + result = newRule[N, T](parser, kind) + +proc build*[N, T](rule: Rule[N, T], kind: N): Rule[N, T] = + let parser = proc (text: T, start: int, nodes: var seq[Node[N]]): int = + var mynodes = newSeq[Node[N]]() + result = rule.parser(text, start, mynodes) + let nonTerminal = initNode(start, result, mynodes, kind) + nodes.add(nonTerminal) + result = newRule[N, T](parser, kind) + +proc fail*[N, T](message: string, kind: N): Rule[N, T] = + let parser = proc (text: T, start: int, nodes: var seq[Node[N]]): int = + let lineno = countLines(text[0..start]) + var startline = start + var endline = start + while startline>0: + if text[startline] in NewLines: + break + startline-=1 + while endline < len(text): + if text[endline] in NewLines: + break + endline+=1 + let charno = start-startline + echo text.substr(startline, endline) + echo ' '.repeat(max(charno,0)) & '^' + raise newException(ValueError, "Position: " & $start & " Line: " & $lineno & ", Symbol: " & $charno & ": " & message) + result = newRule[N, T](parser, kind) + +proc `+`*[N, T](left: Rule[N, T], right: Rule[N, T]): Rule[N, T] = + let parser = proc (text: T, start: int, nodes: var seq[Node[N]]): int = + var mynodes = newSeq[Node[N]]() + assert(not isNil(left.parser), "Left hand side parser is nil") + let leftlength = left.parser(text, start, mynodes) + if leftlength == -1: + return leftlength + assert(not isNil(right.parser), "Right hand side parser is nil") + let rightlength = right.parser(text, start+leftlength, mynodes) + if rightlength == -1: + return rightlength + result = leftlength + rightlength + nodes.add(mynodes) + result = newRule[N, T](parser, left.kind) + +proc `/`*[N, T](left: Rule[N, T], right: Rule[N, T]): Rule[N, T] = + let parser = proc (text: T, start: int, nodes: var seq[Node[N]]): int = + var mynodes = newSeq[Node[N]]() + assert(not isNil(left.parser), "Left hand side of / is not fully defined") + let leftlength = left.parser(text, start, mynodes) + if leftlength != -1: + nodes.add(mynodes) + return leftlength + mynodes = newSeq[Node[N]]() + assert(not isNil(right.parser), "Right hand side of / is not fully defined") + let rightlength = right.parser(text, start, mynodes) + if rightlength == -1: + return rightlength + nodes.add(mynodes) + return rightlength + result = newRule[N, T](parser, left.kind) + +proc `?`*[N, T](rule: Rule[N, T]): Rule[N, T] = + let parser = proc (text: T, start: int, nodes: var seq[Node[N]]): int = + let success = rule.parser(text, start, nodes) + return if success != -1: success else: 0 + result = newRule[N, T](parser, rule.kind) + +proc `+`*[N, T](rule: Rule[N, T]): Rule[N, T] = + let parser = proc (text: T, start: int, nodes: var seq[Node[N]]): int = + var success = rule.parser(text, start, nodes) + if success == -1: + return success + var total = 0 + while success != -1 and start+total < len(text): + total += success + success = rule.parser(text, start+total, nodes) + return total + result = newRule[N, T](parser, rule.kind) + +proc `*`*[N, T](rule: Rule[N, T]): Rule[N, T] = + let parser = proc (text: T, start: int, nodes: var seq[Node[N]]): int = + let success = (+rule).parser(text, start, nodes) + return if success != -1: success else: 0 + result = newRule[N, T](parser, rule.kind) + +#Note: this consumes - for zero-width lookahead see ! +proc `^`*[N, T](rule: Rule[N, T]): Rule[N, T] = + let parser = proc (text: T, start: int, nodes: var seq[Node[N]]): int = + var mynodes = newSeq[Node[N]]() + let success = rule.parser(text, start, mynodes) + return if success == -1: 1 else: -1 + result = newRule[N, T](parser, rule.kind) + +proc `*`*[N, T](repetitions: int, rule: Rule[N, T]): Rule[N, T] = + let parser = proc (text: T, start: int, nodes: var seq[Node[N]]): int = + var mynodes = newSeq[Node[N]]() + var total = 0 + for i in 0..<repetitions: + let success = rule.parser(text, start+total, mynodes) + if success == -1: + return success + else: + total += success + nodes.add(mynodes) + return total + result = newRule[N, T](parser, rule.kind) + +# Positive zero-width lookahead +proc `&`*[N, T](rule: Rule[N, T]): Rule[N, T] = + let parser = proc (text: T, start: int, nodes: var seq[Node[N]]): int = + var mynodes = newSeq[Node[N]]() + let success = rule.parser(text, start, mynodes) + return if success != -1: 0 else: -1 + result = newRule[N, T](parser, rule.kind) + +# Negative zero-width lookahead +proc `!`*[N, T](rule: Rule[N, T]): Rule[N, T] = + let parser = proc (text: T, start: int, nodes: var seq[Node[N]]): int = + var mynodes = newSeq[Node[N]]() + let failure = rule.parser(text, start, mynodes) + return if failure == -1: 0 else: -1 + result = newRule[N, T](parser, rule.kind) + +proc `/`*[N, T](rule: Rule[N, T]): Rule[N, T] = + let parser = proc (text: T, start: int, nodes: var seq[Node[N]]): int = + var mynodes = newSeq[Node[N]]() + var length = 0 + var success = rule.parser(text, start+length, mynodes) + while success == -1 and start+length < len(text): + length += 1 + success = rule.parser(text, start+length, mynodes) + if start+length >= len(text): + result = -1 + else: + nodes.add(initNode(start, length, rule.kind)) + nodes.add(mynodes) + result = length + success + result = newRule[N, T](parser, rule.kind) + +proc `->`*(rule: Rule, production: Rule) = + assert(not isnil(production.parser), "Right hand side of -> is nil - has the rule been defined yet?") + rule.parser = production.parser + +template grammar*[K](Kind, Text, Symbol: typedesc; default: K, code: untyped): typed {.hint[XDeclaredButNotUsed]: off.} = + + proc newRule(): Rule[Kind, Text] {.inject.} = newRule[Kind, Text](default) + proc chartest(testfunc: proc(c: Symbol): bool): Rule[Kind, Text] {.inject.} = chartest[Kind, Text, Symbol](testfunc, default) + proc literal[P](pattern: P, kind: K): Rule[Kind, Text] {.inject.} = literal[Kind, Text, P](pattern, kind) + proc literal[P](pattern: P): Rule[Kind, Text] {.inject.} = literal[Kind, Text, P](pattern, default) + + when Text is string: + proc token(pattern: string): Rule[Kind, Text] {.inject.} = token(pattern, default) + proc fail(message: string): Rule[Kind, Text] {.inject.} = fail[Kind, Text](message, default) + let alpha {.inject.} = chartest[Kind, Text, Symbol](isAlphaAscii, default) + let alphanumeric {.inject.}= chartest[Kind, Text, Symbol](isAlphaNumeric, default) + let digit {.inject.} = chartest[Kind, Text, Symbol](isDigit, default) + let lower {.inject.} = chartest[Kind, Text, Symbol](isLowerAscii, default) + let upper {.inject.} = chartest[Kind, Text, Symbol](isUpperAscii, default) + let isspace = proc (x: char): bool = x.isSpaceAscii and not (x in NewLines) + let space {.inject.} = chartest[Kind, Text, Symbol](isspace, default) + let isnewline = proc (x: char): bool = x in NewLines + let newline {.inject.} = chartest[Kind, Text, Symbol](isnewline, default) + let alphas {.inject.} = combine(+alpha, default) + let alphanumerics {.inject.} = combine(+alphanumeric, default) + let digits {.inject.} = combine(+digit, default) + let lowers {.inject.} = combine(+lower, default) + let uppers {.inject.} = combine(+upper, default) + let spaces {.inject.} = combine(+space, default) + let newlines {.inject.} = combine(+newline, default) + + proc any(chars: Text): Rule[Kind, Text] {.inject.} = any[Kind, Text, Symbol](chars, default) + proc combine(rule: Rule[Kind, Text]): Rule[Kind, Text] {.inject.} = combine[Kind, Text](rule, default) + + code + +template grammar*[K](Kind: typedesc; default: K, code: untyped): typed {.hint[XDeclaredButNotUsed]: off.} = + grammar(Kind, string, char, default, code) + +when isMainModule: + block: + type DummyKind = enum dkDefault + grammar(DummyKind, string, char, dkDefault): + let rule = token("h[a]+m") + ignore(token(r"\s+")) + (literal("eggs") / literal("beans")) + var text = "ham beans" + discard rule.parse(text) + + var recursive = newRule() + recursive -> (literal("(") + recursive + literal(")")) / token(r"\d+") + for test in ["spam", "57", "(25)", "((25))"]: + discard recursive.parse(test) + + let repeated = +literal("spam") + ?literal("ham") + *literal("salami") + for test in ["ham", "spam", "spamspamspam" , "spamham", "spamsalami", "spamsalamisalami"]: + discard repeated.parse(test) |