summary refs log blame commit diff stats
path: root/tests/generics/tparser_generator.nim
blob: 8f8fea3820909ba6a50f0b1fc45a9c87a205381a (plain) (tree)
1
2
3
4


                               
               











































































































































































































































































































































































































                                                                                                                                  














                                                                                              
discard """
  output: '''Match failed: spam
Match failed: ham'''
joinable: false
"""

# bug #6220

import nre
import options
import strutils except isAlpha, isLower, isUpper, isSpace
from unicode import isAlpha, isLower, isUpper, isTitle, isWhiteSpace
import os

const debugLex = false

template debug(enable: bool, text: string): typed =
  when enable:
    echo(text)

type
  Parser[N, T] = proc(text: T, start: int, nodes: var seq[Node[N]]): int {.closure.}

  RuleObj[N, T] = object
    parser: Parser[N, T]
    kind: N

  Rule[N, T] = ref RuleObj[N, T]

  NodeKind = enum
    terminal,
    nonterminal

  Node*[N] = object of RootObj
    # Uncomment the following lines and the compiler crashes
    # case nodeKind: NodeKind
    #   of nonterminal:
    #     kids: Node[N]
    #   of terminal:
    #     discard
    start*: int
    length*: int
    kind*: N


  NonTerminal[N] = object of Node
    children: seq[Node[N]]

proc newRule[N, T](parser: Parser, kind: N): Rule[N, T] =
  new(result)
  result.parser = parser
  result.kind = kind

proc newRule[N, T](kind: N): Rule[N, T] =
  new(result)
  result.kind = kind

proc initNode[N](start: int, length: int, kind: N): Node[N] =
  result.start = start
  result.length = length
  result.kind = kind

proc initNode[N](start: int, length: int, children: seq[Node[N]], kind: N): NonTerminal[N] =
  result.start = start
  result.length = length
  result.kind = kind
  result.children = children

proc substr[T](text: T, first, last: int): T =
  text[first .. last]

proc continuesWith[N](text: seq[Node[N]], subtext: seq[N], start: Natural): bool =
  let length = len(text)
  var pos = 0
  while pos < len(subtext):
    let textpos = start + pos
    if textpos == len(text):
      return false
    if text[textpos].kind != subtext[pos].kind:
      return false
    pos+=1
  return true


proc render*[N, T](text: T, nodes: seq[Node[N]]): string =
  ## Uses a sequence of Nodes to render a given text string
  result = ""
  for node in nodes:
    result.add("<" & node.value(text) & ">")

proc render*[N, T](rule: Rule[N, T], text: string): string =
  ## Uses a rule to render a given text string
  render(text, rule.parse(text))

proc render*[N, T](text: T, nodes: seq[Node[N]], source: string): string =
  result = ""
  for node in nodes:
    result.add("[" & node.value(text, source) & "]")

proc render*[N, T, X](rule: Rule[N, T], text: seq[Node[X]], source: string): string =
  ## Uses a rule to render a given series of nodes, providing the source string
  text.render(rule.parse(text, source = source), source)

proc annotate*[N, T](node: Node[N], text: T): string =
  result = "<" & node.value(text) & ":" & $node.kind & ">"

proc annotate*[N, T](nodes: seq[Node[N]], text: T): string =
  result = ""
  for node in nodes:
    result.add(node.annotate(text))

proc annotate*[N, T](rule: Rule[N, T], text: T): string =
  annotate(rule.parse(text), text)

proc value*[N, T](node: Node[N], text: T): string =
  result = $text.substr(node.start, node.start + node.length - 1)

proc value*[N, X](node: Node[N], text: seq[Node[X]], source: string): string =
  result = ""
  for n in node.start ..< node.start + node.length:
    result &= text[n].annotate(source)

proc parse*[N, T](rule: Rule[N, T], text: T, start = 0, source: string = ""): seq[Node[N]] =
  result = newSeq[Node[N]]()
  debug(debugLex, "Parsing: " & $text)
  let length = rule.parser(text, start, result)

  when T is string:
    if length == -1:
      echo("Match failed: " & $text)
      result = @[]
    elif length == len(text):
      debug(debugLex, "Matched: " & $text & " => " & $len(result) & " tokens: " & text.render(result))
    else:
      echo("Matched first " & $length & " symbols: " & $text & " => " & $len(result) & " tokens: " & text.render(result))
  else:
    if length == -1:
      echo("Match failed: " & $text)
      result = @[]
    elif length == len(text):
      debug(debugLex, "Matched: " & $text & " => " & $len(result) & " tokens: " & text.render(result, source))
    else:
      echo("Matched first " & $length & " symbols: " & $text & " => " & $len(result) & " tokens: " & text.render(result, source))


proc literal*[N, T, P](pattern: P, kind: N): Rule[N, T] =
  let parser = proc (text: T, start: int, nodes: var seq[Node[N]]): int =
    if start == len(text):
      return -1
    assert(len(text)>start, "Attempting to match at $#, string length is $# " % [$start, $len(text)])
    when P is string or P is seq[N]:
      debug(debugLex, "Literal[" & $kind & "]: testing " & $pattern & " at " & $start & ": " & $text[start..start+len(pattern)-1])
      if text.continuesWith(pattern, start):
        let node = initNode(start, len(pattern), kind)
        nodes.add(node)
        debug(debugLex, "Literal: matched <" & $text[start ..< start+node.length] & ":" & $node.length & ">" )
        return node.length
    elif P is char:
      debug(debugLex, "Literal[" & $kind & "]: testing " & $pattern & " at " & $start & ": " & $text[start])
      if text[start] == pattern:
        let node = initNode(start, 1, kind)
        nodes.add(node)
        return 1
    else:
      debug(debugLex, "Literal[" & $kind & "]: testing " & $pattern & " at " & $start & ": " & $text[start])
      if text[start].kind == pattern:
        let node = initNode(start, 1, kind)
        nodes.add(node)
        return 1
    return -1
  result = newRule[N, T](parser, kind)

proc token[N, T](pattern: T, kind: N): Rule[N, T] =
  when T is not string:
     {.fatal: "Token is only supported for strings".}
  let parser = proc (text: T, start: int, nodes: var seq[Node[N]]): int =
    debug(debugLex, "Token[" & $kind & "]: testing " & pattern & " at " & $start)
    if start == len(text):
      return -1
    assert(len(text)>start, "Attempting to match at $#, string length is $# " % [$start, $len(text)])
    let m = text.match(re(pattern), start)
    if m.isSome:
      let node = initNode(start, len(m.get.match), kind)
      nodes.add(node)
      result = node.length
      debug(debugLex, "Token: matched <" & text[start ..< start+node.length] & ":" & $node.length & ">" )
    else:
      result = -1
  result = newRule[N, T](parser, kind)

proc chartest[N, T, S](testfunc: proc(s: S): bool, kind: N): Rule[N, T] =
  let parser = proc (text: T, start: int, nodes: var seq[Node[N]]): int =
    if start == len(text):
      return -1
    assert(len(text)>start, "Attempting to match at $#, string length is $# " % [$start, $len(text)])
    if testfunc(text[start]):
      nodes.add(initNode(start, 1, kind))
      result = 1
    else:
      result = -1
  result = newRule[N, T](parser, kind)

proc any*[N, T, S](symbols: T, kind: N): Rule[N, T] =
  let test = proc(s: S): bool =
    when S is string:
      debug(debugLex, "Any[" & $kind & "]: testing for " & symbols.replace("\n", "\\n").replace("\r", "\\r"))
    else:
      debug(debugLex, "Any[" & $kind & "]: testing for " & $symbols)
    result = s in symbols
  result = chartest[N, T, S](test, kind)

proc ignore*[N, T](rule: Rule[N, T]): Rule[N, T] =
  let parser = proc (text: T, start: int, nodes: var seq[Node[N]]): int =
    var mynodes = newSeq[Node[N]]()
    result = rule.parser(text, start, mynodes)
  result = newRule[N, T](parser, rule.kind)

proc combine*[N, T](rule: Rule[N, T], kind: N): Rule[N, T] =
  let parser = proc (text: T, start: int, nodes: var seq[Node[N]]): int =
    var mynodes = newSeq[Node[N]]()
    result = rule.parser(text, start, mynodes)
    nodes.add(initNode(start, result, kind))
  result = newRule[N, T](parser, kind)

proc build*[N, T](rule: Rule[N, T], kind: N): Rule[N, T] =
  let parser = proc (text: T, start: int, nodes: var seq[Node[N]]): int =
    var mynodes = newSeq[Node[N]]()
    result = rule.parser(text, start, mynodes)
    let nonTerminal = initNode(start, result, mynodes, kind)
    nodes.add(nonTerminal)
  result = newRule[N, T](parser, kind)

proc fail*[N, T](message: string, kind: N): Rule[N, T] =
  let parser = proc (text: T, start: int, nodes: var seq[Node[N]]): int =
    let lineno = countLines(text[0..start])
    var startline = start
    var endline = start
    while startline>0:
      if text[startline] in NewLines:
        break
      startline-=1
    while endline < len(text):
      if text[endline] in NewLines:
        break
      endline+=1
    let charno = start-startline
    echo text.substr(startline, endline)
    echo ' '.repeat(max(charno,0)) & '^'
    raise newException(ValueError, "Position: " & $start & " Line: " & $lineno & ", Symbol: " & $charno & ": " & message)
  result = newRule[N, T](parser, kind)

proc `+`*[N, T](left: Rule[N, T], right: Rule[N, T]): Rule[N, T] =
  let parser = proc (text: T, start: int, nodes: var seq[Node[N]]): int =
    var mynodes = newSeq[Node[N]]()
    assert(not isNil(left.parser), "Left hand side parser is nil")
    let leftlength = left.parser(text, start, mynodes)
    if leftlength == -1:
      return leftlength
    assert(not isNil(right.parser), "Right hand side parser is nil")
    let rightlength = right.parser(text, start+leftlength, mynodes)
    if rightlength == -1:
      return rightlength
    result = leftlength + rightlength
    nodes.add(mynodes)
  result = newRule[N, T](parser, left.kind)

proc `/`*[N, T](left: Rule[N, T], right: Rule[N, T]): Rule[N, T] =
  let parser = proc (text: T, start: int, nodes: var seq[Node[N]]): int =
    var mynodes = newSeq[Node[N]]()
    assert(not isNil(left.parser), "Left hand side of / is not fully defined")
    let leftlength = left.parser(text, start, mynodes)
    if leftlength != -1:
      nodes.add(mynodes)
      return leftlength
    mynodes = newSeq[Node[N]]()
    assert(not isNil(right.parser), "Right hand side of / is not fully defined")
    let rightlength = right.parser(text, start, mynodes)
    if rightlength == -1:
      return rightlength
    nodes.add(mynodes)
    return rightlength
  result = newRule[N, T](parser, left.kind)

proc `?`*[N, T](rule: Rule[N, T]): Rule[N, T] =
  let parser = proc (text: T, start: int, nodes: var seq[Node[N]]): int =
    let success = rule.parser(text, start, nodes)
    return if success != -1: success else: 0
  result = newRule[N, T](parser, rule.kind)

proc `+`*[N, T](rule: Rule[N, T]): Rule[N, T] =
  let parser = proc (text: T, start: int, nodes: var seq[Node[N]]): int =
    var success = rule.parser(text, start, nodes)
    if success == -1:
      return success
    var total = 0
    while success != -1 and start+total < len(text):
      total += success
      success = rule.parser(text, start+total, nodes)
    return total
  result = newRule[N, T](parser, rule.kind)

proc `*`*[N, T](rule: Rule[N, T]): Rule[N, T] =
  let parser = proc (text: T, start: int, nodes: var seq[Node[N]]): int =
    let success = (+rule).parser(text, start, nodes)
    return if success != -1: success else: 0
  result = newRule[N, T](parser, rule.kind)

#Note: this consumes - for zero-width lookahead see !
proc `^`*[N, T](rule: Rule[N, T]): Rule[N, T] =
  let parser = proc (text: T, start: int, nodes: var seq[Node[N]]): int =
    var mynodes = newSeq[Node[N]]()
    let success = rule.parser(text, start, mynodes)
    return if success == -1: 1 else: -1
  result = newRule[N, T](parser, rule.kind)

proc `*`*[N, T](repetitions: int, rule: Rule[N, T]): Rule[N, T] =
  let parser = proc (text: T, start: int, nodes: var seq[Node[N]]): int =
    var mynodes = newSeq[Node[N]]()
    var total = 0
    for i in 0..<repetitions:
      let success = rule.parser(text, start+total, mynodes)
      if success == -1:
        return success
      else:
        total += success
    nodes.add(mynodes)
    return total
  result = newRule[N, T](parser, rule.kind)

# Positive zero-width lookahead
proc `&`*[N, T](rule: Rule[N, T]): Rule[N, T] =
  let parser = proc (text: T, start: int, nodes: var seq[Node[N]]): int =
    var mynodes = newSeq[Node[N]]()
    let success = rule.parser(text, start, mynodes)
    return if success != -1: 0 else: -1
  result = newRule[N, T](parser, rule.kind)

# Negative zero-width lookahead
proc `!`*[N, T](rule: Rule[N, T]): Rule[N, T] =
  let parser = proc (text: T, start: int, nodes: var seq[Node[N]]): int =
    var mynodes = newSeq[Node[N]]()
    let failure = rule.parser(text, start, mynodes)
    return if failure == -1: 0 else: -1
  result = newRule[N, T](parser, rule.kind)

proc `/`*[N, T](rule: Rule[N, T]): Rule[N, T] =
  let parser = proc (text: T, start: int, nodes: var seq[Node[N]]): int =
    var mynodes = newSeq[Node[N]]()
    var length = 0
    var success = rule.parser(text, start+length, mynodes)
    while success == -1 and start+length < len(text):
      length += 1
      success = rule.parser(text, start+length, mynodes)
    if start+length >= len(text):
      result = -1
    else:
      nodes.add(initNode(start, length, rule.kind))
      nodes.add(mynodes)
      result = length + success
  result = newRule[N, T](parser, rule.kind)

proc `->`*(rule: Rule, production: Rule) =
  assert(not isnil(production.parser), "Right hand side of -> is nil - has the rule been defined yet?")
  rule.parser = production.parser

template grammar*[K](Kind, Text, Symbol: typedesc; default: K, code: untyped): typed {.hint[XDeclaredButNotUsed]: off.} =

    proc newRule(): Rule[Kind, Text] {.inject.} = newRule[Kind, Text](default)
    proc chartest(testfunc: proc(c: Symbol): bool): Rule[Kind, Text] {.inject.} = chartest[Kind, Text, Symbol](testfunc, default)
    proc literal[P](pattern: P, kind: K): Rule[Kind, Text] {.inject.} = literal[Kind, Text, P](pattern, kind)
    proc literal[P](pattern: P): Rule[Kind, Text] {.inject.} = literal[Kind, Text, P](pattern, default)

    when Text is string:
      proc token(pattern: string): Rule[Kind, Text] {.inject.} = token(pattern, default)
      proc fail(message: string): Rule[Kind, Text] {.inject.} = fail[Kind, Text](message, default)
      let alpha {.inject.} = chartest[Kind, Text, Symbol](isAlphaAscii, default)
      let alphanumeric {.inject.}= chartest[Kind, Text, Symbol](isAlphaNumeric, default)
      let digit {.inject.} = chartest[Kind, Text, Symbol](isDigit, default)
      let lower {.inject.} = chartest[Kind, Text, Symbol](isLowerAscii, default)
      let upper {.inject.} = chartest[Kind, Text, Symbol](isUpperAscii, default)
      let isspace = proc (x: char): bool = x.isSpaceAscii and not (x in NewLines)
      let space {.inject.} = chartest[Kind, Text, Symbol](isspace, default)
      let isnewline = proc (x: char): bool = x in NewLines
      let newline {.inject.} = chartest[Kind, Text, Symbol](isnewline, default)
      let alphas {.inject.} = combine(+alpha, default)
      let alphanumerics {.inject.} = combine(+alphanumeric, default)
      let digits {.inject.} = combine(+digit, default)
      let lowers {.inject.} = combine(+lower, default)
      let uppers {.inject.} = combine(+upper, default)
      let spaces {.inject.} = combine(+space, default)
      let newlines {.inject.} = combine(+newline, default)

    proc any(chars: Text): Rule[Kind, Text] {.inject.} = any[Kind, Text, Symbol](chars, default)
    proc combine(rule: Rule[Kind, Text]): Rule[Kind, Text] {.inject.} = combine[Kind, Text](rule, default)

    code

template grammar*[K](Kind: typedesc; default: K, code: untyped): typed {.hint[XDeclaredButNotUsed]: off.} =
  grammar(Kind, string, char, default, code)

block:
  type DummyKind = enum dkDefault
  grammar(DummyKind, string, char, dkDefault):
    let rule = token("h[a]+m") + ignore(token(r"\s+")) + (literal("eggs") / literal("beans"))
    var text = "ham beans"
    discard rule.parse(text)

    var recursive = newRule()
    recursive -> (literal("(") + recursive + literal(")")) / token(r"\d+")
    for test in ["spam", "57", "(25)", "((25))"]:
      discard recursive.parse(test)

    let repeated = +literal("spam") + ?literal("ham") + *literal("salami")
    for test in ["ham", "spam", "spamspamspam" , "spamham", "spamsalami", "spamsalamisalami"]:
      discard  repeated.parse(test)