#
#
#            Nimrod's Runtime Library
#        (c) Copyright 2010 Andreas Rumpf
#
#    See the file "copying.txt", included in this
#    distribution, for details about the copyright.
#

## Simple PEG (Parsing expression grammar) matching. Uses no memoization, but
## uses superoperators and symbol inlining to improve performance. Note:
## Matching performance is hopefully competitive with optimized regular
## expression engines.
##
## .. include:: ../doc/pegdocs.txt
##

include "system/inclrtl"

const
  useUnicode = true ## change this to deactivate proper UTF-8 support

import
  strutils

when useUnicode:
  import unicode

const
  InlineThreshold = 5  ## number of leaves; -1 to disable inlining
  MaxSubpatterns* = 10 ## defines the maximum number of subpatterns that
                       ## can be captured. More subpatterns cannot be captured!

type
  TPegKind = enum
    # NOTE: the order matters; ``TNode`` uses the subrange
    # ``pkBackRef..pkBackRefIgnoreStyle`` in its variant part.
    pkEmpty,
    pkAny,              ## any character (.)
    pkAnyRune,          ## any Unicode character (_)
    pkNewLine,          ## CR-LF, LF, CR
    pkTerminal,
    pkTerminalIgnoreCase,
    pkTerminalIgnoreStyle,
    pkChar,             ## single character to match
    pkCharChoice,
    pkNonTerminal,
    pkSequence,         ## a b c ... --> Internal DSL: sequence(a, b, c)
    pkOrderedChoice,    ## a / b / ... --> Internal DSL: a / b or /[a, b, c]
    pkGreedyRep,        ## a*     --> Internal DSL: *a
                        ## a+     --> (a a*)
    pkGreedyRepChar,    ## x* where x is a single character (superop)
    pkGreedyRepSet,     ## [set]* (superop)
    pkGreedyAny,        ## .* or _* (superop)
    pkOption,           ## a?     --> Internal DSL: ?a
    pkAndPredicate,     ## &a     --> Internal DSL: &a
    pkNotPredicate,     ## !a     --> Internal DSL: !a
    pkCapture,          ## {a}    --> Internal DSL: capture(a)
    pkBackRef,          ## $i     --> Internal DSL: backref(i)
    pkBackRefIgnoreCase,
    pkBackRefIgnoreStyle,
    pkSearch,           ## @a     --> Internal DSL: @a
    pkRule,             ## a <- b
    pkList              ## a, b
  TNonTerminalFlag = enum
    ntDeclared, ntUsed
  TNonTerminal {.final.} = object ## represents a non terminal symbol
    name: string                  ## the name of the symbol
    line: int                     ## line the symbol has been declared/used in
    col: int                      ## column the symbol has been declared/used in
    flags: set[TNonTerminalFlag]  ## the nonterminal's flags
    rule: TNode                   ## the rule that the symbol refers to
  TNode {.final.} = object
    case kind: TPegKind
    of pkEmpty, pkAny, pkAnyRune, pkGreedyAny, pkNewLine: nil
    of pkTerminal, pkTerminalIgnoreCase, pkTerminalIgnoreStyle: term: string
    of pkChar, pkGreedyRepChar: ch: char
    of pkCharChoice, pkGreedyRepSet: charChoice: ref set[char]
    of pkNonTerminal: nt: PNonTerminal
    of pkBackRef..pkBackRefIgnoreStyle:
      # The ``backref*`` constructors take a 1-based index and store
      # ``index-1``, so the stored value is 0-based and the range has to
      # start at 0 (a lower bound of 1 rejects ``backref(1)``).
      index: range[0..MaxSubpatterns]
    else: sons: seq[TNode]
  PNonTerminal* = ref TNonTerminal

  TPeg* = TNode ## type that represents a PEG

proc term*(t: string): TPeg {.nosideEffect, rtl, extern: "npegs$1Str".} =
  ## constructs a PEG from a terminal string; a string of length 1 is
  ## turned into a ``pkChar`` node instead of a ``pkTerminal`` node
  if t.len != 1:
    result.kind = pkTerminal
    result.term = t
  else:
    result.kind = pkChar
    result.ch = t[0]

proc termIgnoreCase*(t: string): TPeg {.
  nosideEffect, rtl, extern: "npegs$1".} =
  ## constructs a PEG from a terminal string; ignore case for matching
  result.kind = pkTerminalIgnoreCase
  result.term = t

proc termIgnoreStyle*(t: string): TPeg {.
  nosideEffect, rtl, extern: "npegs$1".} =
  ## constructs a PEG from a terminal string; ignore style for matching
  result.kind = pkTerminalIgnoreStyle
  result.term = t

proc term*(t: char): TPeg {.nosideEffect, rtl, extern: "npegs$1Char".} =
  ## constructs a PEG from a terminal char; `t` must not be ``'\0'``
  assert t != '\0'
  result.kind = pkChar
  result.ch = t

proc charSet*(s: set[char]): TPeg {.nosideEffect, rtl, extern: "npegs$1".} =
  ## constructs a PEG from a character set `s`; `s` must not contain ``'\0'``
  assert '\0' notin s
  result.kind = pkCharChoice
  new(result.charChoice)
  result.charChoice^ = s

# number of sons of `a`; only valid for node kinds that have a ``sons`` field
proc len(a: TPeg): int {.inline.} = return a.sons.len
# appends `s` to the sons of `d`
proc add(d: var TPeg, s: TPeg) {.inline.} = add(d.sons, s)

proc addChoice(dest: var TPeg, elem: TPeg) =
  # Appends `elem` to `dest`. If the last son of `dest` is a char choice and
  # `elem` is a char choice or a single char, `elem` is merged into that set
  # instead of being appended.
  # NOTE(review): the merge writes through the ``ref set[char]`` of the last
  # son, so any other node sharing that reference sees the change too --
  # confirm this is intended.
  var L = dest.len-1
  if L >= 0 and dest.sons[L].kind == pkCharChoice:
    case elem.kind
    of pkCharChoice:
      dest.sons[L].charChoice^ = dest.sons[L].charChoice^ + elem.charChoice^
    of pkChar: incl(dest.sons[L].charChoice^, elem.ch)
    else: add(dest, elem)
  else: add(dest, elem)

template multipleOp(k: TPegKind, localOpt: expr) =
  # Builds `result` as a node of kind `k` from the PEGs in `a` (a name taken
  # from the scope of the instantiation). Sons of kind `k` are flattened into
  # `result`; every element is added via `localOpt`. A result with exactly
  # one son is replaced by that son.
  result.kind = k
  result.sons = @[]
  for x in items(a):
    if x.kind == k:
      for y in items(x.sons):
        localOpt(result, y)
    else:
      localOpt(result, x)
  if result.len == 1:
    result = result.sons[0]

proc `/`*(a: openArray[TPeg]): TPeg {.
  nosideEffect, rtl, extern: "npegsOrderedChoice".} =
  ## constructs an ordered choice with the PEGs in `a`
  multipleOp(pkOrderedChoice, addChoice)

proc addSequence(dest: var TPeg, elem: TPeg) =
  # Appends `elem` to `dest`. If the last son of `dest` is a terminal and
  # `elem` is a terminal or a single char, `elem` is concatenated onto that
  # terminal's string instead of being appended.
  var L = dest.len-1
  if L >= 0 and dest.sons[L].kind == pkTerminal:
    case elem.kind
    of pkTerminal: add(dest.sons[L].term, elem.term)
    of pkChar: add(dest.sons[L].term, elem.ch)
    else: add(dest, elem)
  else: add(dest, elem)

proc sequence*(a: openArray[TPeg]): TPeg {.
  nosideEffect, rtl, extern: "npegs$1".} =
  ## constructs a sequence with all the PEGs from `a`
  multipleOp(pkSequence, addSequence)

proc `?`*(a: TPeg): TPeg {.nosideEffect, rtl, extern: "npegsOptional".} =
  ## constructs an optional for the PEG `a`
  if a.kind in {pkOption, pkGreedyRep, pkGreedyAny, pkGreedyRepChar,
                pkGreedyRepSet}:
    # a* ?  --> a*
    # a? ?  --> a?
    result = a
  else:
    result.kind = pkOption
    result.sons = @[a]

proc `*`*(a: TPeg): TPeg {.nosideEffect, rtl, extern: "npegsGreedyRep".} =
  ## constructs a "greedy repetition" for the PEG `a`
  case a.kind
  of pkGreedyRep, pkGreedyRepChar, pkGreedyRepSet, pkGreedyAny, pkOption:
    assert false
    # produces endless loop!
  of pkChar:
    result.kind = pkGreedyRepChar
    result.ch = a.ch
  of pkCharChoice:
    result.kind = pkGreedyRepSet
    result.charChoice = a.charChoice # copying a reference suffices!
  of pkAny, pkAnyRune:
    result.kind = pkGreedyAny
  else:
    result.kind = pkGreedyRep
    result.sons = @[a]

proc `@`*(a: TPeg): TPeg {.nosideEffect, rtl, extern: "npegsSearch".} =
  ## constructs a "search" for the PEG `a`
  result.kind = pkSearch
  result.sons = @[a]

when false:
  proc contains(a: TPeg, k: TPegKind): bool =
    if a.kind == k: return true
    case a.kind
    of pkEmpty, pkAny, pkAnyRune, pkGreedyAny, pkNewLine, pkTerminal,
       pkTerminalIgnoreCase, pkTerminalIgnoreStyle, pkChar, pkGreedyRepChar,
       pkCharChoice, pkGreedyRepSet: nil
    of pkNonTerminal: return true
    else:
      for i in 0..a.sons.len-1:
        if contains(a.sons[i], k): return true

proc `+`*(a: TPeg): TPeg {.nosideEffect, rtl, extern: "npegsGreedyPosRep".} =
  ## constructs a "greedy positive repetition" with the PEG `a`
  return sequence(a, *a)

proc `&`*(a: TPeg): TPeg {.nosideEffect, rtl, extern: "npegsAndPredicate".} =
  ## constructs an "and predicate" with the PEG `a`
  result.kind = pkAndPredicate
  result.sons = @[a]

proc `!`*(a: TPeg): TPeg {.nosideEffect, rtl, extern: "npegsNotPredicate".} =
  ## constructs a "not predicate" with the PEG `a`
  result.kind = pkNotPredicate
  result.sons = @[a]

proc any*: TPeg {.inline.} =
  ## constructs the PEG `any character`:idx: (``.``)
  result.kind = pkAny

proc anyRune*: TPeg {.inline.} =
  ## constructs the PEG `any rune`:idx: (``_``)
  result.kind = pkAnyRune

proc newLine*: TPeg {.inline.} =
  ## constructs the PEG `newline`:idx: (``\n``)
  result.kind = pkNewline

proc capture*(a: TPeg): TPeg {.nosideEffect, rtl, extern: "npegsCapture".} =
  ## constructs a capture with the PEG `a`
  result.kind = pkCapture
  result.sons = @[a]

proc backref*(index: range[1..MaxSubPatterns]): TPeg {.
  nosideEffect, rtl, extern: "npegs$1".} =
  ## constructs a back reference of the given `index`. `index` starts counting
  ## from 1.
  result.kind = pkBackRef
  result.index = index-1 # stored 0-based

proc backrefIgnoreCase*(index: range[1..MaxSubPatterns]): TPeg {.
  nosideEffect, rtl, extern: "npegs$1".} =
  ## constructs a back reference of the given `index`. `index` starts counting
  ## from 1. Ignores case for matching.
  result.kind = pkBackRefIgnoreCase
  result.index = index-1 # stored 0-based

proc backrefIgnoreStyle*(index: range[1..MaxSubPatterns]): TPeg {.
  nosideEffect, rtl, extern: "npegs$1".}=
  ## constructs a back reference of the given `index`. `index` starts counting
  ## from 1. Ignores style for matching.
  result.kind = pkBackRefIgnoreStyle
  result.index = index-1 # stored 0-based

proc spaceCost(n: TPeg): int =
  # Estimates the size of `n` in leaves for the inlining decision made in
  # ``nonterminal``. Summation over the sons stops as soon as the running
  # total reaches ``InlineThreshold``.
  case n.kind
  of pkEmpty: nil
  of pkTerminal, pkTerminalIgnoreCase, pkTerminalIgnoreStyle, pkChar,
     pkGreedyRepChar, pkCharChoice, pkGreedyRepSet,
     pkAny, pkAnyRune, pkNewLine, pkGreedyAny:
    result = 1
  of pkNonTerminal:
    # we cannot inline a rule with a non-terminal
    result = InlineThreshold+1
  else:
    for i in 0..n.len-1:
      inc(result, spaceCost(n.sons[i]))
      if result >= InlineThreshold: break

proc nonterminal*(n: PNonTerminal): TPeg {.
  nosideEffect, rtl, extern: "npegs$1".} =
  ## constructs a PEG that consists of the nonterminal symbol
  assert n != nil
  if ntDeclared in n.flags and spaceCost(n.rule) < InlineThreshold:
    when false: echo "inlining symbol: ", n.name
    result = n.rule # inlining of rule enables better optimizations
  else:
    result.kind = pkNonTerminal
    result.nt = n

proc newNonTerminal*(name: string, line, column: int): PNonTerminal {.
  nosideEffect, rtl, extern: "npegs$1".} =
  ## constructs a nonterminal symbol
  new(result)
  result.name = name
  result.line = line
  result.col = column

template letters*: expr =
  ## expands to ``charset({'A'..'Z', 'a'..'z'})``
  charset({'A'..'Z', 'a'..'z'})

template digits*: expr =
  ## expands to ``charset({'0'..'9'})``
  charset({'0'..'9'})

template whitespace*: expr =
  ## expands to ``charset({' ', '\9'..'\13'})``
  charset({' ', '\9'..'\13'})

template identChars*: expr =
  ## expands to ``charset({'a'..'z', 'A'..'Z', '0'..'9', '_'})``
  charset({'a'..'z', 'A'..'Z', '0'..'9', '_'})

template identStartChars*: expr =
  ## expands to ``charset({'A'..'Z', 'a'..'z', '_'})``
  charset({'a'..'z', 'A'..'Z', '_'})

template ident*: expr =
  ## same as ``[a-zA-Z_][a-zA-Z_0-9]*``; standard identifier
  sequence(charset({'a'..'z', 'A'..'Z', '_'}),
           *charset({'a'..'z', 'A'..'Z', '0'..'9', '_'}))

template natural*: expr =
  ## same as ``\d+``
  +digits

# ------------------------- debugging -----------------------------------------

proc esc(c: char, reserved = {'\0'..'\255'}): string =
  case c
  of '\b': result = "
#
#
# The Nimrod Compiler
# (c) Copyright 2012 Andreas Rumpf
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
#
## This module contains the data structures for the semantic checking phase.
import
strutils, lists, intsets, options, lexer, ast, astalgo, trees, treetab,
wordrecg,
ropes, msgs, platform, os, condsyms, idents, renderer, types, extccomp, math,
magicsys, nversion, nimsets, parser, times, passes, rodread, evals
type
  # An entry that is put on a stack for pragma parsing.
  TOptionEntry* = object of lists.TListEntry
    options*: TOptions
    defaultCC*: TCallingConvention
    dynlib*: PLib
    Notes*: TNoteKinds

  POptionEntry* = ref TOptionEntry
  PProcCon* = ref TProcCon

  # Procedure context; also used for top-level statements.
  TProcCon* {.final.} = object
    owner*: PSym              # the symbol this context belongs to
    resultSym*: PSym          # the result symbol (if we are in a proc)
    nestedLoopCounter*: int   # whether we are in a loop or not
    nestedBlockCounter*: int  # whether we are in a block or not
    InTryStmt*: int           # whether we are in a try statement; works
                              # also in standalone ``except`` and
                              # ``finally``
    next*: PProcCon           # used for stacking procedure contexts

  TInstantiatedSymbol* {.final.} = object
    genericSym*: PSym
    instSym*: PSym
    concreteTypes*: seq[PType]

  # If we generate an instance of a generic, we'd like to re-use that
  # instance if possible across module boundaries. However, this is not
  # possible if the compilation cache is enabled. So we give up then and
  # use the caching of generics only per module, not per project.
  TGenericsCache* {.final.} = object
    InstTypes*: TIdTable                 # map PType to PType
    generics*: seq[TInstantiatedSymbol]  # a list of the things to compile
    lastGenericIdx*: int                 # used for the generics stack

  PGenericsCache* = ref TGenericsCache
  PContext* = ref TContext

  # A context represents a module.
  TContext* = object of TPassContext
    module*: PSym              # the module sym belonging to the context
    p*: PProcCon               # procedure context
    generics*: PGenericsCache  # may point to a global or module-local
                               # structure
    friendModule*: PSym        # current friend module; may access private
                               # data; this is used so that generic
                               # instantiations can access private object
                               # fields
    InstCounter*: int          # to prevent endless instantiations
    threadEntries*: TSymSeq    # list of thread entries to check
    tab*: TSymTab              # each module has its own symbol table
    AmbiguousSymbols*: TIntSet # ids of all ambiguous symbols (cannot
                               # store this info in the syms themselves!)
    InGenericContext*: int     # > 0 if we are in a generic
    InUnrolledContext*: int    # > 0 if we are unrolling a loop
    InCompilesContext*: int    # > 0 if we are in a ``compiles`` magic
    converters*: TSymSeq       # sequence of converters
    optionStack*: TLinkedList
    libs*: TLinkedList         # all libs used by this module
    # callbacks stored in the context; the first three are for the pragmas
    semConstExpr*: proc (c: PContext, n: PNode): PNode {.nimcall.}
    semExpr*: proc (c: PContext, n: PNode): PNode {.nimcall.}
    # XXX bite the bullet
    semConstBoolExpr*: proc (c: PContext, n: PNode): PNode {.nimcall.}
    semOverloadedCall*: proc (c: PContext, n, nOrig: PNode,
                              filter: TSymKinds): PNode {.nimcall.}
    includedFiles*: TIntSet    # used to detect recursive include files
    filename*: string          # the module's filename
    userPragmas*: TStrTable
    evalContext*: PEvalContext
    UnknownIdents*: TIntSet    # ids of all unknown identifiers to prevent
                               # naming it multiple times
# module-private generics cache (original note: save for modularity)
var gGenericsCache: PGenericsCache
proc newGenericsCache*(): PGenericsCache =
  # Allocates a generics cache with an empty ``generics`` list and an
  # initialized ``InstTypes`` table.
  new(result)
  result.generics = @[]
  initIdTable(result.InstTypes)
# Forward declarations of exported routines; their bodies belong to the
# implementation section of this module.
proc newContext*(module: PSym, nimfile: string): PContext
# returns the tail of ``c.optionStack`` as a POptionEntry
proc lastOptionEntry*(c: PContext): POptionEntry
proc newOptionEntry*(): POptionEntry
proc addConverter*(c: PContext, conv: PSym)
proc newLib*(kind: TLibKind): PLib
proc addToLib*(lib: PLib, sym: PSym)
proc makePtrType*(c: PContext, baseType: PType): PType
proc makeVarType*(c: PContext, baseType: PType): PType
proc newTypeS*(kind: TTypeKind, c: PContext): PType
proc fillTypeS*(dest: PType, kind: TTypeKind, c: PContext)
# owner handling (these three operate on the global ``gOwners`` stack):
# returns the last entry of ``gOwners``
proc getCurrOwner*(): PSym
# appends `owner` to ``gOwners``
proc PushOwner*(owner: PSym)
# removes the last entry of ``gOwners``; InternalError if it is empty
proc PopOwner*()
# implementation
# Stack of owner symbols: PushOwner appends to it, PopOwner removes the last
# entry and getCurrOwner reads the last entry.
var gOwners*: seq[PSym] = @[]
proc getCurrOwner(): PSym =
  # Returns the symbol on top of the owner stack ``gOwners``.
  # owner stack (used for initializing the
  # owner field of syms)
  # the documentation comment always gets
  # assigned to the current owner
  # BUGFIX: global array is needed!
  if len(gOwners) == 0:
    # Same treatment as in PopOwner: an empty owner stack is reported as an
    # internal error instead of indexing gOwners[-1].
    InternalError("getCurrOwner")
    return
  result = gOwners[high(gOwners)]
proc PushOwner(owner: PSym) =
  # Pushes `owner` onto the owner stack.
  gOwners.add(owner)
proc PopOwner() =
  # Drops the top entry of the owner stack; popping an empty stack is
  # reported via InternalError.
  if len(gOwners) <= 0:
    InternalError("popOwner")
  else:
    setlen(gOwners, len(gOwners) - 1)
proc lastOptionEntry(c: PContext): POptionEntry =
  # The tail of the context's option stack, converted to POptionEntry.
  return POptionEntry(c.optionStack.tail)
proc pushProcCon*(c: PContext, owner: PSym) {.inline.} =
if owner == nil:
InternalError("owner is nil")
return
var x: PProcCon
new(x)
x.owner = owner
x.next = c.p
c.p =