# # # Nimrod's Runtime Library # (c) Copyright 2010 Andreas Rumpf # # See the file "copying.txt", included in this # distribution, for details about the copyright. # ## Simple PEG (Parsing expression grammar) matching. Uses no memorization, but ## uses superoperators and symbol inlining to improve performance. Note: ## Matching performance is hopefully competitive with optimized regular ## expression engines. ## ## .. include:: ../doc/pegdocs.txt ## include "system/inclrtl" const useUnicode = true ## change this to deactivate proper UTF-8 support import strutils when useUnicode: import unicode const InlineThreshold = 5 ## number of leaves; -1 to disable inlining MaxSubpatterns* = 10 ## defines the maximum number of subpatterns that ## can be captured. More subpatterns cannot be captured! type TPegKind = enum pkEmpty, pkAny, ## any character (.) pkAnyRune, ## any Unicode character (_) pkNewLine, ## CR-LF, LF, CR pkTerminal, pkTerminalIgnoreCase, pkTerminalIgnoreStyle, pkChar, ## single character to match pkCharChoice, pkNonTerminal, pkSequence, ## a b c ... --> Internal DSL: peg(a, b, c) pkOrderedChoice, ## a / b / ... --> Internal DSL: a / b or /[a, b, c] pkGreedyRep, ## a* --> Internal DSL: *a ## a+ --> (a a*) pkGreedyRepChar, ## x* where x is a single character (superop) pkGreedyRepSet, ## [set]* (superop) pkGreedyAny, ## .* or _* (superop) pkOption, ## a? 
--> Internal DSL: ?a pkAndPredicate, ## &a --> Internal DSL: &a pkNotPredicate, ## !a --> Internal DSL: !a pkCapture, ## {a} --> Internal DSL: capture(a) pkBackRef, ## $i --> Internal DSL: backref(i) pkBackRefIgnoreCase, pkBackRefIgnoreStyle, pkSearch, ## @a --> Internal DSL: @a pkRule, ## a <- b pkList ## a, b TNonTerminalFlag = enum ntDeclared, ntUsed TNonTerminal {.final.} = object ## represents a non terminal symbol name: string ## the name of the symbol line: int ## line the symbol has been declared/used in col: int ## column the symbol has been declared/used in flags: set[TNonTerminalFlag] ## the nonterminal's flags rule: TNode ## the rule that the symbol refers to TNode {.final.} = object case kind: TPegKind of pkEmpty, pkAny, pkAnyRune, pkGreedyAny, pkNewLine: nil of pkTerminal, pkTerminalIgnoreCase, pkTerminalIgnoreStyle: term: string of pkChar, pkGreedyRepChar: ch: char of pkCharChoice, pkGreedyRepSet: charChoice: ref set[char] of pkNonTerminal: nt: PNonTerminal of pkBackRef..pkBackRefIgnoreStyle: index: range[1..MaxSubpatterns] else: sons: seq[TNode] PNonTerminal* = ref TNonTerminal TPeg* = TNode ## type that represents a PEG proc term*(t: string): TPeg {.nosideEffect, rtl, extern: "npegs$1Str".} = ## constructs a PEG from a terminal string if t.len != 1: result.kind = pkTerminal result.term = t else: result.kind = pkChar result.ch = t[0] proc termIgnoreCase*(t: string): TPeg {. nosideEffect, rtl, extern: "npegs$1".} = ## constructs a PEG from a terminal string; ignore case for matching result.kind = pkTerminalIgnoreCase result.term = t proc termIgnoreStyle*(t: string): TPeg {. 
nosideEffect, rtl, extern: "npegs$1".} = ## constructs a PEG from a terminal string; ignore style for matching result.kind = pkTerminalIgnoreStyle result.term = t proc term*(t: char): TPeg {.nosideEffect, rtl, extern: "npegs$1Char".} = ## constructs a PEG from a terminal char assert t != '\0' result.kind = pkChar result.ch = t proc charSet*(s: set[char]): TPeg {.nosideEffect, rtl, extern: "npegs$1".} = ## constructs a PEG from a character set `s` assert '\0' notin s result.kind = pkCharChoice new(result.charChoice) result.charChoice^ = s proc len(a: TPeg): int {.inline.} = return a.sons.len proc add(d: var TPeg, s: TPeg) {.inline.} = add(d.sons, s) proc addChoice(dest: var TPeg, elem: TPeg) = var L = dest.len-1 if L >= 0 and dest.sons[L].kind == pkCharChoice: case elem.kind of pkCharChoice: dest.sons[L].charChoice^ = dest.sons[L].charChoice^ + elem.charChoice^ of pkChar: incl(dest.sons[L].charChoice^, elem.ch) else: add(dest, elem) else: add(dest, elem) template multipleOp(k: TPegKind, localOpt: expr) = result.kind = k result.sons = @[] for x in items(a): if x.kind == k: for y in items(x.sons): localOpt(result, y) else: localOpt(result, x) if result.len == 1: result = result.sons[0] proc `/`*(a: openArray[TPeg]): TPeg {. nosideEffect, rtl, extern: "npegsOrderedChoice".} = ## constructs an ordered choice with the PEGs in `a` multipleOp(pkOrderedChoice, addChoice) proc addSequence(dest: var TPeg, elem: TPeg) = var L = dest.len-1 if L >= 0 and dest.sons[L].kind == pkTerminal: case elem.kind of pkTerminal: add(dest.sons[L].term, elem.term) of pkChar: add(dest.sons[L].term, elem.ch) else: add(dest, elem) else: add(dest, elem) proc sequence*(a: openArray[TPeg]): TPeg {. 
nosideEffect, rtl, extern: "npegs$1".} = ## constructs a sequence with all the PEGs from `a` multipleOp(pkSequence, addSequence) proc `?`*(a: TPeg): TPeg {.nosideEffect, rtl, extern: "npegsOptional".} = ## constructs an optional for the PEG `a` if a.kind in {pkOption, pkGreedyRep, pkGreedyAny, pkGreedyRepChar, pkGreedyRepSet}: # a* ? --> a* # a? ? --> a? result = a else: result.kind = pkOption result.sons = @[a] proc `*`*(a: TPeg): TPeg {.nosideEffect, rtl, extern: "npegsGreedyRep".} = ## constructs a "greedy repetition" for the PEG `a` case a.kind of pkGreedyRep, pkGreedyRepChar, pkGreedyRepSet, pkGreedyAny, pkOption: assert false # produces endless loop! of pkChar: result.kind = pkGreedyRepChar result.ch = a.ch of pkCharChoice: result.kind = pkGreedyRepSet result.charChoice = a.charChoice # copying a reference suffices! of pkAny, pkAnyRune: result.kind = pkGreedyAny else: result.kind = pkGreedyRep result.sons = @[a] proc `@`*(a: TPeg): TPeg {.nosideEffect, rtl, extern: "npegsSearch".} = ## constructs a "search" for the PEG `a` result.kind = pkSearch result.sons = @[a] when false: proc contains(a: TPeg, k: TPegKind): bool = if a.kind == k: return true case a.kind of pkEmpty, pkAny, pkAnyRune, pkGreedyAny, pkNewLine, pkTerminal, pkTerminalIgnoreCase, pkTerminalIgnoreStyle, pkChar, pkGreedyRepChar, pkCharChoice, pkGreedyRepSet: nil of pkNonTerminal: return true else: for i in 0..a.sons.len-1: if contains(a.sons[i], k): return true proc `+`*(a: TPeg): TPeg {.nosideEffect, rtl, extern: "npegsGreedyPosRep".} = ## constructs a "greedy positive repetition" with the PEG `a` return sequence(a, *a) proc `&`*(a: TPeg): TPeg {.nosideEffect, rtl, extern: "npegsAndPredicate".} = ## constructs an "and predicate" with the PEG `a` result.kind = pkAndPredicate result.sons = @[a] proc `!`*(a: TPeg): TPeg {.nosideEffect, rtl, extern: "npegsNotPredicate".} = ## constructs a "not predicate" with the PEG `a` result.kind = pkNotPredicate result.sons = @[a] proc any*: TPeg {.inline.} = ## 
constructs the PEG `any character`:idx: (``.``) result.kind = pkAny proc anyRune*: TPeg {.inline.} = ## constructs the PEG `any rune`:idx: (``_``) result.kind = pkAnyRune proc newLine*: TPeg {.inline.} = ## constructs the PEG `newline`:idx: (``\n``) result.kind = pkNewline proc capture*(a: TPeg): TPeg {.nosideEffect, rtl, extern: "npegsCapture".} = ## constructs a capture with the PEG `a` result.kind = pkCapture result.sons = @[a] proc backref*(index: range[1..MaxSubPatterns]): TPeg {. nosideEffect, rtl, extern: "npegs$1".} = ## constructs a back reference of the given `index`. `index` starts counting ## from 1. result.kind = pkBackRef result.index = index-1 proc backrefIgnoreCase*(index: range[1..MaxSubPatterns]): TPeg {. nosideEffect, rtl, extern: "npegs$1".} = ## constructs a back reference of the given `index`. `index` starts counting ## from 1. Ignores case for matching. result.kind = pkBackRefIgnoreCase result.index = index-1 proc backrefIgnoreStyle*(index: range[1..MaxSubPatterns]): TPeg {. nosideEffect, rtl, extern: "npegs$1".}= ## constructs a back reference of the given `index`. `index` starts counting ## from 1. Ignores style for matching. result.kind = pkBackRefIgnoreStyle result.index = index-1 proc spaceCost(n: TPeg): int = case n.kind of pkEmpty: nil of pkTerminal, pkTerminalIgnoreCase, pkTerminalIgnoreStyle, pkChar, pkGreedyRepChar, pkCharChoice, pkGreedyRepSet, pkAny, pkAnyRune, pkNewLine, pkGreedyAny: result = 1 of pkNonTerminal: # we cannot inline a rule with a non-terminal result = InlineThreshold+1 else: for i in 0..n.len-1: inc(result, spaceCost(n.sons[i])) if result >= InlineThreshold: break proc nonterminal*(n: PNonTerminal): TPeg {. 
nosideEffect, rtl, extern: "npegs$1".} = ## constructs a PEG that consists of the nonterminal symbol assert n != nil if ntDeclared in n.flags and spaceCost(n.rule) < InlineThreshold: when false: echo "inlining symbol: ", n.name result = n.rule # inlining of rule enables better optimizations else: result.kind = pkNonTerminal result.nt = n proc newNonTerminal*(name: string, line, column: int): PNonTerminal {. nosideEffect, rtl, extern: "npegs$1".} = ## constructs a nonterminal symbol new(result) result.name = name result.line = line result.col = column template letters*: expr = ## expands to ``charset({'A'..'Z', 'a'..'z'})`` charset({'A'..'Z', 'a'..'z'}) template digits*: expr = ## expands to ``charset({'0'..'9'})`` charset({'0'..'9'}) template whitespace*: expr = ## expands to ``charset({' ', '\9'..'\13'})`` charset({' ', '\9'..'\13'}) template identChars*: expr = ## expands to ``charset({'a'..'z', 'A'..'Z', '0'..'9', '_'})`` charset({'a'..'z', 'A'..'Z', '0'..'9', '_'}) template identStartChars*: expr = ## expands to ``charset({'A'..'Z', 'a'..'z', '_'})`` charset({'a'..'z', 'A'..'Z', '_'}) template ident*: expr = ## same as ``[a-zA-Z_][a-zA-z_0-9]*``; standard identifier sequence(charset({'a'..'z', 'A'..'Z', '_'}), *charset({'a'..'z', 'A'..'Z', '0'..'9', '_'})) template natural*: expr = ## same as ``\d+`` +digits # ------------------------- debugging ----------------------------------------- proc esc(c: char, reserved = {'\0'..'\255'}): string = case c of '\b': result = "
#
#
#           The Nimrod Compiler
#        (c) Copyright 2012 Andreas Rumpf
#
#    See the file "copying.txt", included in this
#    distribution, for details about the copyright.
#

## This module contains the data structures for the semantic checking phase.

import 
  strutils, lists, intsets, options, lexer, ast, astalgo, trees, treetab,
  wordrecg, 
  ropes, msgs, platform, os, condsyms, idents, renderer, types, extccomp, math, 
  magicsys, nversion, nimsets, parser, times, passes, rodread, evals

type 
  # One entry of the option stack used while parsing pragmas. Entries are
  # list nodes (they derive from ``lists.TListEntry``); ``lastOptionEntry``
  # reads the tail of ``TContext.optionStack`` as a ``POptionEntry``.
  TOptionEntry* = object of lists.TListEntry # entries to put on a
                                             # stack for pragma parsing
    options*: TOptions              # options recorded in this entry
    defaultCC*: TCallingConvention  # default calling convention of this entry
    dynlib*: PLib                   # NOTE(review): presumably the dynlib in
                                    # effect for this entry -- confirm
    Notes*: TNoteKinds              # note kinds recorded in this entry

  POptionEntry* = ref TOptionEntry  # reference type used for stack entries
  PProcCon* = ref TProcCon    # reference to a procedure context
  # Procedure contexts form a singly linked stack through ``next``;
  # ``pushProcCon`` links a new context in front of ``TContext.p``.
  TProcCon*{.final.} = object # procedure context; also used for top-level
                              # statements
    owner*: PSym              # the symbol this context belongs to
    resultSym*: PSym          # the result symbol (if we are in a proc)
    nestedLoopCounter*: int   # whether we are in a loop or not
                              # (stored as an int counter, not a bool)
    nestedBlockCounter*: int  # whether we are in a block or not
                              # (stored as an int counter, not a bool)
    InTryStmt*: int           # whether we are in a try statement; works also
                              # in standalone ``except`` and ``finally``
    next*: PProcCon           # used for stacking procedure contexts
  
  # Element type of ``TGenericsCache.generics``.
  # NOTE(review): judging by the field names this pairs a generic symbol with
  # one instantiation of it and the concrete types used -- confirm against
  # the code that fills the cache.
  TInstantiatedSymbol* {.final.} = object
    genericSym*, instSym*: PSym   # presumably the generic and its instance
    concreteTypes*: seq[PType]    # presumably the types it was instantiated with
  
  # If we generate an instance of a generic, we'd like to re-use that
  # instance if possible across module boundaries. However, this is not
  # possible if the compilation cache is enabled. So we give up then and use
  # the caching of generics only per module, not per project.
  #
  # A fresh cache is created by ``newGenericsCache``, which initializes
  # ``InstTypes`` and sets ``generics`` to an empty sequence.
  TGenericsCache* {.final.} = object
    InstTypes*: TIdTable # map PType to PType
    generics*: seq[TInstantiatedSymbol] # a list of the things to compile
    lastGenericIdx*: int      # used for the generics stack
  
  PGenericsCache* = ref TGenericsCache  # shared handle; see TContext.generics
  PContext* = ref TContext
  # Per-module state of the semantic checking phase. Extends
  # ``TPassContext``.
  TContext* = object of TPassContext # a context represents a module
    module*: PSym              # the module sym belonging to the context
    p*: PProcCon               # procedure context (top of the stack that is
                               # linked through ``TProcCon.next``)
    generics*: PGenericsCache  # may point to a global or module-local structure
    friendModule*: PSym        # current friend module; may access private data;
                               # this is used so that generic instantiations
                               # can access private object fields
    InstCounter*: int          # to prevent endless instantiations
   
    threadEntries*: TSymSeq    # list of thread entries to check
    tab*: TSymTab              # each module has its own symbol table
    AmbiguousSymbols*: TIntSet # ids of all ambiguous symbols (cannot
                               # store this info in the syms themselves!)
    InGenericContext*: int     # > 0 if we are in a generic
    InUnrolledContext*: int    # > 0 if we are unrolling a loop
    InCompilesContext*: int    # > 0 if we are in a ``compiles`` magic
    converters*: TSymSeq       # sequence of converters
    optionStack*: TLinkedList  # stack of ``TOptionEntry`` nodes; its tail is
                               # returned by ``lastOptionEntry``
    libs*: TLinkedList         # all libs used by this module
    # The following proc-typed fields are callback slots; nothing in the
    # visible part of this file assigns them.
    semConstExpr*: proc (c: PContext, n: PNode): PNode {.nimcall.} # for the pragmas
    semExpr*: proc (c: PContext, n: PNode): PNode {.nimcall.}      # for the pragmas
    semConstBoolExpr*: proc (c: PContext, n: PNode): PNode {.nimcall.} # XXX bite the bullet
    semOverloadedCall*: proc (c: PContext, n, nOrig: PNode,
                              filter: TSymKinds): PNode {.nimcall.}
    includedFiles*: TIntSet    # used to detect recursive include files
    filename*: string          # the module's filename
    userPragmas*: TStrTable    # NOTE(review): presumably user-defined pragmas
                               # keyed by name -- confirm
    evalContext*: PEvalContext # NOTE(review): presumably the context used for
                               # compile-time evaluation -- confirm
    UnknownIdents*: TIntSet    # ids of all unknown identifiers to prevent
                               # naming it multiple times

# Module-private generics cache (not exported). Nothing in the visible part
# of this file assigns or reads it.
# NOTE(review): presumably the "global" structure that ``TContext.generics``
# may point to -- confirm.
var
  gGenericsCache: PGenericsCache # save for modularity

proc newGenericsCache*(): PGenericsCache =
  # Allocates a generics cache whose list of instantiations is empty and
  # whose ``InstTypes`` table is initialized; ``lastGenericIdx`` keeps its
  # default value.
  new(result)
  result.generics = @[]
  result.InstTypes.initIdTable()

# Forward declarations of the module's exported procs. The bodies of
# ``lastOptionEntry``, ``getCurrOwner``, ``PushOwner`` and ``PopOwner``
# follow in the implementation section below; the others are expected
# further down in the file.
proc newContext*(module: PSym, nimfile: string): PContext

proc lastOptionEntry*(c: PContext): POptionEntry
proc newOptionEntry*(): POptionEntry
proc addConverter*(c: PContext, conv: PSym)
proc newLib*(kind: TLibKind): PLib
proc addToLib*(lib: PLib, sym: PSym)
proc makePtrType*(c: PContext, baseType: PType): PType
proc makeVarType*(c: PContext, baseType: PType): PType
proc newTypeS*(kind: TTypeKind, c: PContext): PType
proc fillTypeS*(dest: PType, kind: TTypeKind, c: PContext)

# owner handling:
# these three procs operate on the global owner stack ``gOwners``
proc getCurrOwner*(): PSym
proc PushOwner*(owner: PSym)
proc PopOwner*()
# implementation

# Global stack of owner symbols, initially empty. ``PushOwner`` appends to
# it, ``PopOwner`` removes the last element and ``getCurrOwner`` reads the
# last element.
var gOwners*: seq[PSym] = @[]

proc getCurrOwner(): PSym = 
  # Returns the symbol on top of the global owner stack ``gOwners``.
  # The owner stack is used for initializing the owner field of syms; the
  # documentation comment always gets assigned to the current owner.
  # BUGFIX: global array is needed!
  # The stack is indexed without an emptiness check, so callers must have
  # pushed an owner first.
  return gOwners[gOwners.len - 1]

proc PushOwner(owner: PSym) = 
  # Makes ``owner`` the current owner by appending it to ``gOwners``.
  gOwners.add(owner)

proc PopOwner() = 
  # Removes the current owner from ``gOwners``. Popping an empty stack is
  # reported through ``InternalError``.
  if gOwners.len <= 0:
    InternalError("popOwner")
  else:
    setlen(gOwners, gOwners.len - 1)

proc lastOptionEntry(c: PContext): POptionEntry = 
  # Returns the tail node of the context's option stack, converted to an
  # option entry.
  var tailNode = c.optionStack.tail
  return POptionEntry(tailNode)

proc pushProcCon*(c: PContext, owner: PSym) {.inline.} = 
  if owner == nil: 
    InternalError("owner is nil")
    return
  var x: PProcCon
  new(x)
  x.owner = owner
  x.next = c.p
  c.p = </