diff options
Diffstat (limited to 'compiler/ic')
-rw-r--r-- | compiler/ic/bitabs.nim | 178 | ||||
-rw-r--r-- | compiler/ic/cbackend.nim | 180 | ||||
-rw-r--r-- | compiler/ic/dce.nim | 169 | ||||
-rw-r--r-- | compiler/ic/design.rst | 56 | ||||
-rw-r--r-- | compiler/ic/ic.nim | 1343 | ||||
-rw-r--r-- | compiler/ic/iclineinfos.nim | 84 | ||||
-rw-r--r-- | compiler/ic/integrity.nim | 155 | ||||
-rw-r--r-- | compiler/ic/navigator.nim | 183 | ||||
-rw-r--r-- | compiler/ic/packed_ast.nim | 367 | ||||
-rw-r--r-- | compiler/ic/replayer.nim | 171 | ||||
-rw-r--r-- | compiler/ic/rodfiles.nim | 283 |
11 files changed, 3169 insertions, 0 deletions
diff --git a/compiler/ic/bitabs.nim b/compiler/ic/bitabs.nim new file mode 100644 index 000000000..0c9994c83 --- /dev/null +++ b/compiler/ic/bitabs.nim @@ -0,0 +1,178 @@ +## A BiTable is a table that can be seen as an optimized pair +## of `(Table[LitId, Val], Table[Val, LitId])`. + +import std/hashes +import rodfiles + +when defined(nimPreviewSlimSystem): + import std/assertions + +type + LitId* = distinct uint32 + + BiTable*[T] = object + vals: seq[T] # indexed by LitId + keys: seq[LitId] # indexed by hash(val) + +proc initBiTable*[T](): BiTable[T] = BiTable[T](vals: @[], keys: @[]) + +proc nextTry(h, maxHash: Hash): Hash {.inline.} = + result = (h + 1) and maxHash + +template maxHash(t): untyped = high(t.keys) +template isFilled(x: LitId): bool = x.uint32 > 0'u32 + +proc `$`*(x: LitId): string {.borrow.} +proc `<`*(x, y: LitId): bool {.borrow.} +proc `<=`*(x, y: LitId): bool {.borrow.} +proc `==`*(x, y: LitId): bool {.borrow.} +proc hash*(x: LitId): Hash {.borrow.} + + +proc len*[T](t: BiTable[T]): int = t.vals.len + +proc mustRehash(length, counter: int): bool {.inline.} = + assert(length > counter) + result = (length * 2 < counter * 3) or (length - counter < 4) + +const + idStart = 1 + +template idToIdx(x: LitId): int = x.int - idStart + +proc hasLitId*[T](t: BiTable[T]; x: LitId): bool = + let idx = idToIdx(x) + result = idx >= 0 and idx < t.vals.len + +proc enlarge[T](t: var BiTable[T]) = + var n: seq[LitId] + newSeq(n, len(t.keys) * 2) + swap(t.keys, n) + for i in 0..high(n): + let eh = n[i] + if isFilled(eh): + var j = hash(t.vals[idToIdx eh]) and maxHash(t) + while isFilled(t.keys[j]): + j = nextTry(j, maxHash(t)) + t.keys[j] = move n[i] + +proc getKeyId*[T](t: BiTable[T]; v: T): LitId = + let origH = hash(v) + var h = origH and maxHash(t) + if t.keys.len != 0: + while true: + let litId = t.keys[h] + if not isFilled(litId): break + if t.vals[idToIdx t.keys[h]] == v: return litId + h = nextTry(h, maxHash(t)) + return LitId(0) + +proc getOrIncl*[T](t: var BiTable[T]; v: T): LitId = + let origH = hash(v) + var h = origH and maxHash(t) + if t.keys.len != 0: + while true: + let litId = t.keys[h] + if not isFilled(litId): break + if t.vals[idToIdx t.keys[h]] == v: return litId + h = nextTry(h, maxHash(t)) + # not found, we need to insert it: + if mustRehash(t.keys.len, t.vals.len): + enlarge(t) + # recompute where to insert: + h = origH and maxHash(t) + while true: + let litId = t.keys[h] + if not isFilled(litId): break + h = nextTry(h, maxHash(t)) + else: + setLen(t.keys, 16) + h = origH and maxHash(t) + + result = LitId(t.vals.len + idStart) + t.keys[h] = result + t.vals.add v + + +proc `[]`*[T](t: var BiTable[T]; litId: LitId): var T {.inline.} = + let idx = idToIdx litId + assert idx < t.vals.len + result = t.vals[idx] + +proc `[]`*[T](t: BiTable[T]; litId: LitId): lent T {.inline.} = + let idx = idToIdx litId + assert idx < t.vals.len + result = t.vals[idx] + +proc hash*[T](t: BiTable[T]): Hash = + ## as the keys are hashes of the values, we simply use them instead + var h: Hash = 0 + for i, n in pairs t.keys: + h = h !& hash((i, n)) + result = !$h + +proc store*[T](f: var RodFile; t: BiTable[T]) = + storeSeq(f, t.vals) + storeSeq(f, t.keys) + +proc load*[T](f: var RodFile; t: var BiTable[T]) = + loadSeq(f, t.vals) + loadSeq(f, t.keys) + +proc sizeOnDisc*(t: BiTable[string]): int = + result = 4 + for x in t.vals: + result += x.len + 4 + result += t.keys.len * sizeof(LitId) + +when isMainModule: + + var t: BiTable[string] + + echo getOrIncl(t, "hello") + + echo getOrIncl(t, "hello") + echo getOrIncl(t, "hello3") + echo getOrIncl(t, "hello4") + echo getOrIncl(t, "helloasfasdfdsa") + echo getOrIncl(t, "hello") + echo getKeyId(t, "hello") + echo getKeyId(t, "none") + + for i in 0 ..< 100_000: + discard t.getOrIncl($i & "___" & $i) + + for i in 0 ..< 100_000: + assert t.getOrIncl($i & "___" & $i).idToIdx == i + 4 + echo "begin" + echo t.vals.len + + echo t.vals[0] + echo t.vals[1004] + + echo "middle" + + var tf: BiTable[float] + + discard tf.getOrIncl(0.4) + discard tf.getOrIncl(16.4) + discard tf.getOrIncl(32.4) + echo getKeyId(tf, 32.4) + + var f2 = open("testblah.bin", fmWrite) + echo store(f2, tf) + f2.close + + var f1 = open("testblah.bin", fmRead) + + var t2: BiTable[float] + + echo f1.load(t2) + echo t2.vals.len + + echo getKeyId(t2, 32.4) + + echo "end" + + + f1.close diff --git a/compiler/ic/cbackend.nim b/compiler/ic/cbackend.nim new file mode 100644 index 000000000..83f1b4cc7 --- /dev/null +++ b/compiler/ic/cbackend.nim @@ -0,0 +1,180 @@ +# +# +# The Nim Compiler +# (c) Copyright 2021 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## New entry point into our C/C++ code generator. Ideally +## somebody would rewrite the old backend (which is 8000 lines of crufty Nim code) +## to work on packed trees directly and produce the C code as an AST which can +## then be rendered to text in a very simple manner. Unfortunately nobody wrote +## this code. So instead we wrap the existing cgen.nim and its friends so that +## we call directly into the existing code generation logic but avoiding the +## naive, outdated `passes` design. Thus you will see some +## `useAliveDataFromDce in flags` checks in the old code -- the old code is +## also doing cross-module dependency tracking and DCE that we don't need +## anymore. DCE is now done as prepass over the entire packed module graph. + +import std/[packedsets, algorithm, tables] + +when defined(nimPreviewSlimSystem): + import std/assertions + +import ".."/[ast, options, lineinfos, modulegraphs, cgendata, cgen, + pathutils, extccomp, msgs, modulepaths] + +import packed_ast, ic, dce, rodfiles + +proc unpackTree(g: ModuleGraph; thisModule: int; + tree: PackedTree; n: NodePos): PNode = + var decoder = initPackedDecoder(g.config, g.cache) + result = loadNodes(decoder, g.packed, thisModule, tree, n) + +proc setupBackendModule(g: ModuleGraph; m: var LoadedModule) = + if g.backend == nil: + g.backend = cgendata.newModuleList(g) + assert g.backend != nil + var bmod = cgen.newModule(BModuleList(g.backend), m.module, g.config) + bmod.idgen = idgenFromLoadedModule(m) + +proc generateCodeForModule(g: ModuleGraph; m: var LoadedModule; alive: var AliveSyms) = + var bmod = BModuleList(g.backend).modules[m.module.position] + assert bmod != nil + bmod.flags.incl useAliveDataFromDce + bmod.alive = move alive[m.module.position] + + for p in allNodes(m.fromDisk.topLevel): + let n = unpackTree(g, m.module.position, m.fromDisk.topLevel, p) + cgen.genTopLevelStmt(bmod, n) + + finalCodegenActions(g, bmod, newNodeI(nkStmtList, m.module.info)) + for disp in getDispatchers(g): + genProcAux(bmod, disp) + m.fromDisk.backendFlags = cgen.whichInitProcs(bmod) + +proc replayTypeInfo(g: ModuleGraph; m: var LoadedModule; origin: FileIndex) = + for x in mitems(m.fromDisk.emittedTypeInfo): + #echo "found type ", x, " for file ", int(origin) + g.emittedTypeInfo[x] = origin + +proc addFileToLink(config: ConfigRef; m: PSym) = + let filename = AbsoluteFile toFullPath(config, m.position.FileIndex) + let ext = + if config.backend == backendCpp: ".nim.cpp" + elif config.backend == backendObjc: ".nim.m" + else: ".nim.c" + let cfile = changeFileExt(completeCfilePath(config, + mangleModuleName(config, filename).AbsoluteFile), ext) + let objFile = completeCfilePath(config, toObjFile(config, cfile)) + if fileExists(objFile): + var cf = Cfile(nimname: m.name.s, cname: cfile, + obj: objFile, + flags: {CfileFlag.Cached}) + addFileToCompile(config, cf) + +when defined(debugDce): + import os, std/packedsets + +proc storeAliveSymsImpl(asymFile: AbsoluteFile; s: seq[int32]) = + var f = rodfiles.create(asymFile.string) + f.storeHeader() + f.storeSection aliveSymsSection + f.storeSeq(s) + close f + +template prepare {.dirty.} = + let asymFile = toRodFile(config, AbsoluteFile toFullPath(config, position.FileIndex), ".alivesyms") + var s = newSeqOfCap[int32](alive[position].len) + for a in items(alive[position]): s.add int32(a) + sort(s) + +proc storeAliveSyms(config: ConfigRef; position: int; alive: AliveSyms) = + prepare() + storeAliveSymsImpl(asymFile, s) + +proc aliveSymsChanged(config: ConfigRef; position: int; alive: AliveSyms): bool = + prepare() + var f2 = rodfiles.open(asymFile.string) + f2.loadHeader() + f2.loadSection aliveSymsSection + var oldData: seq[int32] = @[] + f2.loadSeq(oldData) + f2.close + if f2.err == ok and oldData == s: + result = false + else: + when defined(debugDce): + let oldAsSet = toPackedSet[int32](oldData) + let newAsSet = toPackedSet[int32](s) + echo "set of live symbols changed ", asymFile.changeFileExt("rod"), " ", position, " ", f2.err + echo "in old but not in new ", oldAsSet.difference(newAsSet), " number of entries in old ", oldAsSet.len + echo "in new but not in old ", newAsSet.difference(oldAsSet), " number of entries in new ", newAsSet.len + #if execShellCmd(getAppFilename() & " rod " & quoteShell(asymFile.changeFileExt("rod"))) != 0: + # echo "command failed" + result = true + storeAliveSymsImpl(asymFile, s) + +proc genPackedModule(g: ModuleGraph, i: int; alive: var AliveSyms) = + # case statement here to enforce exhaustive checks. + case g.packed[i].status + of undefined: + discard "nothing to do" + of loading, stored: + assert false + of storing, outdated: + storeAliveSyms(g.config, g.packed[i].module.position, alive) + generateCodeForModule(g, g.packed[i], alive) + closeRodFile(g, g.packed[i].module) + of loaded: + if g.packed[i].loadedButAliveSetChanged: + generateCodeForModule(g, g.packed[i], alive) + else: + addFileToLink(g.config, g.packed[i].module) + replayTypeInfo(g, g.packed[i], FileIndex(i)) + + if g.backend == nil: + g.backend = cgendata.newModuleList(g) + registerInitProcs(BModuleList(g.backend), g.packed[i].module, g.packed[i].fromDisk.backendFlags) + +proc generateCode*(g: ModuleGraph) = + ## The single entry point, generate C(++) code for the entire + ## Nim program aka `ModuleGraph`. + resetForBackend(g) + var alive = computeAliveSyms(g.packed, g.config) + + when false: + for i in 0..<len(g.packed): + echo i, " is of status ", g.packed[i].status, " ", toFullPath(g.config, FileIndex(i)) + + # First pass: Setup all the backend modules for all the modules that have + # changed: + for i in 0..<len(g.packed): + # case statement here to enforce exhaustive checks. + case g.packed[i].status + of undefined: + discard "nothing to do" + of loading, stored: + assert false + of storing, outdated: + setupBackendModule(g, g.packed[i]) + of loaded: + # Even though this module didn't change, DCE might trigger a change. + # Consider this case: Module A uses symbol S from B and B does not use + # S itself. A is then edited not to use S either. Thus we have to + # recompile B in order to remove S from the final result. + if aliveSymsChanged(g.config, g.packed[i].module.position, alive): + g.packed[i].loadedButAliveSetChanged = true + setupBackendModule(g, g.packed[i]) + + # Second pass: Code generation. + let mainModuleIdx = g.config.projectMainIdx2.int + # We need to generate the main module last, because only then + # all init procs have been registered: + for i in 0..<len(g.packed): + if i != mainModuleIdx: + genPackedModule(g, i, alive) + if mainModuleIdx >= 0: + genPackedModule(g, mainModuleIdx, alive) diff --git a/compiler/ic/dce.nim b/compiler/ic/dce.nim new file mode 100644 index 000000000..6eb36431e --- /dev/null +++ b/compiler/ic/dce.nim @@ -0,0 +1,169 @@ +# +# +# The Nim Compiler +# (c) Copyright 2021 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## Dead code elimination (=DCE) for IC. + +import std/[intsets, tables] + +when defined(nimPreviewSlimSystem): + import std/assertions + +import ".." / [ast, options, lineinfos, types] + +import packed_ast, ic, bitabs + +type + AliveSyms* = seq[IntSet] + AliveContext* = object ## Purpose is to fill the 'alive' field. + stack: seq[(int, TOptions, NodePos)] ## A stack for marking symbols as alive. + decoder: PackedDecoder ## We need a PackedDecoder for module ID address translations. + thisModule: int ## The module we're currently analysing for DCE. + alive: AliveSyms ## The final result of our computation. + options: TOptions + compilerProcs: Table[string, (int, int32)] + +proc isExportedToC(c: var AliveContext; g: PackedModuleGraph; symId: int32): bool = + ## "Exported to C" procs are special (these are marked with '.exportc') because these + ## must not be optimized away! + let symPtr = unsafeAddr g[c.thisModule].fromDisk.syms[symId] + let flags = symPtr.flags + # due to a bug/limitation in the lambda lifting, unused inner procs + # are not transformed correctly; issue (#411). However, the whole purpose here + # is to eliminate unused procs. So there is no special logic required for this case. + if sfCompileTime notin flags: + if ({sfExportc, sfCompilerProc} * flags != {}) or + (symPtr.kind == skMethod): + result = true + else: + result = false + # XXX: This used to be a condition to: + # (sfExportc in prc.flags and lfExportLib in prc.loc.flags) or + if sfCompilerProc in flags: + c.compilerProcs[g[c.thisModule].fromDisk.strings[symPtr.name]] = (c.thisModule, symId) + else: + result = false + +template isNotGeneric(n: NodePos): bool = ithSon(tree, n, genericParamsPos).kind == nkEmpty + +proc followLater(c: var AliveContext; g: PackedModuleGraph; module: int; item: int32) = + ## Marks a symbol 'item' as used and later in 'followNow' the symbol's body will + ## be analysed. + if not c.alive[module].containsOrIncl(item): + var body = g[module].fromDisk.syms[item].ast + if body != emptyNodeId: + let opt = g[module].fromDisk.syms[item].options + if g[module].fromDisk.syms[item].kind in routineKinds: + body = NodeId ithSon(g[module].fromDisk.bodies, NodePos body, bodyPos) + c.stack.add((module, opt, NodePos(body))) + + when false: + let nid = g[module].fromDisk.syms[item].name + if nid != LitId(0): + let name = g[module].fromDisk.strings[nid] + if name in ["nimFrame", "callDepthLimitReached"]: + echo "I was called! ", name, " body exists: ", body != emptyNodeId, " ", module, " ", item + +proc requestCompilerProc(c: var AliveContext; g: PackedModuleGraph; name: string) = + let (module, item) = c.compilerProcs[name] + followLater(c, g, module, item) + +proc loadTypeKind(t: PackedItemId; c: AliveContext; g: PackedModuleGraph; toSkip: set[TTypeKind]): TTypeKind = + template kind(t: ItemId): TTypeKind = g[t.module].fromDisk.types[t.item].kind + + var t2 = translateId(t, g, c.thisModule, c.decoder.config) + result = t2.kind + while result in toSkip: + t2 = translateId(g[t2.module].fromDisk.types[t2.item].types[^1], g, t2.module, c.decoder.config) + result = t2.kind + +proc rangeCheckAnalysis(c: var AliveContext; g: PackedModuleGraph; tree: PackedTree; n: NodePos) = + ## Replicates the logic of `ccgexprs.genRangeChck`. + ## XXX Refactor so that the duplicated logic is avoided. However, for now it's not clear + ## the approach has enough merit. + var dest = loadTypeKind(n.typ, c, g, abstractVar) + if optRangeCheck notin c.options or dest in {tyUInt..tyUInt64}: + discard "no need to generate a check because it was disabled" + else: + let n0t = loadTypeKind(n.firstSon.typ, c, g, {}) + if n0t in {tyUInt, tyUInt64}: + c.requestCompilerProc(g, "raiseRangeErrorNoArgs") + else: + let raiser = + case loadTypeKind(n.typ, c, g, abstractVarRange) + of tyUInt..tyUInt64, tyChar: "raiseRangeErrorU" + of tyFloat..tyFloat128: "raiseRangeErrorF" + else: "raiseRangeErrorI" + c.requestCompilerProc(g, raiser) + +proc aliveCode(c: var AliveContext; g: PackedModuleGraph; tree: PackedTree; n: NodePos) = + ## Marks the symbols we encounter when we traverse the AST at `tree[n]` as alive, unless + ## it is purely in a declarative context (type section etc.). + case n.kind + of nkNone..pred(nkSym), succ(nkSym)..nkNilLit: + discard "ignore non-sym atoms" + of nkSym: + # This symbol is alive and everything its body references. + followLater(c, g, c.thisModule, tree[n].soperand) + of nkModuleRef: + let (n1, n2) = sons2(tree, n) + assert n1.kind == nkNone + assert n2.kind == nkNone + let m = n1.litId + let item = tree[n2].soperand + let otherModule = toFileIndexCached(c.decoder, g, c.thisModule, m).int + followLater(c, g, otherModule, item) + of nkMacroDef, nkTemplateDef, nkTypeSection, nkTypeOfExpr, + nkCommentStmt, nkIncludeStmt, + nkImportStmt, nkImportExceptStmt, nkExportStmt, nkExportExceptStmt, + nkFromStmt, nkStaticStmt: + discard + of nkVarSection, nkLetSection, nkConstSection: + # XXX ignore the defining local variable name? + for son in sonsReadonly(tree, n): + aliveCode(c, g, tree, son) + of nkChckRangeF, nkChckRange64, nkChckRange: + rangeCheckAnalysis(c, g, tree, n) + of nkProcDef, nkConverterDef, nkMethodDef, nkFuncDef, nkIteratorDef: + if n.firstSon.kind == nkSym and isNotGeneric(n): + let item = tree[n.firstSon].soperand + if isExportedToC(c, g, item): + # This symbol is alive and everything its body references. + followLater(c, g, c.thisModule, item) + else: + for son in sonsReadonly(tree, n): + aliveCode(c, g, tree, son) + +proc followNow(c: var AliveContext; g: PackedModuleGraph) = + ## Mark all entries in the stack. Marking can add more entries + ## to the stack but eventually we have looked at every alive symbol. + while c.stack.len > 0: + let (modId, opt, ast) = c.stack.pop() + c.thisModule = modId + c.options = opt + aliveCode(c, g, g[modId].fromDisk.bodies, ast) + +proc computeAliveSyms*(g: PackedModuleGraph; conf: ConfigRef): AliveSyms = + ## Entry point for our DCE algorithm. + var c = AliveContext(stack: @[], decoder: PackedDecoder(config: conf), + thisModule: -1, alive: newSeq[IntSet](g.len), + options: conf.options) + for i in countdown(len(g)-1, 0): + if g[i].status != undefined: + c.thisModule = i + for p in allNodes(g[i].fromDisk.topLevel): + aliveCode(c, g, g[i].fromDisk.topLevel, p) + + followNow(c, g) + result = move(c.alive) + +proc isAlive*(a: AliveSyms; module: int, item: int32): bool = + ## Backends use this to query if a symbol is `alive` which means + ## we need to produce (C/C++/etc) code for it. + result = a[module].contains(item) + diff --git a/compiler/ic/design.rst b/compiler/ic/design.rst new file mode 100644 index 000000000..b096e3103 --- /dev/null +++ b/compiler/ic/design.rst @@ -0,0 +1,56 @@ +==================================== + Incremental Recompilations +==================================== + +We split the Nim compiler into a frontend and a backend. +The frontend produces a set of `.rod` files. Every `.nim` module +produces its own `.rod` file. + +- The IR must be a faithful representation of the AST in memory. +- The backend can do its own caching but doesn't have to. In the + current implementation the backend also caches its results. + +Advantage of the "set of files" vs the previous global database: +- By construction, we either read from the `.rod` file or from the + `.nim` file, there can be no inconsistency. There can also be no + partial updates. +- No dependency to external packages (SQLite). SQLite simply is too + slow and the old way of serialization was too slow too. We use a + format designed for Nim and expect to base further tools on this + file format. + +References to external modules must be (moduleId, symId) pairs. +The symbol IDs are module specific. This way no global ID increment +mechanism needs to be implemented that we could get wrong. ModuleIds +are rod-file specific too. + + + +Global state +------------ + +There is no global state. + +Rod File Format +--------------- + +It's a simple binary file format. `rodfiles.nim` contains some details. + + +Backend +------- + +Nim programmers have to come to enjoy whole-program dead code elimination, +by default. Since this is a "whole program" optimization, it does break +modularity. However, thanks to the packed AST representation we can perform +this global analysis without having to unpack anything. This is basically +a mark&sweep GC algorithm: + +- Start with the top level statements. Every symbol that is referenced + from a top level statement is not "dead" and needs to be compiled by + the backend. +- Every symbol referenced from a referenced symbol also has to be + compiled. + +Caching logic: Only if the set of alive symbols is different from the +last run, the module has to be regenerated. diff --git a/compiler/ic/ic.nim b/compiler/ic/ic.nim new file mode 100644 index 000000000..8e81633ef --- /dev/null +++ b/compiler/ic/ic.nim @@ -0,0 +1,1343 @@ +# +# +# The Nim Compiler +# (c) Copyright 2020 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +import std/[hashes, tables, intsets, monotimes] +import packed_ast, bitabs, rodfiles +import ".." / [ast, idents, lineinfos, msgs, ropes, options, + pathutils, condsyms, packages, modulepaths] +#import ".." / [renderer, astalgo] +from std/os import removeFile, isAbsolute + +import ../../dist/checksums/src/checksums/sha1 + +import iclineinfos + +when defined(nimPreviewSlimSystem): + import std/[syncio, assertions, formatfloat] + +type + PackedConfig* = object + backend: TBackend + selectedGC: TGCMode + cCompiler: TSystemCC + options: TOptions + globalOptions: TGlobalOptions + + ModuleBackendFlag* = enum + HasDatInitProc + HasModuleInitProc + + PackedModule* = object ## the parts of a PackedEncoder that are part of the .rod file + definedSymbols: string + moduleFlags: TSymFlags + includes*: seq[(LitId, string)] # first entry is the module filename itself + imports: seq[LitId] # the modules this module depends on + toReplay*: PackedTree # pragmas and VM specific state to replay. + topLevel*: PackedTree # top level statements + bodies*: PackedTree # other trees. Referenced from typ.n and sym.ast by their position. + #producedGenerics*: Table[GenericKey, SymId] + exports*: seq[(LitId, int32)] + hidden: seq[(LitId, int32)] + reexports: seq[(LitId, PackedItemId)] + compilerProcs*: seq[(LitId, int32)] + converters*, methods*, trmacros*, pureEnums*: seq[int32] + + typeInstCache*: seq[(PackedItemId, PackedItemId)] + procInstCache*: seq[PackedInstantiation] + attachedOps*: seq[(PackedItemId, TTypeAttachedOp, PackedItemId)] + methodsPerGenericType*: seq[(PackedItemId, int, PackedItemId)] + enumToStringProcs*: seq[(PackedItemId, PackedItemId)] + methodsPerType*: seq[(PackedItemId, PackedItemId)] + dispatchers*: seq[PackedItemId] + + emittedTypeInfo*: seq[string] + backendFlags*: set[ModuleBackendFlag] + + syms*: OrderedTable[int32, PackedSym] + types*: OrderedTable[int32, PackedType] + strings*: BiTable[string] # we could share these between modules. + numbers*: BiTable[BiggestInt] # we also store floats in here so + # that we can assure that every bit is kept + man*: LineInfoManager + + cfg: PackedConfig + + PackedEncoder* = object + #m*: PackedModule + thisModule*: int32 + lastFile*: FileIndex # remember the last lookup entry. + lastLit*: LitId + filenames*: Table[FileIndex, LitId] + pendingTypes*: seq[PType] + pendingSyms*: seq[PSym] + typeMarker*: IntSet #Table[ItemId, TypeId] # ItemId.item -> TypeId + symMarker*: IntSet #Table[ItemId, SymId] # ItemId.item -> SymId + config*: ConfigRef + +proc toString*(tree: PackedTree; pos: NodePos; m: PackedModule; nesting: int; + result: var string) = + if result.len > 0 and result[^1] notin {' ', '\n'}: + result.add ' ' + + result.add $tree[pos].kind + case tree[pos].kind + of nkEmpty, nkNilLit, nkType: discard + of nkIdent, nkStrLit..nkTripleStrLit: + result.add " " + result.add m.strings[LitId tree[pos].uoperand] + of nkSym: + result.add " " + result.add m.strings[m.syms[tree[pos].soperand].name] + of directIntLit: + result.add " " + result.addInt tree[pos].soperand + of externSIntLit: + result.add " " + result.addInt m.numbers[LitId tree[pos].uoperand] + of externUIntLit: + result.add " " + result.addInt cast[uint64](m.numbers[LitId tree[pos].uoperand]) + of nkFloatLit..nkFloat128Lit: + result.add " " + result.addFloat cast[BiggestFloat](m.numbers[LitId tree[pos].uoperand]) + else: + result.add "(\n" + for i in 1..(nesting+1)*2: result.add ' ' + for child in sonsReadonly(tree, pos): + toString(tree, child, m, nesting + 1, result) + result.add "\n" + for i in 1..nesting*2: result.add ' ' + result.add ")" + #for i in 1..nesting*2: result.add ' ' + +proc toString*(tree: PackedTree; n: NodePos; m: PackedModule): string = + result = "" + toString(tree, n, m, 0, result) + +proc debug*(tree: PackedTree; m: PackedModule) = + stdout.write toString(tree, NodePos 0, m) + +proc isActive*(e: PackedEncoder): bool = e.config != nil +proc disable(e: var PackedEncoder) = e.config = nil + +template primConfigFields(fn: untyped) {.dirty.} = + fn backend + fn selectedGC + fn cCompiler + fn options + fn globalOptions + +proc definedSymbolsAsString(config: ConfigRef): string = + result = newStringOfCap(200) + result.add "config" + for d in definedSymbolNames(config.symbols): + result.add ' ' + result.add d + +proc rememberConfig(c: var PackedEncoder; m: var PackedModule; config: ConfigRef; pc: PackedConfig) = + m.definedSymbols = definedSymbolsAsString(config) + #template rem(x) = + # c.m.cfg.x = config.x + #primConfigFields rem + m.cfg = pc + +const + debugConfigDiff = defined(debugConfigDiff) + +when debugConfigDiff: + import hashes, tables, intsets, sha1, strutils, sets + +proc configIdentical(m: PackedModule; config: ConfigRef): bool = + result = m.definedSymbols == definedSymbolsAsString(config) + when debugConfigDiff: + if not result: + var wordsA = m.definedSymbols.split(Whitespace).toHashSet() + var wordsB = definedSymbolsAsString(config).split(Whitespace).toHashSet() + for c in wordsA - wordsB: + echo "in A but not in B ", c + for c in wordsB - wordsA: + echo "in B but not in A ", c + template eq(x) = + result = result and m.cfg.x == config.x + when debugConfigDiff: + if m.cfg.x != config.x: + echo "B ", m.cfg.x, " ", config.x + primConfigFields eq + +proc rememberStartupConfig*(dest: var PackedConfig, config: ConfigRef) = + template rem(x) = + dest.x = config.x + primConfigFields rem + dest.globalOptions.excl optForceFullMake + +proc hashFileCached(conf: ConfigRef; fileIdx: FileIndex): string = + result = msgs.getHash(conf, fileIdx) + if result.len == 0: + let fullpath = msgs.toFullPath(conf, fileIdx) + result = $secureHashFile(fullpath) + msgs.setHash(conf, fileIdx, result) + +proc toLitId(x: FileIndex; c: var PackedEncoder; m: var PackedModule): LitId = + ## store a file index as a literal + if x == c.lastFile: + result = c.lastLit + else: + result = c.filenames.getOrDefault(x) + if result == LitId(0): + let p = msgs.toFullPath(c.config, x) + result = getOrIncl(m.strings, p) + c.filenames[x] = result + c.lastFile = x + c.lastLit = result + assert result != LitId(0) + +proc toFileIndex*(x: LitId; m: PackedModule; config: ConfigRef): FileIndex = + result = msgs.fileInfoIdx(config, AbsoluteFile m.strings[x]) + +proc includesIdentical(m: var PackedModule; config: ConfigRef): bool = + for it in mitems(m.includes): + if hashFileCached(config, toFileIndex(it[0], m, config)) != it[1]: + return false + result = true + +proc initEncoder*(c: var PackedEncoder; m: var PackedModule; moduleSym: PSym; config: ConfigRef; pc: PackedConfig) = + ## setup a context for serializing to packed ast + c.thisModule = moduleSym.itemId.module + c.config = config + m.moduleFlags = moduleSym.flags + m.bodies = newTreeFrom(m.topLevel) + m.toReplay = newTreeFrom(m.topLevel) + + c.lastFile = FileIndex(-10) + + let thisNimFile = FileIndex c.thisModule + var h = msgs.getHash(config, thisNimFile) + if h.len == 0: + let fullpath = msgs.toFullPath(config, thisNimFile) + if isAbsolute(fullpath): + # For NimScript compiler API support the main Nim file might be from a stream. + h = $secureHashFile(fullpath) + msgs.setHash(config, thisNimFile, h) + m.includes.add((toLitId(thisNimFile, c, m), h)) # the module itself + + rememberConfig(c, m, config, pc) + +proc addIncludeFileDep*(c: var PackedEncoder; m: var PackedModule; f: FileIndex) = + m.includes.add((toLitId(f, c, m), hashFileCached(c.config, f))) + +proc addImportFileDep*(c: var PackedEncoder; m: var PackedModule; f: FileIndex) = + m.imports.add toLitId(f, c, m) + +proc addHidden*(c: var PackedEncoder; m: var PackedModule; s: PSym) = + assert s.kind != skUnknown + let nameId = getOrIncl(m.strings, s.name.s) + m.hidden.add((nameId, s.itemId.item)) + assert s.itemId.module == c.thisModule + +proc addExported*(c: var PackedEncoder; m: var PackedModule; s: PSym) = + assert s.kind != skUnknown + assert s.itemId.module == c.thisModule + let nameId = getOrIncl(m.strings, s.name.s) + m.exports.add((nameId, s.itemId.item)) + +proc addConverter*(c: var PackedEncoder; m: var PackedModule; s: PSym) = + assert c.thisModule == s.itemId.module + m.converters.add(s.itemId.item) + +proc addTrmacro*(c: var PackedEncoder; m: var PackedModule; s: PSym) = + m.trmacros.add(s.itemId.item) + +proc addPureEnum*(c: var PackedEncoder; m: var PackedModule; s: PSym) = + assert s.kind == skType + m.pureEnums.add(s.itemId.item) + +proc addMethod*(c: var PackedEncoder; m: var PackedModule; s: PSym) = + m.methods.add s.itemId.item + +proc addReexport*(c: var PackedEncoder; m: var PackedModule; s: PSym) = + assert s.kind != skUnknown + if s.kind == skModule: return + let nameId = getOrIncl(m.strings, s.name.s) + m.reexports.add((nameId, PackedItemId(module: toLitId(s.itemId.module.FileIndex, c, m), + item: s.itemId.item))) + +proc addCompilerProc*(c: var PackedEncoder; m: var PackedModule; s: PSym) = + let nameId = getOrIncl(m.strings, s.name.s) + m.compilerProcs.add((nameId, s.itemId.item)) + +proc toPackedNode*(n: PNode; ir: var PackedTree; c: var PackedEncoder; m: var PackedModule) +proc storeSym*(s: PSym; c: var PackedEncoder; m: var PackedModule): PackedItemId +proc storeType(t: PType; c: var PackedEncoder; m: var PackedModule): PackedItemId + +proc flush(c: var PackedEncoder; m: var PackedModule) = + ## serialize any pending types or symbols from the context + while true: + if c.pendingTypes.len > 0: + discard storeType(c.pendingTypes.pop, c, m) + elif c.pendingSyms.len > 0: + discard storeSym(c.pendingSyms.pop, c, m) + else: + break + +proc toLitId(x: string; m: var PackedModule): LitId = + ## store a string as a literal + result = getOrIncl(m.strings, x) + +proc toLitId(x: BiggestInt; m: var PackedModule): LitId = + ## store an integer as a literal + result = getOrIncl(m.numbers, x) + +proc toPackedInfo(x: TLineInfo; c: var PackedEncoder; m: var PackedModule): PackedLineInfo = + pack(m.man, toLitId(x.fileIndex, c, m), x.line.int32, x.col.int32) + #PackedLineInfo(line: x.line, col: x.col, file: toLitId(x.fileIndex, c, m)) + +proc safeItemId(s: PSym; c: var PackedEncoder; m: var PackedModule): PackedItemId {.inline.} = + ## given a symbol, produce an ItemId with the correct properties + ## for local or remote symbols, packing the symbol as necessary + if s == nil or s.kind == skPackage: + result = nilItemId + #elif s.itemId.module == c.thisModule: + # result = PackedItemId(module: LitId(0), item: s.itemId.item) + else: + assert int(s.itemId.module) >= 0 + result = PackedItemId(module: toLitId(s.itemId.module.FileIndex, c, m), + item: s.itemId.item) + +proc addMissing(c: var PackedEncoder; p: PSym) = + ## consider queuing a symbol for later addition to the packed tree + if p != nil and p.itemId.module == c.thisModule: + if p.itemId.item notin c.symMarker: + if not (sfForward in p.flags and p.kind in routineKinds): + c.pendingSyms.add p + +proc addMissing(c: var PackedEncoder; p: PType) = + ## consider queuing a type for later addition to the packed tree + if p != nil and p.uniqueId.module == c.thisModule: + if p.uniqueId.item notin c.typeMarker: + c.pendingTypes.add p + +template storeNode(dest, src, field) = + var nodeId: NodeId + if src.field != nil: + nodeId = getNodeId(m.bodies) + toPackedNode(src.field, m.bodies, c, m) + else: + nodeId = emptyNodeId + dest.field = nodeId + +proc storeTypeLater(t: PType; c: var PackedEncoder; m: var PackedModule): PackedItemId = + # We store multiple different trees in m.bodies. For this to work out, we + # cannot immediately store types/syms. We enqueue them instead to ensure + # we only write one tree into m.bodies after the other. + if t.isNil: return nilItemId + + assert t.uniqueId.module >= 0 + assert t.uniqueId.item > 0 + result = PackedItemId(module: toLitId(t.uniqueId.module.FileIndex, c, m), item: t.uniqueId.item) + if t.uniqueId.module == c.thisModule: + # the type belongs to this module, so serialize it here, eventually. + addMissing(c, t) + +proc storeSymLater(s: PSym; c: var PackedEncoder; m: var PackedModule): PackedItemId = + if s.isNil: return nilItemId + assert s.itemId.module >= 0 + assert s.itemId.item >= 0 + result = PackedItemId(module: toLitId(s.itemId.module.FileIndex, c, m), item: s.itemId.item) + if s.itemId.module == c.thisModule: + # the sym belongs to this module, so serialize it here, eventually. + addMissing(c, s) + +proc storeType(t: PType; c: var PackedEncoder; m: var PackedModule): PackedItemId = + ## serialize a ptype + if t.isNil: return nilItemId + + assert t.uniqueId.module >= 0 + assert t.uniqueId.item > 0 + result = PackedItemId(module: toLitId(t.uniqueId.module.FileIndex, c, m), item: t.uniqueId.item) + + if t.uniqueId.module == c.thisModule and not c.typeMarker.containsOrIncl(t.uniqueId.item): + #if t.uniqueId.item >= m.types.len: + # setLen m.types, t.uniqueId.item+1 + + var p = PackedType(id: t.uniqueId.item, kind: t.kind, flags: t.flags, callConv: t.callConv, + size: t.size, align: t.align, nonUniqueId: t.itemId.item, + paddingAtEnd: t.paddingAtEnd) + storeNode(p, t, n) + p.typeInst = t.typeInst.storeType(c, m) + for kid in kids t: + p.types.add kid.storeType(c, m) + c.addMissing t.sym + p.sym = t.sym.safeItemId(c, m) + c.addMissing t.owner + p.owner = t.owner.safeItemId(c, m) + + # fill the reserved slot, nothing else: + m.types[t.uniqueId.item] = p + +proc toPackedLib(l: PLib; c: var PackedEncoder; m: var PackedModule): PackedLib = + ## the plib hangs off the psym via the .annex field + if l.isNil: return + result = PackedLib(kind: l.kind, generated: l.generated, + isOverridden: l.isOverridden, name: toLitId($l.name, m) + ) + storeNode(result, l, path) + +proc storeSym*(s: PSym; c: var PackedEncoder; m: var PackedModule): PackedItemId = + ## serialize a psym + if s.isNil: return nilItemId + + assert s.itemId.module >= 0 + result = PackedItemId(module: toLitId(s.itemId.module.FileIndex, c, m), item: s.itemId.item) + + if s.itemId.module == c.thisModule and not c.symMarker.containsOrIncl(s.itemId.item): + #if s.itemId.item >= m.syms.len: + # setLen m.syms, s.itemId.item+1 + + assert sfForward notin s.flags + + var p = PackedSym(id: s.itemId.item, kind: s.kind, flags: s.flags, info: s.info.toPackedInfo(c, m), magic: s.magic, + position: s.position, offset: s.offset, disamb: s.disamb, options: s.options, + name: s.name.s.toLitId(m)) + + storeNode(p, s, ast) + storeNode(p, s, constraint) + + if s.kind in {skLet, skVar, skField, skForVar}: + c.addMissing s.guard + p.guard = s.guard.safeItemId(c, m) + p.bitsize = s.bitsize + p.alignment = s.alignment + + p.externalName = toLitId(s.loc.snippet, m) + p.locFlags = s.loc.flags + c.addMissing s.typ + p.typ = s.typ.storeType(c, m) + c.addMissing s.owner + p.owner = s.owner.safeItemId(c, m) + p.annex = toPackedLib(s.annex, c, m) + when hasFFI: + p.cname = toLitId(s.cname, m) + p.instantiatedFrom = s.instantiatedFrom.safeItemId(c, m) + + # fill the reserved slot, nothing else: + m.syms[s.itemId.item] = p + +proc addModuleRef(n: PNode; ir: var PackedTree; c: var PackedEncoder; m: var PackedModule) = + ## add a remote symbol reference to the tree + let info = n.info.toPackedInfo(c, m) + if n.typ != n.sym.typ: + ir.addNode(kind = nkModuleRef, operand = 3.int32, # spans 3 nodes in total + info = info, flags = n.flags, + typeId = storeTypeLater(n.typ, c, m)) + else: + ir.addNode(kind = nkModuleRef, operand = 3.int32, # spans 3 nodes in total + info = info, flags = n.flags) + ir.addNode(kind = nkNone, info = info, + operand = toLitId(n.sym.itemId.module.FileIndex, c, m).int32) + ir.addNode(kind = nkNone, info = info, + operand = n.sym.itemId.item) + +proc toPackedNode*(n: PNode; ir: var PackedTree; c: var PackedEncoder; m: var PackedModule) = + ## serialize a node into the tree + if n == nil: + ir.addNode(kind = nkNilRodNode, operand = 1, info = NoLineInfo) + return + let info = toPackedInfo(n.info, c, m) + case n.kind + of nkNone, nkEmpty, nkNilLit, nkType: + ir.addNode(kind = n.kind, flags = n.flags, operand = 0, + typeId = storeTypeLater(n.typ, c, m), info = info) + of nkIdent: + ir.addNode(kind = n.kind, flags = n.flags, + operand = int32 getOrIncl(m.strings, n.ident.s), + typeId = storeTypeLater(n.typ, c, m), info = info) + of nkSym: + if n.sym.itemId.module == c.thisModule: + # it is a symbol that belongs to the module we're currently + # packing: + let id = n.sym.storeSymLater(c, m).item + if n.typ != n.sym.typ: + ir.addNode(kind = nkSym, flags = n.flags, operand = id, + info = info, + typeId = storeTypeLater(n.typ, c, m)) + else: + ir.addNode(kind = nkSym, flags = n.flags, operand = id, + info = info) + else: + # store it as an external module reference: + addModuleRef(n, ir, c, m) + of externIntLit: + ir.addNode(kind = n.kind, flags = n.flags, + operand = int32 getOrIncl(m.numbers, n.intVal), + typeId = storeTypeLater(n.typ, c, m), info = info) + of nkStrLit..nkTripleStrLit: + ir.addNode(kind = n.kind, flags = n.flags, + operand = int32 getOrIncl(m.strings, n.strVal), + typeId = storeTypeLater(n.typ, c, m), info = info) + of nkFloatLit..nkFloat128Lit: + ir.addNode(kind = n.kind, flags = n.flags, + operand = int32 getOrIncl(m.numbers, cast[BiggestInt](n.floatVal)), + typeId = storeTypeLater(n.typ, c, m), info = info) + else: + let patchPos = ir.prepare(n.kind, n.flags, + storeTypeLater(n.typ, c, m), info) + for i in 0..<n.len: + toPackedNode(n[i], ir, c, m) + ir.patch patchPos + +proc storeTypeInst*(c: var PackedEncoder; m: var PackedModule; s: PSym; inst: PType) = + m.typeInstCache.add (storeSymLater(s, c, m), storeTypeLater(inst, c, m)) + +proc addPragmaComputation*(c: var PackedEncoder; m: var PackedModule; n: PNode) = + toPackedNode(n, m.toReplay, c, m) + +proc toPackedProcDef(n: PNode; ir: var PackedTree; c: var PackedEncoder; m: var PackedModule) = + let info = toPackedInfo(n.info, c, m) + let patchPos = ir.prepare(n.kind, n.flags, + storeTypeLater(n.typ, c, m), info) + for i in 0..<n.len: + if i != bodyPos: + toPackedNode(n[i], ir, c, m) + else: + # do not serialize the body of the proc, it's unnecessary since + # n[0].sym.ast has the sem'checked variant of it which is what + # everybody should use instead. + ir.addNode(kind = nkEmpty, flags = {}, operand = 0, + typeId = nilItemId, info = info) + ir.patch patchPos + +proc toPackedNodeIgnoreProcDefs(n: PNode, encoder: var PackedEncoder; m: var PackedModule) = + case n.kind + of routineDefs: + toPackedProcDef(n, m.topLevel, encoder, m) + when false: + # we serialize n[namePos].sym instead + if n[namePos].kind == nkSym: + let s = n[namePos].sym + discard storeSym(s, encoder, m) + if s.flags * {sfExportc, sfCompilerProc, sfCompileTime} == {sfExportc}: + m.exportCProcs.add(s.itemId.item) + else: + toPackedNode(n, m.topLevel, encoder, m) + of nkStmtList, nkStmtListExpr: + for it in n: + toPackedNodeIgnoreProcDefs(it, encoder, m) + of nkImportStmt, nkImportExceptStmt, nkExportStmt, nkExportExceptStmt, + nkFromStmt, nkIncludeStmt: + discard "nothing to do" + else: + toPackedNode(n, m.topLevel, encoder, m) + +proc toPackedNodeTopLevel*(n: PNode, encoder: var PackedEncoder; m: var PackedModule) = + toPackedNodeIgnoreProcDefs(n, encoder, m) + flush encoder, m + +proc toPackedGeneratedProcDef*(s: PSym, encoder: var PackedEncoder; m: var PackedModule) = + ## Generic procs and generated `=hook`'s need explicit top-level entries so + ## that the code generator can work without having to special case these. These + ## entries will also be useful for other tools and are the cleanest design + ## I can come up with. + assert s.kind in routineKinds + toPackedProcDef(s.ast, m.topLevel, encoder, m) + #flush encoder, m + +proc storeAttachedProcDef*(t: PType; op: TTypeAttachedOp; s: PSym, + encoder: var PackedEncoder; m: var PackedModule) = + assert s.kind in routineKinds + assert isActive(encoder) + let tid = storeTypeLater(t, encoder, m) + let sid = storeSymLater(s, encoder, m) + m.attachedOps.add (tid, op, sid) + toPackedGeneratedProcDef(s, encoder, m) + +proc storeInstantiation*(c: var PackedEncoder; m: var PackedModule; s: PSym; i: PInstantiation) = + var t = newSeq[PackedItemId](i.concreteTypes.len) + for j in 0..high(i.concreteTypes): + t[j] = storeTypeLater(i.concreteTypes[j], c, m) + m.procInstCache.add PackedInstantiation(key: storeSymLater(s, c, m), + sym: storeSymLater(i.sym, c, m), + concreteTypes: t) + toPackedGeneratedProcDef(i.sym, c, m) + +proc storeExpansion*(c: var PackedEncoder; m: var PackedModule; info: TLineInfo; s: PSym) = + toPackedNode(newSymNode(s, info), m.bodies, c, m) + +proc loadError(err: RodFileError; filename: AbsoluteFile; config: ConfigRef;) = + case err + of cannotOpen: + rawMessage(config, warnCannotOpenFile, filename.string) + of includeFileChanged: + rawMessage(config, warnFileChanged, filename.string) + else: + rawMessage(config, warnCannotOpenFile, filename.string & " reason: " & $err) + #echo "Error: ", $err, " loading file: ", filename.string + +proc toRodFile*(conf: ConfigRef; f: AbsoluteFile; ext = RodExt): AbsoluteFile = + result = changeFileExt(completeGeneratedFilePath(conf, + mangleModuleName(conf, f).AbsoluteFile), ext) + +const + BenchIC* = false + +when BenchIC: + var gloadBodies: MonoTime + + template bench(x, body) = + let start = getMonoTime() + body + x = x + (getMonoTime() - start) + +else: + template bench(x, body) = body + +proc loadRodFile*(filename: AbsoluteFile; m: var PackedModule; config: ConfigRef; + ignoreConfig = false): RodFileError = + var f = rodfiles.open(filename.string) + f.loadHeader() + f.loadSection configSection + + f.loadPrim m.definedSymbols + f.loadPrim m.moduleFlags + f.loadPrim m.cfg + + if f.err == ok and not configIdentical(m, config) and not ignoreConfig: + f.err = configMismatch + + template loadSeqSection(section, data) {.dirty.} = + f.loadSection section + f.loadSeq data + + template loadTableSection(section, data) {.dirty.} = + f.loadSection section + f.loadOrderedTable data + + template loadTabSection(section, data) {.dirty.} = + f.loadSection section + f.load data + + loadTabSection stringsSection, m.strings + + loadSeqSection checkSumsSection, m.includes + if config.cmd != cmdM and not includesIdentical(m, config): + f.err = includeFileChanged + + loadSeqSection depsSection, m.imports + + bench gloadBodies: + + loadTabSection numbersSection, m.numbers + + loadSeqSection exportsSection, m.exports + loadSeqSection hiddenSection, m.hidden + loadSeqSection reexportsSection, m.reexports + + loadSeqSection compilerProcsSection, m.compilerProcs + + loadSeqSection trmacrosSection, m.trmacros + + loadSeqSection convertersSection, m.converters + loadSeqSection methodsSection, m.methods + loadSeqSection pureEnumsSection, m.pureEnums + + loadTabSection toReplaySection, m.toReplay + loadTabSection topLevelSection, m.topLevel + + loadTabSection bodiesSection, m.bodies + loadTableSection symsSection, m.syms + loadTableSection typesSection, m.types + + loadSeqSection typeInstCacheSection, m.typeInstCache + loadSeqSection procInstCacheSection, m.procInstCache + loadSeqSection attachedOpsSection, m.attachedOps + loadSeqSection methodsPerGenericTypeSection, m.methodsPerGenericType + loadSeqSection enumToStringProcsSection, m.enumToStringProcs + loadSeqSection methodsPerTypeSection, m.methodsPerType + loadSeqSection dispatchersSection, m.dispatchers + loadSeqSection typeInfoSection, m.emittedTypeInfo + + f.loadSection backendFlagsSection + f.loadPrim m.backendFlags + + f.loadSection sideChannelSection + f.load m.man + + close(f) + result = f.err + +# ------------------------------------------------------------------------- + +proc storeError(err: RodFileError; filename: AbsoluteFile) = + echo "Error: ", $err, "; couldn't write to ", filename.string + removeFile(filename.string) + +proc saveRodFile*(filename: AbsoluteFile; encoder: var PackedEncoder; m: var PackedModule) = + flush encoder, m + #rememberConfig(encoder, encoder.config) + + var f = rodfiles.create(filename.string) + f.storeHeader() + f.storeSection configSection + f.storePrim m.definedSymbols + f.storePrim m.moduleFlags + f.storePrim m.cfg + + template storeSeqSection(section, data) {.dirty.} = + f.storeSection section + f.storeSeq data + + template storeTabSection(section, data) {.dirty.} = + f.storeSection section + f.store data + + template storeTableSection(section, data) {.dirty.} = + f.storeSection section + f.storeOrderedTable data + + storeTabSection stringsSection, m.strings + + storeSeqSection checkSumsSection, m.includes + + storeSeqSection depsSection, m.imports + + storeTabSection numbersSection, m.numbers + + storeSeqSection exportsSection, m.exports + storeSeqSection hiddenSection, m.hidden + storeSeqSection reexportsSection, m.reexports + + storeSeqSection compilerProcsSection, m.compilerProcs + + storeSeqSection trmacrosSection, m.trmacros + storeSeqSection convertersSection, m.converters + storeSeqSection methodsSection, m.methods + storeSeqSection pureEnumsSection, m.pureEnums + + storeTabSection toReplaySection, m.toReplay + storeTabSection topLevelSection, m.topLevel + + storeTabSection bodiesSection, m.bodies + storeTableSection symsSection, m.syms + + storeTableSection typesSection, m.types + + storeSeqSection typeInstCacheSection, m.typeInstCache + storeSeqSection procInstCacheSection, m.procInstCache + storeSeqSection attachedOpsSection, m.attachedOps + storeSeqSection methodsPerGenericTypeSection, m.methodsPerGenericType + storeSeqSection enumToStringProcsSection, m.enumToStringProcs + storeSeqSection methodsPerTypeSection, m.methodsPerType + storeSeqSection dispatchersSection, m.dispatchers + storeSeqSection typeInfoSection, m.emittedTypeInfo + + f.storeSection backendFlagsSection + f.storePrim m.backendFlags + + f.storeSection sideChannelSection + f.store m.man + + close(f) + encoder.disable() + if f.err != ok: + storeError(f.err, filename) + + when false: + # basic loader testing: + var m2: PackedModule + discard loadRodFile(filename, m2, encoder.config) + echo "loaded ", filename.string + +# ---------------------------------------------------------------------------- + +type + PackedDecoder* = object + lastModule: int + lastLit: LitId + lastFile: FileIndex # remember the last lookup entry. + config*: ConfigRef + cache*: IdentCache + +type + ModuleStatus* = enum + undefined, + storing, # state is strictly for stress-testing purposes + loading, + loaded, + outdated, + stored # store is complete, no further additions possible + + LoadedModule* = object + status*: ModuleStatus + symsInit, typesInit, loadedButAliveSetChanged*: bool + fromDisk*: PackedModule + syms: OrderedTable[int32, PSym] # indexed by itemId + types: OrderedTable[int32, PType] + module*: PSym # the one true module symbol. + iface, ifaceHidden: Table[PIdent, seq[PackedItemId]] + # PackedItemId so that it works with reexported symbols too + # ifaceHidden includes private symbols + +type + PackedModuleGraph* = object + pm*: seq[LoadedModule] # indexed by FileIndex + when BenchIC: + depAnalysis: MonoTime + loadBody: MonoTime + loadSym, loadType, loadBodies: MonoTime + +when BenchIC: + proc echoTimes*(m: PackedModuleGraph) = + echo "analysis: ", m.depAnalysis, " loadBody: ", m.loadBody, " loadSym: ", + m.loadSym, " loadType: ", m.loadType, " all bodies: ", gloadBodies + +template `[]`*(m: PackedModuleGraph; i: int): LoadedModule = m.pm[i] +template len*(m: PackedModuleGraph): int = m.pm.len + +proc loadType(c: var PackedDecoder; g: var PackedModuleGraph; thisModule: int; t: PackedItemId): PType +proc loadSym(c: var PackedDecoder; g: var PackedModuleGraph; thisModule: int; s: PackedItemId): PSym + +proc toFileIndexCached*(c: var PackedDecoder; g: PackedModuleGraph; thisModule: int; f: LitId): FileIndex = + if f == LitId(0): + result = InvalidFileIdx + elif c.lastLit == f and c.lastModule == thisModule: + result = c.lastFile + else: + result = toFileIndex(f, g[thisModule].fromDisk, c.config) + c.lastModule = thisModule + c.lastLit = f + c.lastFile = result + +proc translateLineInfo(c: var PackedDecoder; g: var PackedModuleGraph; thisModule: int; + x: PackedLineInfo): TLineInfo = + assert g[thisModule].status in {loaded, storing, stored} + let (fileId, line, col) = unpack(g[thisModule].fromDisk.man, x) + result = TLineInfo(line: line.uint16, col: col.int16, + fileIndex: toFileIndexCached(c, g, thisModule, fileId)) + +proc loadNodes*(c: var PackedDecoder; g: var PackedModuleGraph; thisModule: int; + tree: PackedTree; n: NodePos): PNode = + let k = n.kind + if k == nkNilRodNode: + return nil + when false: + echo "loading node ", c.config $ translateLineInfo(c, g, thisModule, n.info) + result = newNodeIT(k, translateLineInfo(c, g, thisModule, n.info), + loadType(c, g, thisModule, n.typ)) + result.flags = n.flags + + case k + of nkNone, nkEmpty, nkNilLit, nkType: + discard + of nkIdent: + result.ident = getIdent(c.cache, g[thisModule].fromDisk.strings[n.litId]) + of nkSym: + result.sym = loadSym(c, g, thisModule, PackedItemId(module: LitId(0), item: tree[n].soperand)) + if result.typ == nil: + result.typ = result.sym.typ + of externIntLit: + result.intVal = g[thisModule].fromDisk.numbers[n.litId] + of nkStrLit..nkTripleStrLit: + result.strVal = g[thisModule].fromDisk.strings[n.litId] + of nkFloatLit..nkFloat128Lit: + result.floatVal = cast[BiggestFloat](g[thisModule].fromDisk.numbers[n.litId]) + of nkModuleRef: + let (n1, n2) = sons2(tree, n) + assert n1.kind == nkNone + assert n2.kind == nkNone + transitionNoneToSym(result) + result.sym = loadSym(c, g, thisModule, PackedItemId(module: n1.litId, item: tree[n2].soperand)) + if result.typ == nil: + result.typ = result.sym.typ + else: + for n0 in sonsReadonly(tree, n): + result.addAllowNil loadNodes(c, g, thisModule, tree, n0) + +proc initPackedDecoder*(config: ConfigRef; cache: IdentCache): PackedDecoder = + result = PackedDecoder( + lastModule: int32(-1), + lastLit: LitId(0), + lastFile: FileIndex(-1), + config: config, + cache: cache) + +proc loadProcHeader(c: var PackedDecoder; g: var PackedModuleGraph; thisModule: int; + tree: PackedTree; n: NodePos): PNode = + # do not load the body of the proc. This will be done later in + # getProcBody, if required. + let k = n.kind + result = newNodeIT(k, translateLineInfo(c, g, thisModule, n.info), + loadType(c, g, thisModule, n.typ)) + result.flags = n.flags + assert k in {nkProcDef, nkMethodDef, nkIteratorDef, nkFuncDef, nkConverterDef, nkLambda} + var i = 0 + for n0 in sonsReadonly(tree, n): + if i != bodyPos: + result.add loadNodes(c, g, thisModule, tree, n0) + else: + result.addAllowNil nil + inc i + +proc loadProcBody(c: var PackedDecoder; g: var PackedModuleGraph; thisModule: int; + tree: PackedTree; n: NodePos): PNode = + result = nil + var i = 0 + for n0 in sonsReadonly(tree, n): + if i == bodyPos: + result = loadNodes(c, g, thisModule, tree, n0) + inc i + +proc moduleIndex*(c: var PackedDecoder; g: var PackedModuleGraph; thisModule: int; + s: PackedItemId): int32 {.inline.} = + result = if s.module == LitId(0): thisModule.int32 + else: toFileIndexCached(c, g, thisModule, s.module).int32 + +proc symHeaderFromPacked(c: var PackedDecoder; g: var PackedModuleGraph; + s: PackedSym; si, item: int32): PSym = + result = PSym(itemId: ItemId(module: si, item: item), + kind: s.kind, magic: s.magic, flags: s.flags, + info: translateLineInfo(c, g, si, s.info), + options: s.options, + position: if s.kind in {skForVar, skVar, skLet, skTemp}: 0 else: s.position, + offset: if s.kind in routineKinds: defaultOffset else: s.offset, + disamb: s.disamb, + name: getIdent(c.cache, g[si].fromDisk.strings[s.name]) + ) + +template loadAstBody(p, field) = + if p.field != emptyNodeId: + result.field = loadNodes(c, g, si, g[si].fromDisk.bodies, NodePos p.field) + +template loadAstBodyLazy(p, field) = + if p.field != emptyNodeId: + result.field = loadProcHeader(c, g, si, g[si].fromDisk.bodies, NodePos p.field) + +proc loadLib(c: var PackedDecoder; g: var PackedModuleGraph; + si, item: int32; l: PackedLib): PLib = + # XXX: hack; assume a zero LitId means the PackedLib is all zero (empty) + if l.name.int == 0: + result = nil + else: + result = PLib(generated: l.generated, isOverridden: l.isOverridden, + kind: l.kind, name: rope g[si].fromDisk.strings[l.name]) + loadAstBody(l, path) + +proc symBodyFromPacked(c: var PackedDecoder; g: var PackedModuleGraph; + s: PackedSym; si, item: int32; result: PSym) = + result.typ = loadType(c, g, si, s.typ) + loadAstBody(s, constraint) + if result.kind in {skProc, skFunc, skIterator, skConverter, skMethod}: + loadAstBodyLazy(s, ast) + else: + loadAstBody(s, ast) + result.annex = loadLib(c, g, si, item, s.annex) + when hasFFI: + result.cname = g[si].fromDisk.strings[s.cname] + + if s.kind in {skLet, skVar, skField, skForVar}: + result.guard = loadSym(c, g, si, s.guard) + result.bitsize = s.bitsize + result.alignment = s.alignment + result.owner = loadSym(c, g, si, s.owner) + let externalName = g[si].fromDisk.strings[s.externalName] + if externalName != "": + result.loc.snippet = externalName + result.loc.flags = s.locFlags + result.instantiatedFrom = loadSym(c, g, si, s.instantiatedFrom) + +proc needsRecompile(g: var PackedModuleGraph; conf: ConfigRef; cache: IdentCache; + fileIdx: FileIndex; cachedModules: var seq[FileIndex]): bool +proc loadToReplayNodes(g: var PackedModuleGraph; conf: ConfigRef; cache: IdentCache; + fileIdx: FileIndex; m: var LoadedModule) + +proc loadSym(c: var PackedDecoder; g: var PackedModuleGraph; thisModule: int; s: PackedItemId): PSym = + if s == nilItemId: + result = nil + else: + let si = moduleIndex(c, g, thisModule, s) + if si >= g.len: + g.pm.setLen(si+1) + + if g[si].status == undefined and c.config.cmd == cmdM: + var cachedModules: seq[FileIndex] = @[] + discard needsRecompile(g, c.config, c.cache, FileIndex(si), cachedModules) + for m in cachedModules: + loadToReplayNodes(g, c.config, c.cache, m, g[int m]) + + assert g[si].status in {loaded, storing, stored} + #if not g[si].symsInit: + # g[si].symsInit = true + # setLen g[si].syms, g[si].fromDisk.syms.len + + if g[si].syms.getOrDefault(s.item) == nil: + if g[si].fromDisk.syms[s.item].kind != skModule: + result = symHeaderFromPacked(c, g, g[si].fromDisk.syms[s.item], si, s.item) + # store it here early on, so that recursions work properly: + g[si].syms[s.item] = result + symBodyFromPacked(c, g, g[si].fromDisk.syms[s.item], si, s.item, result) + else: + result = g[si].module + assert result != nil + g[si].syms[s.item] = result + + else: + result = g[si].syms[s.item] + +proc typeHeaderFromPacked(c: var PackedDecoder; g: var PackedModuleGraph; + t: PackedType; si, item: int32): PType = + result = PType(itemId: ItemId(module: si, item: t.nonUniqueId), kind: t.kind, + flags: t.flags, size: t.size, align: t.align, + paddingAtEnd: t.paddingAtEnd, + uniqueId: ItemId(module: si, item: item), + callConv: t.callConv) + +proc typeBodyFromPacked(c: var PackedDecoder; g: var PackedModuleGraph; + t: PackedType; si, item: int32; result: PType) = + result.sym = loadSym(c, g, si, t.sym) + result.owner = loadSym(c, g, si, t.owner) + when false: + for op, item in pairs t.attachedOps: + result.attachedOps[op] = loadSym(c, g, si, item) + result.typeInst = loadType(c, g, si, t.typeInst) + var sons = newSeq[PType]() + for son in items t.types: + sons.add loadType(c, g, si, son) + result.setSons(sons) + loadAstBody(t, n) + when false: + for gen, id in items t.methods: + result.methods.add((gen, loadSym(c, g, si, id))) + +proc loadType(c: var PackedDecoder; g: var PackedModuleGraph; thisModule: int; t: PackedItemId): PType = + if t == nilItemId: + result = nil + else: + let si = moduleIndex(c, g, thisModule, t) + assert g[si].status in {loaded, storing, stored} + assert t.item > 0 + + #if not g[si].typesInit: + # g[si].typesInit = true + # setLen g[si].types, g[si].fromDisk.types.len + + if g[si].types.getOrDefault(t.item) == nil: + result = typeHeaderFromPacked(c, g, g[si].fromDisk.types[t.item], si, t.item) + # store it here early on, so that recursions work properly: + g[si].types[t.item] = result + typeBodyFromPacked(c, g, g[si].fromDisk.types[t.item], si, t.item, result) + #assert result.itemId.item == t.item, $(result.itemId.item, t.item) + assert result.itemId.item > 0, $(result.itemId.item, t.item) + else: + result = g[si].types[t.item] + assert result.itemId.item > 0, "2" + +proc setupLookupTables(g: var PackedModuleGraph; conf: ConfigRef; cache: IdentCache; + fileIdx: FileIndex; m: var LoadedModule) = + m.iface = initTable[PIdent, seq[PackedItemId]]() + m.ifaceHidden = initTable[PIdent, seq[PackedItemId]]() + template impl(iface, e) = + let nameLit = e[0] + let e2 = + when e[1] is PackedItemId: e[1] + else: PackedItemId(module: LitId(0), item: e[1]) + iface.mgetOrPut(cache.getIdent(m.fromDisk.strings[nameLit]), @[]).add(e2) + + for e in m.fromDisk.exports: + m.iface.impl(e) + m.ifaceHidden.impl(e) + for e in m.fromDisk.reexports: + m.iface.impl(e) + m.ifaceHidden.impl(e) + for e in m.fromDisk.hidden: + m.ifaceHidden.impl(e) + + let filename = AbsoluteFile toFullPath(conf, fileIdx) + # We cannot call ``newSym`` here, because we have to circumvent the ID + # mechanism, which we do in order to assign each module a persistent ID. + m.module = PSym(kind: skModule, itemId: ItemId(module: int32(fileIdx), item: 0'i32), + name: getIdent(cache, splitFile(filename).name), + info: newLineInfo(fileIdx, 1, 1), + position: int(fileIdx)) + m.module.owner = getPackage(conf, cache, fileIdx) + m.module.flags = m.fromDisk.moduleFlags + +proc loadToReplayNodes(g: var PackedModuleGraph; conf: ConfigRef; cache: IdentCache; + fileIdx: FileIndex; m: var LoadedModule) = + m.module.ast = newNode(nkStmtList) + if m.fromDisk.toReplay.len > 0: + var decoder = PackedDecoder( + lastModule: int32(-1), + lastLit: LitId(0), + lastFile: FileIndex(-1), + config: conf, + cache: cache) + for p in allNodes(m.fromDisk.toReplay): + m.module.ast.add loadNodes(decoder, g, int(fileIdx), m.fromDisk.toReplay, p) + +proc needsRecompile(g: var PackedModuleGraph; conf: ConfigRef; cache: IdentCache; + fileIdx: FileIndex; cachedModules: var seq[FileIndex]): bool = + # Does the file belong to the fileIdx need to be recompiled? + let m = int(fileIdx) + if m >= g.len: + g.pm.setLen(m+1) + + case g[m].status + of undefined: + g[m].status = loading + let fullpath = msgs.toFullPath(conf, fileIdx) + let rod = toRodFile(conf, AbsoluteFile fullpath) + let err = loadRodFile(rod, g[m].fromDisk, conf, ignoreConfig = conf.cmd == cmdM) + if err == ok: + if conf.cmd == cmdM: + setupLookupTables(g, conf, cache, fileIdx, g[m]) + cachedModules.add fileIdx + g[m].status = loaded + result = false + else: + result = optForceFullMake in conf.globalOptions + # check its dependencies: + let imp = g[m].fromDisk.imports + for dep in imp: + let fid = toFileIndex(dep, g[m].fromDisk, conf) + # Warning: we need to traverse the full graph, so + # do **not use break here**! + if needsRecompile(g, conf, cache, fid, cachedModules): + result = true + + if not result: + setupLookupTables(g, conf, cache, fileIdx, g[m]) + cachedModules.add fileIdx + g[m].status = loaded + else: + g.pm[m] = LoadedModule(status: outdated, module: g[m].module) + else: + loadError(err, rod, conf) + g[m].status = outdated + result = true + when false: loadError(err, rod, conf) + of loading, loaded: + # For loading: Assume no recompile is required. + result = false + of outdated, storing, stored: + result = true + +proc moduleFromRodFile*(g: var PackedModuleGraph; conf: ConfigRef; cache: IdentCache; + fileIdx: FileIndex; cachedModules: var seq[FileIndex]): PSym = + ## Returns 'nil' if the module needs to be recompiled. + bench g.depAnalysis: + if needsRecompile(g, conf, cache, fileIdx, cachedModules): + result = nil + else: + result = g[int fileIdx].module + assert result != nil + assert result.position == int(fileIdx) + for m in cachedModules: + loadToReplayNodes(g, conf, cache, m, g[int m]) + +template setupDecoder() {.dirty.} = + var decoder = PackedDecoder( + lastModule: int32(-1), + lastLit: LitId(0), + lastFile: FileIndex(-1), + config: config, + cache: cache) + +proc loadProcBody*(config: ConfigRef, cache: IdentCache; + g: var PackedModuleGraph; s: PSym): PNode = + bench g.loadBody: + let mId = s.itemId.module + var decoder = PackedDecoder( + lastModule: int32(-1), + lastLit: LitId(0), + lastFile: FileIndex(-1), + config: config, + cache: cache) + let pos = g[mId].fromDisk.syms[s.itemId.item].ast + assert pos != emptyNodeId + result = loadProcBody(decoder, g, mId, g[mId].fromDisk.bodies, NodePos pos) + +proc loadTypeFromId*(config: ConfigRef, cache: IdentCache; + g: var PackedModuleGraph; module: int; id: PackedItemId): PType = + bench g.loadType: + result = g[module].types.getOrDefault(id.item) + if result == nil: + var decoder = PackedDecoder( + lastModule: int32(-1), + lastLit: LitId(0), + lastFile: FileIndex(-1), + config: config, + cache: cache) + result = loadType(decoder, g, module, id) + +proc loadSymFromId*(config: ConfigRef, cache: IdentCache; + g: var PackedModuleGraph; module: int; id: PackedItemId): PSym = + bench g.loadSym: + result = g[module].syms.getOrDefault(id.item) + if result == nil: + var decoder = PackedDecoder( + lastModule: int32(-1), + lastLit: LitId(0), + lastFile: FileIndex(-1), + config: config, + cache: cache) + result = loadSym(decoder, g, module, id) + +proc translateId*(id: PackedItemId; g: PackedModuleGraph; thisModule: int; config: ConfigRef): ItemId = + if id.module == LitId(0): + ItemId(module: thisModule.int32, item: id.item) + else: + ItemId(module: toFileIndex(id.module, g[thisModule].fromDisk, config).int32, item: id.item) + +proc simulateLoadedModule*(g: var PackedModuleGraph; conf: ConfigRef; cache: IdentCache; + moduleSym: PSym; m: PackedModule) = + # For now only used for heavy debugging. In the future we could use this to reduce the + # compiler's memory consumption. + let idx = moduleSym.position + assert g[idx].status in {storing} + g[idx].status = loaded + assert g[idx].module == moduleSym + setupLookupTables(g, conf, cache, FileIndex(idx), g[idx]) + loadToReplayNodes(g, conf, cache, FileIndex(idx), g[idx]) + +# ---------------- symbol table handling ---------------- + +type + RodIter* = object + decoder: PackedDecoder + values: seq[PackedItemId] + i, module: int + +template interfSelect(a: LoadedModule, importHidden: bool): auto = + var ret = a.iface.addr + if importHidden: ret = a.ifaceHidden.addr + ret[] + +proc initRodIter*(it: var RodIter; config: ConfigRef, cache: IdentCache; + g: var PackedModuleGraph; module: FileIndex; + name: PIdent, importHidden: bool): PSym = + it.decoder = PackedDecoder( + lastModule: int32(-1), + lastLit: LitId(0), + lastFile: FileIndex(-1), + config: config, + cache: cache) + it.values = g[int module].interfSelect(importHidden).getOrDefault(name) + it.i = 0 + it.module = int(module) + if it.i < it.values.len: + result = loadSym(it.decoder, g, int(module), it.values[it.i]) + inc it.i + else: + result = nil + +proc initRodIterAllSyms*(it: var RodIter; config: ConfigRef, cache: IdentCache; + g: var PackedModuleGraph; module: FileIndex; importHidden: bool): PSym = + it.decoder = PackedDecoder( + lastModule: int32(-1), + lastLit: LitId(0), + lastFile: FileIndex(-1), + config: config, + cache: cache) + it.values = @[] + it.module = int(module) + for v in g[int module].interfSelect(importHidden).values: + it.values.add v + it.i = 0 + if it.i < it.values.len: + result = loadSym(it.decoder, g, int(module), it.values[it.i]) + inc it.i + else: + result = nil + +proc nextRodIter*(it: var RodIter; g: var PackedModuleGraph): PSym = + if it.i < it.values.len: + result = loadSym(it.decoder, g, it.module, it.values[it.i]) + inc it.i + else: + result = nil + +iterator interfaceSymbols*(config: ConfigRef, cache: IdentCache; + g: var PackedModuleGraph; module: FileIndex; + name: PIdent, importHidden: bool): PSym = + setupDecoder() + let values = g[int module].interfSelect(importHidden).getOrDefault(name) + for pid in values: + let s = loadSym(decoder, g, int(module), pid) + assert s != nil + yield s + +proc interfaceSymbol*(config: ConfigRef, cache: IdentCache; + g: var PackedModuleGraph; module: FileIndex; + name: PIdent, importHidden: bool): PSym = + setupDecoder() + let values = g[int module].interfSelect(importHidden).getOrDefault(name) + result = loadSym(decoder, g, int(module), values[0]) + +proc idgenFromLoadedModule*(m: LoadedModule): IdGenerator = + IdGenerator(module: m.module.itemId.module, symId: int32 m.fromDisk.syms.len, + typeId: int32 m.fromDisk.types.len) + +proc searchForCompilerproc*(m: LoadedModule; name: string): int32 = + # slow, linear search, but the results are cached: + for it in items(m.fromDisk.compilerProcs): + if m.fromDisk.strings[it[0]] == name: + return it[1] + return -1 + +# ------------------------- .rod file viewer --------------------------------- + +proc rodViewer*(rodfile: AbsoluteFile; config: ConfigRef, cache: IdentCache) = + var m: PackedModule = PackedModule() + let err = loadRodFile(rodfile, m, config, ignoreConfig=true) + if err != ok: + config.quitOrRaise "Error: could not load: " & $rodfile.string & " reason: " & $err + + when false: + echo "exports:" + for ex in m.exports: + echo " ", m.strings[ex[0]], " local ID: ", ex[1] + assert ex[0] == m.syms[ex[1]].name + # ex[1] int32 + + echo "reexports:" + for ex in m.reexports: + echo " ", m.strings[ex[0]] + # reexports*: seq[(LitId, PackedItemId)] + + echo "hidden: " & $m.hidden.len + for ex in m.hidden: + echo " ", m.strings[ex[0]], " local ID: ", ex[1] + + when false: + echo "all symbols" + for i in 0..high(m.syms): + if m.syms[i].name != LitId(0): + echo " ", m.strings[m.syms[i].name], " local ID: ", i, " kind ", m.syms[i].kind + else: + echo " <anon symbol?> local ID: ", i, " kind ", m.syms[i].kind + + echo "symbols: ", m.syms.len, " types: ", m.types.len, + " top level nodes: ", m.topLevel.len, " other nodes: ", m.bodies.len, + " strings: ", m.strings.len, " numbers: ", m.numbers.len + + echo "SIZES:" + echo "symbols: ", m.syms.len * sizeof(PackedSym), " types: ", m.types.len * sizeof(PackedType), + " top level nodes: ", m.topLevel.len * sizeof(PackedNode), + " other nodes: ", m.bodies.len * sizeof(PackedNode), + " strings: ", sizeOnDisc(m.strings) + when false: + var tt = 0 + var fc = 0 + for x in m.topLevel: + if x.kind == nkSym or x.typeId == nilItemId: inc tt + if x.flags == {}: inc fc + for x in m.bodies: + if x.kind == nkSym or x.typeId == nilItemId: inc tt + if x.flags == {}: inc fc + let total = float(m.topLevel.len + m.bodies.len) + echo "nodes with nil type: ", tt, " in % ", tt.float / total + echo "nodes with empty flags: ", fc.float / total diff --git a/compiler/ic/iclineinfos.nim b/compiler/ic/iclineinfos.nim new file mode 100644 index 000000000..74a7d971b --- /dev/null +++ b/compiler/ic/iclineinfos.nim @@ -0,0 +1,84 @@ +# +# +# The Nim Compiler +# (c) Copyright 2024 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +# For the line information we use 32 bits. They are used as follows: +# Bit 0 (AsideBit): If we have inline line information or not. If not, the +# remaining 31 bits are used as an index into a seq[(LitId, int, int)]. +# +# We use 10 bits for the "file ID", this means a program can consist of as much +# as 1024 different files. (If it uses more files than that, the overflow bit +# would be set.) +# This means we have 21 bits left to encode the (line, col) pair. We use 7 bits for the column +# so 128 is the limit and 14 bits for the line number. +# The packed representation supports files with up to 16384 lines. +# Keep in mind that whenever any limit is reached the AsideBit is set and the real line +# information is kept in a side channel. + +import std / assertions + +const + AsideBit = 1 + FileBits = 10 + LineBits = 14 + ColBits = 7 + FileMax = (1 shl FileBits) - 1 + LineMax = (1 shl LineBits) - 1 + ColMax = (1 shl ColBits) - 1 + +static: + assert AsideBit + FileBits + LineBits + ColBits == 32 + +import .. / ic / [bitabs, rodfiles] # for LitId + +type + PackedLineInfo* = distinct uint32 + + LineInfoManager* = object + aside: seq[(LitId, int32, int32)] + +const + NoLineInfo* = PackedLineInfo(0'u32) + +proc pack*(m: var LineInfoManager; file: LitId; line, col: int32): PackedLineInfo = + if file.uint32 <= FileMax.uint32 and line <= LineMax and col <= ColMax: + let col = if col < 0'i32: 0'u32 else: col.uint32 + let line = if line < 0'i32: 0'u32 else: line.uint32 + # use inline representation: + result = PackedLineInfo((file.uint32 shl 1'u32) or (line shl uint32(AsideBit + FileBits)) or + (col shl uint32(AsideBit + FileBits + LineBits))) + else: + result = PackedLineInfo((m.aside.len shl 1) or AsideBit) + m.aside.add (file, line, col) + +proc unpack*(m: LineInfoManager; i: PackedLineInfo): (LitId, int32, int32) = + let i = i.uint32 + if (i and 1'u32) == 0'u32: + # inline representation: + result = (LitId((i shr 1'u32) and FileMax.uint32), + int32((i shr uint32(AsideBit + FileBits)) and LineMax.uint32), + int32((i shr uint32(AsideBit + FileBits + LineBits)) and ColMax.uint32)) + else: + result = m.aside[int(i shr 1'u32)] + +proc getFileId*(m: LineInfoManager; i: PackedLineInfo): LitId = + result = unpack(m, i)[0] + +proc store*(r: var RodFile; m: LineInfoManager) = storeSeq(r, m.aside) +proc load*(r: var RodFile; m: var LineInfoManager) = loadSeq(r, m.aside) + +when isMainModule: + var m = LineInfoManager(aside: @[]) + for i in 0'i32..<16388'i32: + for col in 0'i32..<100'i32: + let packed = pack(m, LitId(1023), i, col) + let u = unpack(m, packed) + assert u[0] == LitId(1023) + assert u[1] == i + assert u[2] == col + echo m.aside.len diff --git a/compiler/ic/integrity.nim b/compiler/ic/integrity.nim new file mode 100644 index 000000000..3e8ea2503 --- /dev/null +++ b/compiler/ic/integrity.nim @@ -0,0 +1,155 @@ +# +# +# The Nim Compiler +# (c) Copyright 2021 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## Integrity checking for a set of .rod files. +## The set must cover a complete Nim project. + +import std/[sets, tables] + +when defined(nimPreviewSlimSystem): + import std/assertions + +import ".." / [ast, modulegraphs] +import packed_ast, bitabs, ic + +type + CheckedContext = object + g: ModuleGraph + thisModule: int32 + checkedSyms: HashSet[ItemId] + checkedTypes: HashSet[ItemId] + +proc checkType(c: var CheckedContext; typeId: PackedItemId) +proc checkForeignSym(c: var CheckedContext; symId: PackedItemId) +proc checkNode(c: var CheckedContext; tree: PackedTree; n: NodePos) + +proc checkTypeObj(c: var CheckedContext; typ: PackedType) = + for child in typ.types: + checkType(c, child) + if typ.n != emptyNodeId: + checkNode(c, c.g.packed[c.thisModule].fromDisk.bodies, NodePos typ.n) + if typ.sym != nilItemId: + checkForeignSym(c, typ.sym) + if typ.owner != nilItemId: + checkForeignSym(c, typ.owner) + checkType(c, typ.typeInst) + +proc checkType(c: var CheckedContext; typeId: PackedItemId) = + if typeId == nilItemId: return + let itemId = translateId(typeId, c.g.packed, c.thisModule, c.g.config) + if not c.checkedTypes.containsOrIncl(itemId): + let oldThisModule = c.thisModule + c.thisModule = itemId.module + checkTypeObj c, c.g.packed[itemId.module].fromDisk.types[itemId.item] + c.thisModule = oldThisModule + +proc checkSym(c: var CheckedContext; s: PackedSym) = + if s.name != LitId(0): + assert c.g.packed[c.thisModule].fromDisk.strings.hasLitId s.name + checkType c, s.typ + if s.ast != emptyNodeId: + checkNode(c, c.g.packed[c.thisModule].fromDisk.bodies, NodePos s.ast) + if s.owner != nilItemId: + checkForeignSym(c, s.owner) + +proc checkLocalSym(c: var CheckedContext; item: int32) = + let itemId = ItemId(module: c.thisModule, item: item) + if not c.checkedSyms.containsOrIncl(itemId): + checkSym c, c.g.packed[c.thisModule].fromDisk.syms[item] + +proc checkForeignSym(c: var CheckedContext; symId: PackedItemId) = + let itemId = translateId(symId, c.g.packed, c.thisModule, c.g.config) + if not c.checkedSyms.containsOrIncl(itemId): + let oldThisModule = c.thisModule + c.thisModule = itemId.module + checkSym c, c.g.packed[itemId.module].fromDisk.syms[itemId.item] + c.thisModule = oldThisModule + +proc checkNode(c: var CheckedContext; tree: PackedTree; n: NodePos) = + let t = findType(tree, n) + if t != nilItemId: + checkType(c, t) + case n.kind + of nkEmpty, nkNilLit, nkType, nkNilRodNode: + discard + of nkIdent: + assert c.g.packed[c.thisModule].fromDisk.strings.hasLitId n.litId + of nkSym: + checkLocalSym(c, tree[n].soperand) + of directIntLit: + discard + of externIntLit, nkFloatLit..nkFloat128Lit: + assert c.g.packed[c.thisModule].fromDisk.numbers.hasLitId n.litId + of nkStrLit..nkTripleStrLit: + assert c.g.packed[c.thisModule].fromDisk.strings.hasLitId n.litId + of nkModuleRef: + let (n1, n2) = sons2(tree, n) + assert n1.kind == nkNone + assert n2.kind == nkNone + checkForeignSym(c, PackedItemId(module: n1.litId, item: tree[n2].soperand)) + else: + for n0 in sonsReadonly(tree, n): + checkNode(c, tree, n0) + +proc checkTree(c: var CheckedContext; t: PackedTree) = + for p in allNodes(t): checkNode(c, t, p) + +proc checkLocalSymIds(c: var CheckedContext; m: PackedModule; symIds: seq[int32]) = + for symId in symIds: + assert symId >= 0 and symId < m.syms.len, $symId & " " & $m.syms.len + +proc checkModule(c: var CheckedContext; m: PackedModule) = + # We check that: + # - Every symbol references existing types and symbols. + # - Every tree node references existing types and symbols. + for _, v in pairs(m.syms): + checkLocalSym c, v.id + + checkTree c, m.toReplay + checkTree c, m.topLevel + + for e in m.exports: + #assert e[1] >= 0 and e[1] < m.syms.len + assert e[0] == m.syms[e[1]].name + + for e in m.compilerProcs: + #assert e[1] >= 0 and e[1] < m.syms.len + assert e[0] == m.syms[e[1]].name + + checkLocalSymIds c, m, m.converters + checkLocalSymIds c, m, m.methods + checkLocalSymIds c, m, m.trmacros + checkLocalSymIds c, m, m.pureEnums + #[ + To do: Check all these fields: + + reexports*: seq[(LitId, PackedItemId)] + macroUsages*: seq[(PackedItemId, PackedLineInfo)] + + typeInstCache*: seq[(PackedItemId, PackedItemId)] + procInstCache*: seq[PackedInstantiation] + attachedOps*: seq[(TTypeAttachedOp, PackedItemId, PackedItemId)] + methodsPerGenericType*: seq[(PackedItemId, int, PackedItemId)] + enumToStringProcs*: seq[(PackedItemId, PackedItemId)] + methodsPerType*: seq[(PackedItemId, PackedItemId)] + dispatchers*: seq[PackedItemId] + ]# + +proc checkIntegrity*(g: ModuleGraph) = + var c = CheckedContext(g: g) + for i in 0..<len(g.packed): + # case statement here to enforce exhaustive checks. + case g.packed[i].status + of undefined: + discard "nothing to do" + of loading: + assert false, "cannot check integrity: Module still loading" + of stored, storing, outdated, loaded: + c.thisModule = int32 i + checkModule(c, g.packed[i].fromDisk) diff --git a/compiler/ic/navigator.nim b/compiler/ic/navigator.nim new file mode 100644 index 000000000..39037b94f --- /dev/null +++ b/compiler/ic/navigator.nim @@ -0,0 +1,183 @@ +# +# +# The Nim Compiler +# (c) Copyright 2021 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## Supports the "nim check --ic:on --defusages:FILE,LINE,COL" +## IDE-like features. It uses the set of .rod files to accomplish +## its task. The set must cover a complete Nim project. + +import std/[sets, tables] + +from std/os import nil +from std/private/miscdollars import toLocation + +when defined(nimPreviewSlimSystem): + import std/assertions + +import ".." / [ast, modulegraphs, msgs, options] +import iclineinfos +import packed_ast, bitabs, ic + +type + UnpackedLineInfo = object + file: LitId + line, col: int + NavContext = object + g: ModuleGraph + thisModule: int32 + trackPos: UnpackedLineInfo + alreadyEmitted: HashSet[string] + outputSep: char # for easier testing, use short filenames and spaces instead of tabs. + +proc isTracked(man: LineInfoManager; current: PackedLineInfo, trackPos: UnpackedLineInfo, tokenLen: int): bool = + let (currentFile, currentLine, currentCol) = man.unpack(current) + if currentFile == trackPos.file and currentLine == trackPos.line: + let col = trackPos.col + if col >= currentCol and col < currentCol+tokenLen: + result = true + else: + result = false + else: + result = false + +proc searchLocalSym(c: var NavContext; s: PackedSym; info: PackedLineInfo): bool = + result = s.name != LitId(0) and + isTracked(c.g.packed[c.thisModule].fromDisk.man, info, c.trackPos, c.g.packed[c.thisModule].fromDisk.strings[s.name].len) + +proc searchForeignSym(c: var NavContext; s: ItemId; info: PackedLineInfo): bool = + let name = c.g.packed[s.module].fromDisk.syms[s.item].name + result = name != LitId(0) and + isTracked(c.g.packed[c.thisModule].fromDisk.man, info, c.trackPos, c.g.packed[s.module].fromDisk.strings[name].len) + +const + EmptyItemId = ItemId(module: -1'i32, item: -1'i32) + +proc search(c: var NavContext; tree: PackedTree): ItemId = + # We use the linear representation here directly: + for i in 0..<len(tree): + let i = NodePos(i) + case tree[i].kind + of nkSym: + let item = tree[i].soperand + if searchLocalSym(c, c.g.packed[c.thisModule].fromDisk.syms[item], tree[i].info): + return ItemId(module: c.thisModule, item: item) + of nkModuleRef: + let (currentFile, currentLine, currentCol) = c.g.packed[c.thisModule].fromDisk.man.unpack(tree[i].info) + if currentLine == c.trackPos.line and currentFile == c.trackPos.file: + let (n1, n2) = sons2(tree, i) + assert n1.kind == nkInt32Lit + assert n2.kind == nkInt32Lit + let pId = PackedItemId(module: n1.litId, item: tree[n2].soperand) + let itemId = translateId(pId, c.g.packed, c.thisModule, c.g.config) + if searchForeignSym(c, itemId, tree[i].info): + return itemId + else: discard + return EmptyItemId + +proc isDecl(tree: PackedTree; n: NodePos): bool = + # XXX This is not correct yet. + const declarativeNodes = procDefs + {nkMacroDef, nkTemplateDef, + nkLetSection, nkVarSection, nkUsingStmt, nkConstSection, nkTypeSection, + nkIdentDefs, nkEnumTy, nkVarTuple} + result = n.int >= 0 and tree[n].kind in declarativeNodes + +proc usage(c: var NavContext; info: PackedLineInfo; isDecl: bool) = + let (fileId, line, col) = unpack(c.g.packed[c.thisModule].fromDisk.man, info) + var m = "" + var file = c.g.packed[c.thisModule].fromDisk.strings[fileId] + if c.outputSep == ' ': + file = os.extractFilename file + toLocation(m, file, line, col + ColOffset) + if not c.alreadyEmitted.containsOrIncl(m): + msgWriteln c.g.config, (if isDecl: "def" else: "usage") & c.outputSep & m + +proc list(c: var NavContext; tree: PackedTree; sym: ItemId) = + for i in 0..<len(tree): + let i = NodePos(i) + case tree[i].kind + of nkSym: + let item = tree[i].soperand + if sym.item == item and sym.module == c.thisModule: + usage(c, tree[i].info, isDecl(tree, parent(i))) + of nkModuleRef: + let (n1, n2) = sons2(tree, i) + assert n1.kind == nkNone + assert n2.kind == nkNone + let pId = PackedItemId(module: n1.litId, item: tree[n2].soperand) + let itemId = translateId(pId, c.g.packed, c.thisModule, c.g.config) + if itemId.item == sym.item and sym.module == itemId.module: + usage(c, tree[i].info, isDecl(tree, parent(i))) + else: discard + +proc searchForIncludeFile(g: ModuleGraph; fullPath: string): int = + for i in 0..<len(g.packed): + for k in 1..high(g.packed[i].fromDisk.includes): + # we start from 1 because the first "include" file is + # the module's filename. + if os.cmpPaths(g.packed[i].fromDisk.strings[g.packed[i].fromDisk.includes[k][0]], fullPath) == 0: + return i + return -1 + +proc nav(g: ModuleGraph) = + # translate the track position to a packed position: + let unpacked = g.config.m.trackPos + var mid = unpacked.fileIndex.int + + let fullPath = toFullPath(g.config, unpacked.fileIndex) + + if g.packed[mid].status == undefined: + # check if 'mid' is an include file of some other module: + mid = searchForIncludeFile(g, fullPath) + + if mid < 0: + localError(g.config, unpacked, "unknown file name: " & fullPath) + return + + let fileId = g.packed[mid].fromDisk.strings.getKeyId(fullPath) + + if fileId == LitId(0): + internalError(g.config, unpacked, "cannot find a valid file ID") + return + + var c = NavContext( + g: g, + thisModule: int32 mid, + trackPos: UnpackedLineInfo(line: unpacked.line.int, col: unpacked.col.int, file: fileId), + outputSep: if isDefined(g.config, "nimIcNavigatorTests"): ' ' else: '\t' + ) + var symId = search(c, g.packed[mid].fromDisk.topLevel) + if symId == EmptyItemId: + symId = search(c, g.packed[mid].fromDisk.bodies) + + if symId == EmptyItemId: + localError(g.config, unpacked, "no symbol at this position") + return + + for i in 0..<len(g.packed): + # case statement here to enforce exhaustive checks. + case g.packed[i].status + of undefined: + discard "nothing to do" + of loading: + assert false, "cannot check integrity: Module still loading" + of stored, storing, outdated, loaded: + c.thisModule = int32 i + list(c, g.packed[i].fromDisk.topLevel, symId) + list(c, g.packed[i].fromDisk.bodies, symId) + +proc navDefinition*(g: ModuleGraph) = nav(g) +proc navUsages*(g: ModuleGraph) = nav(g) +proc navDefusages*(g: ModuleGraph) = nav(g) + +proc writeRodFiles*(g: ModuleGraph) = + for i in 0..<len(g.packed): + case g.packed[i].status + of undefined, loading, stored, loaded: + discard "nothing to do" + of storing, outdated: + closeRodFile(g, g.packed[i].module) diff --git a/compiler/ic/packed_ast.nim b/compiler/ic/packed_ast.nim new file mode 100644 index 000000000..a39bb7adf --- /dev/null +++ b/compiler/ic/packed_ast.nim @@ -0,0 +1,367 @@ +# +# +# The Nim Compiler +# (c) Copyright 2020 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## Packed AST representation, mostly based on a seq of nodes. +## For IC support. Far future: Rewrite the compiler passes to +## use this representation directly in all the transformations, +## it is superior. + +import std/[hashes, tables, strtabs] +import bitabs, rodfiles +import ".." / [ast, options] + +import iclineinfos + +when defined(nimPreviewSlimSystem): + import std/assertions + +type + SymId* = distinct int32 + ModuleId* = distinct int32 + NodePos* = distinct int + + NodeId* = distinct int32 + + PackedItemId* = object + module*: LitId # 0 if it's this module + item*: int32 # same as the in-memory representation + +const + nilItemId* = PackedItemId(module: LitId(0), item: 0.int32) + +const + emptyNodeId* = NodeId(-1) + +type + PackedLib* = object + kind*: TLibKind + generated*: bool + isOverridden*: bool + name*: LitId + path*: NodeId + + PackedSym* = object + id*: int32 + kind*: TSymKind + name*: LitId + typ*: PackedItemId + flags*: TSymFlags + magic*: TMagic + info*: PackedLineInfo + ast*: NodeId + owner*: PackedItemId + guard*: PackedItemId + bitsize*: int + alignment*: int # for alignment + options*: TOptions + position*: int + offset*: int32 + disamb*: int32 + externalName*: LitId # instead of TLoc + locFlags*: TLocFlags + annex*: PackedLib + when hasFFI: + cname*: LitId + constraint*: NodeId + instantiatedFrom*: PackedItemId + + PackedType* = object + id*: int32 + kind*: TTypeKind + callConv*: TCallingConvention + #nodekind*: TNodeKind + flags*: TTypeFlags + types*: seq[PackedItemId] + n*: NodeId + #nodeflags*: TNodeFlags + sym*: PackedItemId + owner*: PackedItemId + size*: BiggestInt + align*: int16 + paddingAtEnd*: int16 + # not serialized: loc*: TLoc because it is backend-specific + typeInst*: PackedItemId + nonUniqueId*: int32 + + PackedNode* = object # 8 bytes + x: uint32 + info*: PackedLineInfo + + PackedTree* = object ## usually represents a full Nim module + nodes: seq[PackedNode] + withFlags: seq[(int32, TNodeFlags)] + withTypes: seq[(int32, PackedItemId)] + + PackedInstantiation* = object + key*, sym*: PackedItemId + concreteTypes*: seq[PackedItemId] + +const + NodeKindBits = 8'u32 + NodeKindMask = (1'u32 shl NodeKindBits) - 1'u32 + +template kind*(n: PackedNode): TNodeKind = TNodeKind(n.x and NodeKindMask) +template uoperand*(n: PackedNode): uint32 = (n.x shr NodeKindBits) +template soperand*(n: PackedNode): int32 = int32(uoperand(n)) + +template toX(k: TNodeKind; operand: uint32): uint32 = + uint32(k) or (operand shl NodeKindBits) + +template toX(k: TNodeKind; operand: LitId): uint32 = + uint32(k) or (operand.uint32 shl NodeKindBits) + +template typeId*(n: PackedNode): PackedItemId = n.typ + +proc `==`*(a, b: SymId): bool {.borrow.} +proc hash*(a: SymId): Hash {.borrow.} + +proc `==`*(a, b: NodePos): bool {.borrow.} +#proc `==`*(a, b: PackedItemId): bool {.borrow.} +proc `==`*(a, b: NodeId): bool {.borrow.} + +proc newTreeFrom*(old: PackedTree): PackedTree = + result = PackedTree(nodes: @[]) + when false: result.sh = old.sh + +proc addIdent*(tree: var PackedTree; s: LitId; info: PackedLineInfo) = + tree.nodes.add PackedNode(x: toX(nkIdent, uint32(s)), info: info) + +proc addSym*(tree: var PackedTree; s: int32; info: PackedLineInfo) = + tree.nodes.add PackedNode(x: toX(nkSym, cast[uint32](s)), info: info) + +proc addSymDef*(tree: var PackedTree; s: SymId; info: PackedLineInfo) = + tree.nodes.add PackedNode(x: toX(nkSym, cast[uint32](s)), info: info) + +proc isAtom*(tree: PackedTree; pos: int): bool {.inline.} = tree.nodes[pos].kind <= nkNilLit + +type + PatchPos = distinct int + +proc addNode*(t: var PackedTree; kind: TNodeKind; operand: int32; + typeId: PackedItemId = nilItemId; info: PackedLineInfo; + flags: TNodeFlags = {}) = + t.nodes.add PackedNode(x: toX(kind, cast[uint32](operand)), info: info) + if flags != {}: + t.withFlags.add (t.nodes.len.int32 - 1, flags) + if typeId != nilItemId: + t.withTypes.add (t.nodes.len.int32 - 1, typeId) + +proc prepare*(tree: var PackedTree; kind: TNodeKind; flags: TNodeFlags; typeId: PackedItemId; info: PackedLineInfo): PatchPos = + result = PatchPos tree.nodes.len + tree.addNode(kind = kind, flags = flags, operand = 0, info = info, typeId = typeId) + +proc prepare*(dest: var PackedTree; source: PackedTree; sourcePos: NodePos): PatchPos = + result = PatchPos dest.nodes.len + dest.nodes.add source.nodes[sourcePos.int] + +proc patch*(tree: var PackedTree; pos: PatchPos) = + let pos = pos.int + let k = tree.nodes[pos].kind + assert k > nkNilLit + let distance = int32(tree.nodes.len - pos) + assert distance > 0 + tree.nodes[pos].x = toX(k, cast[uint32](distance)) + +proc len*(tree: PackedTree): int {.inline.} = tree.nodes.len + +proc `[]`*(tree: PackedTree; i: NodePos): lent PackedNode {.inline.} = + tree.nodes[i.int] + +template rawSpan(n: PackedNode): int = int(uoperand(n)) + +proc nextChild(tree: PackedTree; pos: var int) {.inline.} = + if tree.nodes[pos].kind > nkNilLit: + assert tree.nodes[pos].uoperand > 0 + inc pos, tree.nodes[pos].rawSpan + else: + inc pos + +iterator sonsReadonly*(tree: PackedTree; n: NodePos): NodePos = + var pos = n.int + assert tree.nodes[pos].kind > nkNilLit + let last = pos + tree.nodes[pos].rawSpan + inc pos + while pos < last: + yield NodePos pos + nextChild tree, pos + +iterator sons*(dest: var PackedTree; tree: PackedTree; n: NodePos): NodePos = + let patchPos = prepare(dest, tree, n) + for x in sonsReadonly(tree, n): yield x + patch dest, patchPos + +iterator isons*(dest: var PackedTree; tree: PackedTree; + n: NodePos): (int, NodePos) = + var i = 0 + for ch0 in sons(dest, tree, n): + yield (i, ch0) + inc i + +iterator sonsFrom1*(tree: PackedTree; n: NodePos): NodePos = + var pos = n.int + assert tree.nodes[pos].kind > nkNilLit + let last = pos + tree.nodes[pos].rawSpan + inc pos + if pos < last: + nextChild tree, pos + while pos < last: + yield NodePos pos + nextChild tree, pos + +iterator sonsWithoutLast2*(tree: PackedTree; n: NodePos): NodePos = + var count = 0 + for child in sonsReadonly(tree, n): + inc count + var pos = n.int + assert tree.nodes[pos].kind > nkNilLit + let last = pos + tree.nodes[pos].rawSpan + inc pos + while pos < last and count > 2: + yield NodePos pos + dec count + nextChild tree, pos + +proc parentImpl(tree: PackedTree; n: NodePos): NodePos = + # finding the parent of a node is rather easy: + var pos = n.int - 1 + while pos >= 0 and (isAtom(tree, pos) or (pos + tree.nodes[pos].rawSpan - 1 < n.int)): + dec pos + #assert pos >= 0, "node has no parent" + result = NodePos(pos) + +template parent*(n: NodePos): NodePos = parentImpl(tree, n) + +proc hasXsons*(tree: PackedTree; n: NodePos; x: int): bool = + var count = 0 + if tree.nodes[n.int].kind > nkNilLit: + for child in sonsReadonly(tree, n): inc count + result = count == x + +proc hasAtLeastXsons*(tree: PackedTree; n: NodePos; x: int): bool = + if tree.nodes[n.int].kind > nkNilLit: + var count = 0 + for child in sonsReadonly(tree, n): + inc count + if count >= x: return true + return false + +proc firstSon*(tree: PackedTree; n: NodePos): NodePos {.inline.} = + NodePos(n.int+1) +proc kind*(tree: PackedTree; n: NodePos): TNodeKind {.inline.} = + tree.nodes[n.int].kind +proc litId*(tree: PackedTree; n: NodePos): LitId {.inline.} = + LitId tree.nodes[n.int].uoperand +proc info*(tree: PackedTree; n: NodePos): PackedLineInfo {.inline.} = + tree.nodes[n.int].info + +proc findType*(tree: PackedTree; n: NodePos): PackedItemId = + for x in tree.withTypes: + if x[0] == int32(n): return x[1] + if x[0] > int32(n): return nilItemId + return nilItemId + +proc findFlags*(tree: PackedTree; n: NodePos): TNodeFlags = + for x in tree.withFlags: + if x[0] == int32(n): return x[1] + if x[0] > int32(n): return {} + return {} + +template typ*(n: NodePos): PackedItemId = + tree.findType(n) +template flags*(n: NodePos): TNodeFlags = + tree.findFlags(n) + +template uoperand*(n: NodePos): uint32 = + tree.nodes[n.int].uoperand + +proc span*(tree: PackedTree; pos: int): int {.inline.} = + if isAtom(tree, pos): 1 else: tree.nodes[pos].rawSpan + +proc sons2*(tree: PackedTree; n: NodePos): (NodePos, NodePos) = + assert(not isAtom(tree, n.int)) + let a = n.int+1 + let b = a + span(tree, a) + result = (NodePos a, NodePos b) + +proc sons3*(tree: PackedTree; n: NodePos): (NodePos, NodePos, NodePos) = + assert(not isAtom(tree, n.int)) + let a = n.int+1 + let b = a + span(tree, a) + let c = b + span(tree, b) + result = (NodePos a, NodePos b, NodePos c) + +proc ithSon*(tree: PackedTree; n: NodePos; i: int): NodePos = + result = default(NodePos) + if tree.nodes[n.int].kind > nkNilLit: + var count = 0 + for child in sonsReadonly(tree, n): + if count == i: return child + inc count + assert false, "node has no i-th child" + +when false: + proc `@`*(tree: PackedTree; lit: LitId): lent string {.inline.} = + tree.sh.strings[lit] + +template kind*(n: NodePos): TNodeKind = tree.nodes[n.int].kind +template info*(n: NodePos): PackedLineInfo = tree.nodes[n.int].info +template litId*(n: NodePos): LitId = LitId tree.nodes[n.int].uoperand + +template symId*(n: NodePos): SymId = SymId tree.nodes[n.int].soperand + +proc firstSon*(n: NodePos): NodePos {.inline.} = NodePos(n.int+1) + +const + externIntLit* = {nkCharLit, + nkIntLit, + nkInt8Lit, + nkInt16Lit, + nkInt32Lit, + nkInt64Lit, + nkUIntLit, + nkUInt8Lit, + nkUInt16Lit, + nkUInt32Lit, + nkUInt64Lit} + + externSIntLit* = {nkIntLit, nkInt8Lit, nkInt16Lit, nkInt32Lit, nkInt64Lit} + externUIntLit* = {nkUIntLit, nkUInt8Lit, nkUInt16Lit, nkUInt32Lit, nkUInt64Lit} + directIntLit* = nkNone + +template copyInto*(dest, n, body) = + let patchPos = prepare(dest, tree, n) + body + patch dest, patchPos + +template copyIntoKind*(dest, kind, info, body) = + let patchPos = prepare(dest, kind, info) + body + patch dest, patchPos + +proc getNodeId*(tree: PackedTree): NodeId {.inline.} = NodeId tree.nodes.len + +iterator allNodes*(tree: PackedTree): NodePos = + var p = 0 + while p < tree.len: + yield NodePos(p) + let s = span(tree, p) + inc p, s + +proc toPackedItemId*(item: int32): PackedItemId {.inline.} = + PackedItemId(module: LitId(0), item: item) + +proc load*(f: var RodFile; t: var PackedTree) = + loadSeq f, t.nodes + loadSeq f, t.withFlags + loadSeq f, t.withTypes + +proc store*(f: var RodFile; t: PackedTree) = + storeSeq f, t.nodes + storeSeq f, t.withFlags + storeSeq f, t.withTypes diff --git a/compiler/ic/replayer.nim b/compiler/ic/replayer.nim new file mode 100644 index 000000000..b244ec885 --- /dev/null +++ b/compiler/ic/replayer.nim @@ -0,0 +1,171 @@ +# +# +# The Nim Compiler +# (c) Copyright 2020 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## Module that contains code to replay global VM state changes and pragma +## state like ``{.compile: "foo.c".}``. For IC (= Incremental compilation) +## support. + +import ".." / [ast, modulegraphs, trees, extccomp, btrees, + msgs, lineinfos, pathutils, options, cgmeth] + +import std/tables + +when defined(nimPreviewSlimSystem): + import std/assertions + +import packed_ast, ic, bitabs + +proc replayStateChanges*(module: PSym; g: ModuleGraph) = + let list = module.ast + assert list != nil + assert list.kind == nkStmtList + for n in list: + assert n.kind == nkReplayAction + # Fortunately only a tiny subset of the available pragmas need to + # be replayed here. This is always a subset of ``pragmas.stmtPragmas``. + if n.len >= 2: + internalAssert g.config, n[0].kind == nkStrLit and n[1].kind == nkStrLit + case n[0].strVal + of "hint": message(g.config, n.info, hintUser, n[1].strVal) + of "warning": message(g.config, n.info, warnUser, n[1].strVal) + of "error": localError(g.config, n.info, errUser, n[1].strVal) + of "compile": + internalAssert g.config, n.len == 4 and n[2].kind == nkStrLit + let cname = AbsoluteFile n[1].strVal + var cf = Cfile(nimname: splitFile(cname).name, cname: cname, + obj: AbsoluteFile n[2].strVal, + flags: {CfileFlag.External}, + customArgs: n[3].strVal) + extccomp.addExternalFileToCompile(g.config, cf) + of "link": + extccomp.addExternalFileToLink(g.config, AbsoluteFile n[1].strVal) + of "passl": + extccomp.addLinkOption(g.config, n[1].strVal) + of "passc": + extccomp.addCompileOption(g.config, n[1].strVal) + of "localpassc": + extccomp.addLocalCompileOption(g.config, n[1].strVal, toFullPathConsiderDirty(g.config, module.info.fileIndex)) + of "cppdefine": + options.cppDefine(g.config, n[1].strVal) + of "inc": + let destKey = n[1].strVal + let by = n[2].intVal + let v = getOrDefault(g.cacheCounters, destKey) + g.cacheCounters[destKey] = v+by + of "put": + let destKey = n[1].strVal + let key = n[2].strVal + let val = n[3] + if not contains(g.cacheTables, destKey): + g.cacheTables[destKey] = initBTree[string, PNode]() + if not contains(g.cacheTables[destKey], key): + g.cacheTables[destKey].add(key, val) + else: + internalError(g.config, n.info, "key already exists: " & key) + of "incl": + let destKey = n[1].strVal + let val = n[2] + if not contains(g.cacheSeqs, destKey): + g.cacheSeqs[destKey] = newTree(nkStmtList, val) + else: + block search: + for existing in g.cacheSeqs[destKey]: + if exprStructuralEquivalent(existing, val, strictSymEquality=true): + break search + g.cacheSeqs[destKey].add val + of "add": + let destKey = n[1].strVal + let val = n[2] + if not contains(g.cacheSeqs, destKey): + g.cacheSeqs[destKey] = newTree(nkStmtList, val) + else: + g.cacheSeqs[destKey].add val + else: + internalAssert g.config, false + +proc replayBackendProcs*(g: ModuleGraph; module: int) = + for it in mitems(g.packed[module].fromDisk.attachedOps): + let key = translateId(it[0], g.packed, module, g.config) + let op = it[1] + let tmp = translateId(it[2], g.packed, module, g.config) + let symId = FullId(module: tmp.module, packed: it[2]) + g.attachedOps[op][key] = LazySym(id: symId, sym: nil) + + for it in mitems(g.packed[module].fromDisk.enumToStringProcs): + let key = translateId(it[0], g.packed, module, g.config) + let tmp = translateId(it[1], g.packed, module, g.config) + let symId = FullId(module: tmp.module, packed: it[1]) + g.enumToStringProcs[key] = LazySym(id: symId, sym: nil) + + for it in mitems(g.packed[module].fromDisk.methodsPerType): + let key = translateId(it[0], g.packed, module, g.config) + let tmp = translateId(it[1], g.packed, module, g.config) + let symId = FullId(module: tmp.module, packed: it[1]) + g.methodsPerType.mgetOrPut(key, @[]).add LazySym(id: symId, sym: nil) + + for it in mitems(g.packed[module].fromDisk.dispatchers): + let tmp = translateId(it, g.packed, module, g.config) + let symId = FullId(module: tmp.module, packed: it) + g.dispatchers.add LazySym(id: symId, sym: nil) + +proc replayGenericCacheInformation*(g: ModuleGraph; module: int) = + ## We remember the generic instantiations a module performed + ## in order to to avoid the code bloat that generic code tends + ## to imply. This is cheaper than deduplication of identical + ## generic instantiations. However, deduplication is more + ## powerful and general and I hope to implement it soon too + ## (famous last words). + assert g.packed[module].status == loaded + for it in g.packed[module].fromDisk.typeInstCache: + let key = translateId(it[0], g.packed, module, g.config) + g.typeInstCache.mgetOrPut(key, @[]).add LazyType(id: FullId(module: module, packed: it[1]), typ: nil) + + for it in mitems(g.packed[module].fromDisk.procInstCache): + let key = translateId(it.key, g.packed, module, g.config) + let sym = translateId(it.sym, g.packed, module, g.config) + var concreteTypes = newSeq[FullId](it.concreteTypes.len) + for i in 0..high(it.concreteTypes): + let tmp = translateId(it.concreteTypes[i], g.packed, module, g.config) + concreteTypes[i] = FullId(module: tmp.module, packed: it.concreteTypes[i]) + + g.procInstCache.mgetOrPut(key, @[]).add LazyInstantiation( + module: module, sym: FullId(module: sym.module, packed: it.sym), + concreteTypes: concreteTypes, inst: nil) + + for it in mitems(g.packed[module].fromDisk.methodsPerGenericType): + let key = translateId(it[0], g.packed, module, g.config) + let col = it[1] + let tmp = translateId(it[2], g.packed, module, g.config) + let symId = FullId(module: tmp.module, packed: it[2]) + g.methodsPerGenericType.mgetOrPut(key, @[]).add (col, LazySym(id: symId, sym: nil)) + + replayBackendProcs(g, module) + + for it in mitems(g.packed[module].fromDisk.methods): + let sym = loadSymFromId(g.config, g.cache, g.packed, module, + PackedItemId(module: LitId(0), item: it)) + methodDef(g, g.idgen, sym) + + when false: + # not used anymore: + for it in mitems(g.packed[module].fromDisk.compilerProcs): + let symId = FullId(module: module, packed: PackedItemId(module: LitId(0), item: it[1])) + g.lazyCompilerprocs[g.packed[module].fromDisk.sh.strings[it[0]]] = symId + + for it in mitems(g.packed[module].fromDisk.converters): + let symId = FullId(module: module, packed: PackedItemId(module: LitId(0), item: it)) + g.ifaces[module].converters.add LazySym(id: symId, sym: nil) + + for it in mitems(g.packed[module].fromDisk.trmacros): + let symId = FullId(module: module, packed: PackedItemId(module: LitId(0), item: it)) + g.ifaces[module].patterns.add LazySym(id: symId, sym: nil) + + for it in mitems(g.packed[module].fromDisk.pureEnums): + let symId = FullId(module: module, packed: PackedItemId(module: LitId(0), item: it)) + g.ifaces[module].pureEnums.add LazySym(id: symId, sym: nil) diff --git a/compiler/ic/rodfiles.nim b/compiler/ic/rodfiles.nim new file mode 100644 index 000000000..ac995dd2e --- /dev/null +++ b/compiler/ic/rodfiles.nim @@ -0,0 +1,283 @@ +# +# +# The Nim Compiler +# (c) Copyright 2020 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## Low level binary format used by the compiler to store and load various AST +## and related data. +## +## NB: this is incredibly low level and if you're interested in how the +## compiler works and less a storage format, you're probably looking for +## the `ic` or `packed_ast` modules to understand the logical format. + +from std/typetraits import supportsCopyMem + +when defined(nimPreviewSlimSystem): + import std/[syncio, assertions] + +import std / tables + +## Overview +## ======== +## `RodFile` represents a Rod File (versioned binary format), and the +## associated data for common interactions such as IO and error tracking +## (`RodFileError`). The file format broken up into sections (`RodSection`) +## and preceded by a header (see: `cookie`). The precise layout, section +## ordering and data following the section are determined by the user. See +## `ic.loadRodFile`. +## +## A basic but "wrong" example of the lifecycle: +## --------------------------------------------- +## 1. `create` or `open` - create a new one or open an existing +## 2. `storeHeader` - header info +## 3. `storePrim` or `storeSeq` - save your stuff +## 4. `close` - and we're done +## +## Now read the bits below to understand what's missing. +## +## ### Issues with the Example +## Missing Sections: +## This is a low level API, so headers and sections need to be stored and +## loaded by the user, see `storeHeader` & `loadHeader` and `storeSection` & +## `loadSection`, respectively. +## +## No Error Handling: +## The API is centered around IO and prone to error, each operation checks or +## sets the `RodFile.err` field. A user of this API needs to handle these +## appropriately. +## +## API Notes +## ========= +## +## Valid inputs for Rod files +## -------------------------- +## ASTs, hopes, dreams, and anything as long as it and any children it may have +## support `copyMem`. This means anything that is not a pointer and that does not contain a pointer. At a glance these are: +## * string +## * objects & tuples (fields are recursed) +## * sequences AKA `seq[T]` +## +## Note on error handling style +## ---------------------------- +## A flag based approach is used where operations no-op in case of a +## preexisting error and set the flag if they encounter one. +## +## Misc +## ---- +## * 'Prim' is short for 'primitive', as in a non-sequence type + +type + RodSection* = enum + versionSection + configSection + stringsSection + checkSumsSection + depsSection + numbersSection + exportsSection + hiddenSection + reexportsSection + compilerProcsSection + trmacrosSection + convertersSection + methodsSection + pureEnumsSection + toReplaySection + topLevelSection + bodiesSection + symsSection + typesSection + typeInstCacheSection + procInstCacheSection + attachedOpsSection + methodsPerGenericTypeSection + enumToStringProcsSection + methodsPerTypeSection + dispatchersSection + typeInfoSection # required by the backend + backendFlagsSection + aliveSymsSection # beware, this is stored in a `.alivesyms` file. + sideChannelSection + namespaceSection + symnamesSection + + RodFileError* = enum + ok, tooBig, cannotOpen, ioFailure, wrongHeader, wrongSection, configMismatch, + includeFileChanged + + RodFile* = object + f*: File + currentSection*: RodSection # for error checking + err*: RodFileError # little experiment to see if this works + # better than exceptions. + +const + RodVersion = 2 + defaultCookie = [byte(0), byte('R'), byte('O'), byte('D'), + byte(sizeof(int)*8), byte(system.cpuEndian), byte(0), byte(RodVersion)] + +proc setError(f: var RodFile; err: RodFileError) {.inline.} = + f.err = err + #raise newException(IOError, "IO error") + +proc storePrim*(f: var RodFile; s: string) = + ## Stores a string. + ## The len is prefixed to allow for later retreival. + if f.err != ok: return + if s.len >= high(int32): + setError f, tooBig + return + var lenPrefix = int32(s.len) + if writeBuffer(f.f, addr lenPrefix, sizeof(lenPrefix)) != sizeof(lenPrefix): + setError f, ioFailure + else: + if s.len != 0: + if writeBuffer(f.f, unsafeAddr(s[0]), s.len) != s.len: + setError f, ioFailure + +proc storePrim*[T](f: var RodFile; x: T) = + ## Stores a non-sequence/string `T`. + ## If `T` doesn't support `copyMem` and is an object or tuple then the fields + ## are written -- the user from context will need to know which `T` to load. + if f.err != ok: return + when supportsCopyMem(T): + if writeBuffer(f.f, unsafeAddr(x), sizeof(x)) != sizeof(x): + setError f, ioFailure + elif T is tuple: + for y in fields(x): + storePrim(f, y) + elif T is object: + for y in fields(x): + when y is seq: + storeSeq(f, y) + else: + storePrim(f, y) + else: + {.error: "unsupported type for 'storePrim'".} + +proc storeSeq*[T](f: var RodFile; s: seq[T]) = + ## Stores a sequence of `T`s, with the len as a prefix for later retrieval. + if f.err != ok: return + if s.len >= high(int32): + setError f, tooBig + return + var lenPrefix = int32(s.len) + if writeBuffer(f.f, addr lenPrefix, sizeof(lenPrefix)) != sizeof(lenPrefix): + setError f, ioFailure + else: + for i in 0..<s.len: + storePrim(f, s[i]) + +proc storeOrderedTable*[K, T](f: var RodFile; s: OrderedTable[K, T]) = + if f.err != ok: return + if s.len >= high(int32): + setError f, tooBig + return + var lenPrefix = int32(s.len) + if writeBuffer(f.f, addr lenPrefix, sizeof(lenPrefix)) != sizeof(lenPrefix): + setError f, ioFailure + else: + for _, v in s: + storePrim(f, v) + +proc loadPrim*(f: var RodFile; s: var string) = + ## Read a string, the length was stored as a prefix + if f.err != ok: return + var lenPrefix = int32(0) + if readBuffer(f.f, addr lenPrefix, sizeof(lenPrefix)) != sizeof(lenPrefix): + setError f, ioFailure + else: + s = newString(lenPrefix) + if lenPrefix > 0: + if readBuffer(f.f, unsafeAddr(s[0]), s.len) != s.len: + setError f, ioFailure + +proc loadPrim*[T](f: var RodFile; x: var T) = + ## Load a non-sequence/string `T`. + if f.err != ok: return + when supportsCopyMem(T): + if readBuffer(f.f, unsafeAddr(x), sizeof(x)) != sizeof(x): + setError f, ioFailure + elif T is tuple: + for y in fields(x): + loadPrim(f, y) + elif T is object: + for y in fields(x): + when y is seq: + loadSeq(f, y) + else: + loadPrim(f, y) + else: + {.error: "unsupported type for 'loadPrim'".} + +proc loadSeq*[T](f: var RodFile; s: var seq[T]) = + ## `T` must be compatible with `copyMem`, see `loadPrim` + if f.err != ok: return + var lenPrefix = int32(0) + if readBuffer(f.f, addr lenPrefix, sizeof(lenPrefix)) != sizeof(lenPrefix): + setError f, ioFailure + else: + s = newSeq[T](lenPrefix) + for i in 0..<lenPrefix: + loadPrim(f, s[i]) + +proc loadOrderedTable*[K, T](f: var RodFile; s: var OrderedTable[K, T]) = + ## `T` must be compatible with `copyMem`, see `loadPrim` + if f.err != ok: return + var lenPrefix = int32(0) + if readBuffer(f.f, addr lenPrefix, sizeof(lenPrefix)) != sizeof(lenPrefix): + setError f, ioFailure + else: + s = initOrderedTable[K, T](lenPrefix) + for i in 0..<lenPrefix: + var x = default T + loadPrim(f, x) + s[x.id] = x + +proc storeHeader*(f: var RodFile; cookie = defaultCookie) = + ## stores the header which is described by `cookie`. + if f.err != ok: return + if f.f.writeBytes(cookie, 0, cookie.len) != cookie.len: + setError f, ioFailure + +proc loadHeader*(f: var RodFile; cookie = defaultCookie) = + ## Loads the header which is described by `cookie`. + if f.err != ok: return + var thisCookie: array[cookie.len, byte] = default(array[cookie.len, byte]) + if f.f.readBytes(thisCookie, 0, thisCookie.len) != thisCookie.len: + setError f, ioFailure + elif thisCookie != cookie: + setError f, wrongHeader + +proc storeSection*(f: var RodFile; s: RodSection) = + ## update `currentSection` and writes the bytes value of s. + if f.err != ok: return + assert f.currentSection < s + f.currentSection = s + storePrim(f, s) + +proc loadSection*(f: var RodFile; expected: RodSection) = + ## read the bytes value of s, sets and error if the section is incorrect. + if f.err != ok: return + var s: RodSection = default(RodSection) + loadPrim(f, s) + if expected != s and f.err == ok: + setError f, wrongSection + +proc create*(filename: string): RodFile = + ## create the file and open it for writing + result = default(RodFile) + if not open(result.f, filename, fmWrite): + setError result, cannotOpen + +proc close*(f: var RodFile) = close(f.f) + +proc open*(filename: string): RodFile = + ## open the file for reading + result = default(RodFile) + if not open(result.f, filename, fmRead): + setError result, cannotOpen |