diff options
author | Andreas Rumpf <rumpf_a@web.de> | 2017-10-07 09:35:45 +0200 |
---|---|---|
committer | Andreas Rumpf <rumpf_a@web.de> | 2017-10-09 21:12:40 +0200 |
commit | b9511a2d7f6b2037ed461abddfc85b3afd5108d9 (patch) | |
tree | 952a372f423b4e52714e11040a2f6ab7790a3cb2 /compiler | |
parent | 5c1a842b881525a77dda40af8cf9bdd6432e25de (diff) | |
download | Nim-b9511a2d7f6b2037ed461abddfc85b3afd5108d9.tar.gz |
work in progress: a dataflow architecture for Nim
Diffstat (limited to 'compiler')
-rw-r--r-- | compiler/dfa.nim | 412 | ||||
-rw-r--r-- | compiler/sempass2.nim | 6 |
2 files changed, 416 insertions, 2 deletions
diff --git a/compiler/dfa.nim b/compiler/dfa.nim new file mode 100644 index 000000000..b9ca81065 --- /dev/null +++ b/compiler/dfa.nim @@ -0,0 +1,412 @@ +# +# +# The Nim Compiler +# (c) Copyright 2017 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## Data flow analysis for Nim. For now the task is to prove that every +## usage of a local variable 'v' is covered by an initialization to 'v' +## first. +## We transform the AST into a linear list of instructions first to +## make this easier to handle: There are only 2 different branching +## instructions: 'goto X' is an unconditional goto, 'fork X' +## is a conditional goto (either the next instruction or 'X' can be +## taken). Exhaustive case statements are translated +## so that the last branch is transformed into an 'else' branch. +## ``return`` and ``break`` are all covered by 'goto'. +## The case to detect is ``use v`` that is not dominated by +## a ``def v``. +## The data structures and algorithms used here are inspired by +## "A Graph–Free Approach to Data–Flow Analysis" by Markus Mohnen. +## https://link.springer.com/content/pdf/10.1007/3-540-45937-5_6.pdf + +import ast, astalgo, types, intsets, tables, msgs + +type + InstrKind = enum + goto, fork, def, use + Instr = object + n: PNode + case kind: InstrKind + of def, use: sym: PSym + of goto, fork: dest: int + + TPosition = distinct int + TBlock = object + label: PSym + fixups: seq[TPosition] + + ValueKind = enum + undef, value, valueOrUndef + + Con = object + code: seq[Instr] + blocks: seq[TBlock] + +proc debugInfo(info: TLineInfo): string = + result = info.toFilename & ":" & $info.line + +proc codeListing(c: Con, result: var string, start=0; last = -1) = + # for debugging purposes + # first iteration: compute all necessary labels: + var jumpTargets = initIntSet() + let last = if last < 0: c.code.len-1 else: min(last, c.code.len-1) + for i in start..last: + if c.code[i].kind in {goto, fork}: + jumpTargets.incl(i+c.code[i].dest) + var i = start + while i <= last: + if i in jumpTargets: result.add("L" & $i & ":\n") + result.add "\t" + result.add $c.code[i].kind + result.add "\t" + case c.code[i].kind + of def, use: + result.add c.code[i].sym.name.s + of goto, fork: + result.add "L" + result.add c.code[i].dest+i + result.add("\t#") + result.add(debugInfo(c.code[i].n.info)) + result.add("\n") + inc i + if i in jumpTargets: result.add("L" & $i & ": End\n") + + +proc echoCfg*(c: Con; start=0; last = -1) {.deprecated.} = + var buf = "" + codeListing(c, buf, start, last) + echo buf + +proc forkI(c: var Con; n: PNode): TPosition = + result = TPosition(c.code.len) + c.code.add Instr(n: n, kind: fork, dest: 0) + +proc gotoI(c: var Con; n: PNode): TPosition = + result = TPosition(c.code.len) + c.code.add Instr(n: n, kind: goto, dest: 0) + +proc genLabel(c: Con): TPosition = + result = TPosition(c.code.len) + +proc jmpBack(c: var Con, n: PNode, p = TPosition(0)) = + let dist = p.int - c.code.len + internalAssert(-0x7fff < dist and dist < 0x7fff) + c.code.add Instr(n: n, kind: goto, dest: dist) + +proc patch(c: var Con, p: TPosition) = + # patch with current index + let p = p.int + let diff = c.code.len - p + internalAssert(-0x7fff < diff and diff < 0x7fff) + c.code[p].dest = diff + +proc popBlock(c: var Con; oldLen: int) = + for f in c.blocks[oldLen].fixups: + c.patch(f) + c.blocks.setLen(oldLen) + +template withBlock(labl: PSym; body: untyped) {.dirty.} = + var oldLen {.gensym.} = c.blocks.len + c.blocks.add TBlock(label: labl, fixups: @[]) + body + popBlock(c, oldLen) + +proc isTrue(n: PNode): bool = + n.kind == nkSym and n.sym.kind == skEnumField and n.sym.position != 0 or + n.kind == nkIntLit and n.intVal != 0 + +proc gen(c: var Con; n: PNode) # {.noSideEffect.} + +proc genWhile(c: var Con; n: PNode) = + # L1: + # cond, tmp + # fjmp tmp, L2 + # body + # jmp L1 + # L2: + let L1 = c.genLabel + withBlock(nil): + if isTrue(n.sons[0]): + c.gen(n.sons[1]) + c.jmpBack(n, L1) + else: + c.gen(n.sons[0]) + let L2 = c.forkI(n) + c.gen(n.sons[1]) + c.jmpBack(n, L1) + c.patch(L2) + +proc genBlock(c: var Con; n: PNode) = + withBlock(n.sons[0].sym): + c.gen(n.sons[1]) + +proc genBreak(c: var Con; n: PNode) = + let L1 = c.gotoI(n) + if n.sons[0].kind == nkSym: + #echo cast[int](n.sons[0].sym) + for i in countdown(c.blocks.len-1, 0): + if c.blocks[i].label == n.sons[0].sym: + c.blocks[i].fixups.add L1 + return + globalError(n.info, errGenerated, "VM problem: cannot find 'break' target") + else: + c.blocks[c.blocks.high].fixups.add L1 + +proc genIf(c: var Con, n: PNode) = + var endings: seq[TPosition] = @[] + for i in countup(0, len(n) - 1): + var it = n.sons[i] + if it.len == 2: + c.gen(it.sons[0].sons[1]) + var elsePos = c.forkI(it.sons[0].sons[1]) + c.gen(it.sons[1]) + if i < sonsLen(n)-1: + endings.add(c.gotoI(it.sons[1])) + c.patch(elsePos) + else: + c.gen(it.sons[0]) + for endPos in endings: c.patch(endPos) + +proc genAndOr(c: var Con; n: PNode) = + # asgn dest, a + # fork L1 + # asgn dest, b + # L1: + c.gen(n.sons[1]) + let L1 = c.forkI(n) + c.gen(n.sons[2]) + c.patch(L1) + +proc genCase(c: var Con; n: PNode) = + # if (!expr1) goto L1; + # thenPart + # goto LEnd + # L1: + # if (!expr2) goto L2; + # thenPart2 + # goto LEnd + # L2: + # elsePart + # Lend: + var endings: seq[TPosition] = @[] + c.gen(n.sons[0]) + for i in 1 .. <n.len: + let it = n.sons[i] + if it.len == 1: + c.gen(it.sons[0]) + else: + let elsePos = c.forkI(it.lastSon) + c.gen(it.lastSon) + if i < sonsLen(n)-1: + endings.add(c.gotoI(it.lastSon)) + c.patch(elsePos) + for endPos in endings: c.patch(endPos) + +proc genTry(c: var Con; n: PNode) = + var endings: seq[TPosition] = @[] + let elsePos = c.forkI(n) + c.gen(n.sons[0]) + c.patch(elsePos) + for i in 1 .. <n.len: + let it = n.sons[i] + if it.kind != nkFinally: + var blen = len(it) + let endExcept = c.forkI(it) + c.gen(it.lastSon) + if i < sonsLen(n)-1: + endings.add(c.gotoI(it)) + c.patch(endExcept) + for endPos in endings: c.patch(endPos) + let fin = lastSon(n) + if fin.kind == nkFinally: + c.gen(fin.sons[0]) + +proc genRaise(c: var Con; n: PNode) = + gen(c, n.sons[0]) + c.code.add Instr(n: n, kind: goto, dest: high(int)) + +proc genReturn(c: var Con; n: PNode) = + if n.sons[0].kind != nkEmpty: gen(c, n.sons[0]) + c.code.add Instr(n: n, kind: goto, dest: high(int)) + +const + InterestingSyms = {skVar, skResult} + +proc genUse(c: var Con; n: PNode) = + var n = n + while n.kind in {nkDotExpr, nkCheckedFieldExpr, + nkBracketExpr, nkDerefExpr, nkHiddenDeref, + nkAddr, nkHiddenAddr}: + n = n[0] + if n.kind == nkSym and n.sym.kind in InterestingSyms: + c.code.add Instr(n: n, kind: use, sym: n.sym) + +proc genDef(c: var Con; n: PNode) = + if n.kind == nkSym and n.sym.kind in InterestingSyms: + c.code.add Instr(n: n, kind: def, sym: n.sym) + +proc genCall(c: var Con; n: PNode) = + gen(c, n[0]) + var t = n[0].typ + if t != nil: t = t.skipTypes(abstractInst) + for i in 1..<n.len: + gen(c, n[i]) + if t != nil and i < t.len and t.sons[i].kind == tyVar: + genDef(c, n[i]) + +proc genMagic(c: var Con; n: PNode; m: TMagic) = + case m + of mAnd, mOr: c.genAndOr(n) + of mNew, mNewFinalize: + genDef(c, n[1]) + for i in 2..<n.len: gen(c, n[i]) + of mExit: + genCall(c, n) + c.code.add Instr(n: n, kind: goto, dest: high(int)) + else: + genCall(c, n) + +proc genVarSection(c: var Con; n: PNode) = + for a in n: + if a.kind == nkCommentStmt: continue + if a.kind == nkVarTuple: + gen(c, a.lastSon) + for i in 0 .. a.len-3: genDef(c, a[i]) + else: + gen(c, a.lastSon) + if a.lastSon.kind != nkEmpty: + genDef(c, a.sons[0]) + +proc gen(c: var Con; n: PNode) = + case n.kind + of nkSym: genUse(c, n) + of nkCallKinds: + if n.sons[0].kind == nkSym: + let s = n.sons[0].sym + if s.magic != mNone: + genMagic(c, n, s.magic) + else: + genCall(c, n) + else: + genCall(c, n) + of nkCharLit..nkNilLit: discard + of nkAsgn, nkFastAsgn: + gen(c, n[1]) + genDef(c, n[0]) + of nkDotExpr, nkCheckedFieldExpr, nkBracketExpr, + nkDerefExpr, nkHiddenDeref, nkAddr, nkHiddenAddr: + gen(c, n[0]) + of nkIfStmt, nkIfExpr: genIf(c, n) + of nkWhenStmt: + # This is "when nimvm" node. Chose the first branch. + gen(c, n.sons[0].sons[1]) + of nkCaseStmt: genCase(c, n) + of nkWhileStmt: genWhile(c, n) + of nkBlockExpr, nkBlockStmt: genBlock(c, n) + of nkReturnStmt: genReturn(c, n) + of nkRaiseStmt: genRaise(c, n) + of nkBreakStmt: genBreak(c, n) + of nkTryStmt: genTry(c, n) + of nkStmtList, nkStmtListExpr, nkChckRangeF, nkChckRange64, nkChckRange, + nkBracket, nkCurly, nkPar, nkClosure, nkObjConstr: + for x in n: gen(c, x) + of nkPragmaBlock: gen(c, n.lastSon) + of nkDiscardStmt: gen(c, n.sons[0]) + of nkHiddenStdConv, nkHiddenSubConv, nkConv, nkExprColonExpr, nkExprEqExpr, + nkCast: + gen(c, n.sons[1]) + of nkObjDownConv, nkStringToCString, nkCStringToString: gen(c, n.sons[0]) + of nkVarSection, nkLetSection: genVarSection(c, n) + else: discard + +proc dfa(code: seq[Instr]) = + # We aggressively push 'undef' values for every 'use v' instruction + # until they are eliminated via a 'def v' instructions. + # If we manage to push one 'undef' to a 'use' instruction, we produce + # an error: + var undef = initIntSet() + for i in 0..<code.len: + if code[i].kind == use: undef.incl(code[i].sym.id) + + var s = newSeq[IntSet](code.len) + for i in 0..<code.len: + assign(s[i], undef) + + # In the original paper, W := {0,...,n} is done. This is wasteful, we + # have no intention to analyse a program like + # + # return 3 + # echo a + b + # + # any further than necessary. + var w = @[0] + while w.len > 0: + var pc = w.pop() + #var undefB: IntSet + #assign(undefB, undef) + + #[ + new := ![I[pc]!](s[pc]) + if I[pc] = (goto l) then + pc' := l + else + pc' := pc + 1 + if I[pc] = (if ψ goto l) and new < s[l] then + W := W + l + s[l] := new + end + end + if new < s[pc] then + s[pc'] := new + pc := pc' + else + break + end + if pc >= code.len: break + ]# + + # this simulates a single linear control flow execution: + while true: + case code[pc].kind + of use: + let s = code[pc].sym + if undefB.contains(s.id): + localError(code[pc].n.info, "variable read before initialized: " & s.name.s) + break + inc pc + of def: + let s = code[pc].sym + # exclude 'undef' for s for this path through the graph. + if not undefB.missingOrExcl(s.id): + inc pc + else: + break + #undefB.excl s.id + #inc pc + when false: + let prev = bindings.getOrDefault(s.id) + if prev != value: + # well now it has a value and we made progress, so + bindings[s.id] = value + inc pc + else: + break + of fork: + let diff = code[pc].dest + # we follow pc + 1 and remember the label for later: + w.add pc+diff + inc pc + of goto: + let diff = code[pc].dest + pc = pc + diff + if pc >= code.len: break + +proc dataflowAnalysis*(s: PSym; body: PNode) = + var c = Con(code: @[], blocks: @[]) + gen(c, body) + echoCfg(c) + dfa(c.code) diff --git a/compiler/sempass2.nim b/compiler/sempass2.nim index e1a3939fc..bf5735fc1 100644 --- a/compiler/sempass2.nim +++ b/compiler/sempass2.nim @@ -9,7 +9,7 @@ import intsets, ast, astalgo, msgs, renderer, magicsys, types, idents, trees, - wordrecg, strutils, options, guards, writetracking + wordrecg, strutils, options, guards, writetracking, dfa # Second semantic checking pass over the AST. Necessary because the old # way had some inherent problems. Performs: @@ -979,7 +979,9 @@ proc trackProc*(s: PSym, body: PNode) = message(s.info, warnLockLevel, "declared lock level is $1, but real lock level is $2" % [$s.typ.lockLevel, $t.maxLockLevel]) - if s.kind == skFunc: trackWrites(s, body) + if s.kind == skFunc: + dataflowAnalysis(s, body) + trackWrites(s, body) proc trackTopLevelStmt*(module: PSym; n: PNode) = if n.kind in {nkPragma, nkMacroDef, nkTemplateDef, nkProcDef, nkFuncDef, |