From b9511a2d7f6b2037ed461abddfc85b3afd5108d9 Mon Sep 17 00:00:00 2001 From: Andreas Rumpf Date: Sat, 7 Oct 2017 09:35:45 +0200 Subject: work in progress: a dataflow architecture for Nim --- compiler/dfa.nim | 412 ++++++++++++++++++++++++++++++++++++++++++++++++++ compiler/sempass2.nim | 6 +- 2 files changed, 416 insertions(+), 2 deletions(-) create mode 100644 compiler/dfa.nim (limited to 'compiler') diff --git a/compiler/dfa.nim b/compiler/dfa.nim new file mode 100644 index 000000000..b9ca81065 --- /dev/null +++ b/compiler/dfa.nim @@ -0,0 +1,412 @@ +# +# +# The Nim Compiler +# (c) Copyright 2017 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## Data flow analysis for Nim. For now the task is to prove that every +## usage of a local variable 'v' is covered by an initialization to 'v' +## first. +## We transform the AST into a linear list of instructions first to +## make this easier to handle: There are only 2 different branching +## instructions: 'goto X' is an unconditional goto, 'fork X' +## is a conditional goto (either the next instruction or 'X' can be +## taken). Exhaustive case statements are translated +## so that the last branch is transformed into an 'else' branch. +## ``return`` and ``break`` are all covered by 'goto'. +## The case to detect is ``use v`` that is not dominated by +## a ``def v``. +## The data structures and algorithms used here are inspired by +## "A Graph–Free Approach to Data–Flow Analysis" by Markus Mohnen. +## https://link.springer.com/content/pdf/10.1007/3-540-45937-5_6.pdf + +import ast, astalgo, types, intsets, tables, msgs + +type + InstrKind = enum + goto, fork, def, use + Instr = object + n: PNode + case kind: InstrKind + of def, use: sym: PSym + of goto, fork: dest: int + + TPosition = distinct int + TBlock = object + label: PSym + fixups: seq[TPosition] + + ValueKind = enum + undef, value, valueOrUndef + + Con = object + code: seq[Instr] + blocks: seq[TBlock] + +proc debugInfo(info: TLineInfo): string = + result = info.toFilename & ":" & $info.line + +proc codeListing(c: Con, result: var string, start=0; last = -1) = + # for debugging purposes + # first iteration: compute all necessary labels: + var jumpTargets = initIntSet() + let last = if last < 0: c.code.len-1 else: min(last, c.code.len-1) + for i in start..last: + if c.code[i].kind in {goto, fork}: + jumpTargets.incl(i+c.code[i].dest) + var i = start + while i <= last: + if i in jumpTargets: result.add("L" & $i & ":\n") + result.add "\t" + result.add $c.code[i].kind + result.add "\t" + case c.code[i].kind + of def, use: + result.add c.code[i].sym.name.s + of goto, fork: + result.add "L" + result.add c.code[i].dest+i + result.add("\t#") + result.add(debugInfo(c.code[i].n.info)) + result.add("\n") + inc i + if i in jumpTargets: result.add("L" & $i & ": End\n") + + +proc echoCfg*(c: Con; start=0; last = -1) {.deprecated.} = + var buf = "" + codeListing(c, buf, start, last) + echo buf + +proc forkI(c: var Con; n: PNode): TPosition = + result = TPosition(c.code.len) + c.code.add Instr(n: n, kind: fork, dest: 0) + +proc gotoI(c: var Con; n: PNode): TPosition = + result = TPosition(c.code.len) + c.code.add Instr(n: n, kind: goto, dest: 0) + +proc genLabel(c: Con): TPosition = + result = TPosition(c.code.len) + +proc jmpBack(c: var Con, n: PNode, p = TPosition(0)) = + let dist = p.int - c.code.len + internalAssert(-0x7fff < dist and dist < 0x7fff) + c.code.add Instr(n: n, kind: goto, dest: dist) + +proc patch(c: var Con, p: TPosition) = + # patch with current index + let p = p.int + let diff = c.code.len - p + internalAssert(-0x7fff < diff and diff < 0x7fff) + c.code[p].dest = diff + +proc popBlock(c: var Con; oldLen: int) = + for f in c.blocks[oldLen].fixups: + c.patch(f) + c.blocks.setLen(oldLen) + +template withBlock(labl: PSym; body: untyped) {.dirty.} = + var oldLen {.gensym.} = c.blocks.len + c.blocks.add TBlock(label: labl, fixups: @[]) + body + popBlock(c, oldLen) + +proc isTrue(n: PNode): bool = + n.kind == nkSym and n.sym.kind == skEnumField and n.sym.position != 0 or + n.kind == nkIntLit and n.intVal != 0 + +proc gen(c: var Con; n: PNode) # {.noSideEffect.} + +proc genWhile(c: var Con; n: PNode) = + # L1: + # cond, tmp + # fjmp tmp, L2 + # body + # jmp L1 + # L2: + let L1 = c.genLabel + withBlock(nil): + if isTrue(n.sons[0]): + c.gen(n.sons[1]) + c.jmpBack(n, L1) + else: + c.gen(n.sons[0]) + let L2 = c.forkI(n) + c.gen(n.sons[1]) + c.jmpBack(n, L1) + c.patch(L2) + +proc genBlock(c: var Con; n: PNode) = + withBlock(n.sons[0].sym): + c.gen(n.sons[1]) + +proc genBreak(c: var Con; n: PNode) = + let L1 = c.gotoI(n) + if n.sons[0].kind == nkSym: + #echo cast[int](n.sons[0].sym) + for i in countdown(c.blocks.len-1, 0): + if c.blocks[i].label == n.sons[0].sym: + c.blocks[i].fixups.add L1 + return + globalError(n.info, errGenerated, "VM problem: cannot find 'break' target") + else: + c.blocks[c.blocks.high].fixups.add L1 + +proc genIf(c: var Con, n: PNode) = + var endings: seq[TPosition] = @[] + for i in countup(0, len(n) - 1): + var it = n.sons[i] + if it.len == 2: + c.gen(it.sons[0].sons[1]) + var elsePos = c.forkI(it.sons[0].sons[1]) + c.gen(it.sons[1]) + if i < sonsLen(n)-1: + endings.add(c.gotoI(it.sons[1])) + c.patch(elsePos) + else: + c.gen(it.sons[0]) + for endPos in endings: c.patch(endPos) + +proc genAndOr(c: var Con; n: PNode) = + # asgn dest, a + # fork L1 + # asgn dest, b + # L1: + c.gen(n.sons[1]) + let L1 = c.forkI(n) + c.gen(n.sons[2]) + c.patch(L1) + +proc genCase(c: var Con; n: PNode) = + # if (!expr1) goto L1; + # thenPart + # goto LEnd + # L1: + # if (!expr2) goto L2; + # thenPart2 + # goto LEnd + # L2: + # elsePart + # Lend: + var endings: seq[TPosition] = @[] + c.gen(n.sons[0]) + for i in 1 .. 0: + var pc = w.pop() + #var undefB: IntSet + #assign(undefB, undef) + + #[ + new := ![I[pc]!](s[pc]) + if I[pc] = (goto l) then + pc' := l + else + pc' := pc + 1 + if I[pc] = (if ψ goto l) and new < s[l] then + W := W + l + s[l] := new + end + end + if new < s[pc] then + s[pc'] := new + pc := pc' + else + break + end + if pc >= code.len: break + ]# + + # this simulates a single linear control flow execution: + while true: + case code[pc].kind + of use: + let s = code[pc].sym + if undefB.contains(s.id): + localError(code[pc].n.info, "variable read before initialized: " & s.name.s) + break + inc pc + of def: + let s = code[pc].sym + # exclude 'undef' for s for this path through the graph. + if not undefB.missingOrExcl(s.id): + inc pc + else: + break + #undefB.excl s.id + #inc pc + when false: + let prev = bindings.getOrDefault(s.id) + if prev != value: + # well now it has a value and we made progress, so + bindings[s.id] = value + inc pc + else: + break + of fork: + let diff = code[pc].dest + # we follow pc + 1 and remember the label for later: + w.add pc+diff + inc pc + of goto: + let diff = code[pc].dest + pc = pc + diff + if pc >= code.len: break + +proc dataflowAnalysis*(s: PSym; body: PNode) = + var c = Con(code: @[], blocks: @[]) + gen(c, body) + echoCfg(c) + dfa(c.code) diff --git a/compiler/sempass2.nim b/compiler/sempass2.nim index e1a3939fc..bf5735fc1 100644 --- a/compiler/sempass2.nim +++ b/compiler/sempass2.nim @@ -9,7 +9,7 @@ import intsets, ast, astalgo, msgs, renderer, magicsys, types, idents, trees, - wordrecg, strutils, options, guards, writetracking + wordrecg, strutils, options, guards, writetracking, dfa # Second semantic checking pass over the AST. Necessary because the old # way had some inherent problems. Performs: @@ -979,7 +979,9 @@ proc trackProc*(s: PSym, body: PNode) = message(s.info, warnLockLevel, "declared lock level is $1, but real lock level is $2" % [$s.typ.lockLevel, $t.maxLockLevel]) - if s.kind == skFunc: trackWrites(s, body) + if s.kind == skFunc: + dataflowAnalysis(s, body) + trackWrites(s, body) proc trackTopLevelStmt*(module: PSym; n: PNode) = if n.kind in {nkPragma, nkMacroDef, nkTemplateDef, nkProcDef, nkFuncDef, -- cgit 1.4.1-2-gfad0