diff options
Diffstat (limited to 'compiler/dfa.nim')
-rw-r--r-- | compiler/dfa.nim | 614 |
1 files changed, 333 insertions, 281 deletions
diff --git a/compiler/dfa.nim b/compiler/dfa.nim index ca1849a3c..5534d07e7 100644 --- a/compiler/dfa.nim +++ b/compiler/dfa.nim @@ -7,269 +7,385 @@ # distribution, for details about the copyright. # -## Data flow analysis for Nim. For now the task is to prove that every -## usage of a local variable 'v' is covered by an initialization to 'v' -## first. +## Data flow analysis for Nim. ## We transform the AST into a linear list of instructions first to -## make this easier to handle: There are only 2 different branching +## make this easier to handle: There are only 3 different branching ## instructions: 'goto X' is an unconditional goto, 'fork X' ## is a conditional goto (either the next instruction or 'X' can be -## taken). Exhaustive case statements are translated +## taken), 'loop X' is the only jump that jumps back. +## +## Exhaustive case statements are translated ## so that the last branch is transformed into an 'else' branch. ## ``return`` and ``break`` are all covered by 'goto'. -## The case to detect is ``use v`` that is not dominated by -## a ``def v``. +## ## The data structures and algorithms used here are inspired by ## "A Graph–Free Approach to Data–Flow Analysis" by Markus Mohnen. ## https://link.springer.com/content/pdf/10.1007/3-540-45937-5_6.pdf -import ast, astalgo, types, intsets, tables, msgs +import ast, lineinfos, renderer, aliasanalysis +import std/private/asciitables +import std/intsets + +when defined(nimPreviewSlimSystem): + import std/assertions type InstrKind* = enum - goto, fork, def, use, - useWithinCall # this strange special case is used to get more - # move optimizations out of regular code - # XXX This is still overly pessimistic in - # call(let x = foo; bar(x)) + goto, loop, fork, def, use Instr* = object - n*: PNode case kind*: InstrKind - of def, use, useWithinCall: sym*: PSym - of goto, fork: dest*: int + of goto, fork, loop: dest*: int + of def, use: + n*: PNode # contains the def/use location. ControlFlowGraph* = seq[Instr] TPosition = distinct int - TBlock = object - label: PSym - fixups: seq[TPosition] - ValueKind = enum - undef, value, valueOrUndef + TBlock = object + case isTryBlock: bool + of false: + label: PSym + breakFixups: seq[(TPosition, seq[PNode])] # Contains the gotos for the breaks along with their pending finales + of true: + finale: PNode + raiseFixups: seq[TPosition] # Contains the gotos for the raises Con = object code: ControlFlowGraph - inCall: int + inTryStmt, interestingInstructions: int blocks: seq[TBlock] + owner: PSym + root: PSym -proc debugInfo(info: TLineInfo): string = - result = info.toFilename & ":" & $info.line - -proc codeListing(c: ControlFlowGraph, result: var string, start=0; last = -1) = +proc codeListing(c: ControlFlowGraph, start = 0; last = -1): string = # for debugging purposes # first iteration: compute all necessary labels: + result = "" var jumpTargets = initIntSet() let last = if last < 0: c.len-1 else: min(last, c.len-1) for i in start..last: - if c[i].kind in {goto, fork}: + if c[i].kind in {goto, fork, loop}: jumpTargets.incl(i+c[i].dest) var i = start while i <= last: if i in jumpTargets: result.add("L" & $i & ":\n") result.add "\t" - result.add $c[i].kind + result.add ($i & " " & $c[i].kind) result.add "\t" case c[i].kind - of def, use, useWithinCall: - result.add c[i].sym.name.s - of goto, fork: + of def, use: + result.add renderTree(c[i].n) + result.add("\t#") + result.add($c[i].n.info.line) + result.add("\n") + of goto, fork, loop: result.add "L" - result.add c[i].dest+i - result.add("\t#") - result.add(debugInfo(c[i].n.info)) - result.add("\n") + result.addInt c[i].dest+i inc i if i in jumpTargets: result.add("L" & $i & ": End\n") - -proc echoCfg*(c: ControlFlowGraph; start=0; last = -1) {.deprecated.} = +proc echoCfg*(c: ControlFlowGraph; start = 0; last = -1) {.deprecated.} = ## echos the ControlFlowGraph for debugging purposes. - var buf = "" - codeListing(c, buf, start, last) - echo buf + echo codeListing(c, start, last).alignTable -proc forkI(c: var Con; n: PNode): TPosition = +proc forkI(c: var Con): TPosition = result = TPosition(c.code.len) - c.code.add Instr(n: n, kind: fork, dest: 0) + c.code.add Instr(kind: fork, dest: 0) -proc gotoI(c: var Con; n: PNode): TPosition = +proc gotoI(c: var Con): TPosition = result = TPosition(c.code.len) - c.code.add Instr(n: n, kind: goto, dest: 0) + c.code.add Instr(kind: goto, dest: 0) -proc genLabel(c: Con): TPosition = - result = TPosition(c.code.len) +proc genLabel(c: Con): TPosition = TPosition(c.code.len) + +template checkedDistance(dist): int = + doAssert low(int) div 2 + 1 < dist and dist < high(int) div 2 + dist -proc jmpBack(c: var Con, n: PNode, p = TPosition(0)) = - let dist = p.int - c.code.len - internalAssert(-0x7fff < dist and dist < 0x7fff) - c.code.add Instr(n: n, kind: goto, dest: dist) +proc jmpBack(c: var Con, p = TPosition(0)) = + c.code.add Instr(kind: loop, dest: checkedDistance(p.int - c.code.len)) proc patch(c: var Con, p: TPosition) = # patch with current index - let p = p.int - let diff = c.code.len - p - internalAssert(-0x7fff < diff and diff < 0x7fff) - c.code[p].dest = diff + c.code[p.int].dest = checkedDistance(c.code.len - p.int) + +proc gen(c: var Con; n: PNode) proc popBlock(c: var Con; oldLen: int) = - for f in c.blocks[oldLen].fixups: - c.patch(f) + var exits: seq[TPosition] = @[] + exits.add c.gotoI() + for f in c.blocks[oldLen].breakFixups: + c.patch(f[0]) + for finale in f[1]: + c.gen(finale) + exits.add c.gotoI() + for e in exits: + c.patch e c.blocks.setLen(oldLen) -template withBlock(labl: PSym; body: untyped) {.dirty.} = - var oldLen {.gensym.} = c.blocks.len - c.blocks.add TBlock(label: labl, fixups: @[]) +template withBlock(labl: PSym; body: untyped) = + let oldLen = c.blocks.len + c.blocks.add TBlock(isTryBlock: false, label: labl) body popBlock(c, oldLen) -proc isTrue(n: PNode): bool = - n.kind == nkSym and n.sym.kind == skEnumField and n.sym.position != 0 or - n.kind == nkIntLit and n.intVal != 0 - -proc gen(c: var Con; n: PNode) # {.noSideEffect.} +template forkT(body) = + let lab1 = c.forkI() + body + c.patch(lab1) proc genWhile(c: var Con; n: PNode) = - # L1: + # lab1: # cond, tmp - # fjmp tmp, L2 + # fork tmp, lab2 # body - # jmp L1 - # L2: - let L1 = c.genLabel + # jmp lab1 + # lab2: + let lab1 = c.genLabel withBlock(nil): - if isTrue(n.sons[0]): - c.gen(n.sons[1]) - c.jmpBack(n, L1) + if isTrue(n[0]): + c.gen(n[1]) + c.jmpBack(lab1) else: - c.gen(n.sons[0]) - let L2 = c.forkI(n) - c.gen(n.sons[1]) - c.jmpBack(n, L1) - c.patch(L2) - -proc genBlock(c: var Con; n: PNode) = - withBlock(n.sons[0].sym): - c.gen(n.sons[1]) - -proc genBreak(c: var Con; n: PNode) = - let L1 = c.gotoI(n) - if n.sons[0].kind == nkSym: - #echo cast[int](n.sons[0].sym) - for i in countdown(c.blocks.len-1, 0): - if c.blocks[i].label == n.sons[0].sym: - c.blocks[i].fixups.add L1 - return - globalError(n.info, errGenerated, "VM problem: cannot find 'break' target") - else: - c.blocks[c.blocks.high].fixups.add L1 + c.gen(n[0]) + forkT: + c.gen(n[1]) + c.jmpBack(lab1) proc genIf(c: var Con, n: PNode) = + #[ + + if cond: + A + elif condB: + B + elif condC: + C + else: + D + + cond + fork lab1 + A + goto Lend + lab1: + condB + fork lab2 + B + goto Lend2 + lab2: + condC + fork L3 + C + goto Lend3 + L3: + D + ]# var endings: seq[TPosition] = @[] - for i in countup(0, len(n) - 1): - var it = n.sons[i] + let oldInteresting = c.interestingInstructions + let oldLen = c.code.len + + for i in 0..<n.len: + let it = n[i] + c.gen(it[0]) if it.len == 2: - c.gen(it.sons[0].sons[1]) - var elsePos = c.forkI(it.sons[0].sons[1]) - c.gen(it.sons[1]) - if i < sonsLen(n)-1: - endings.add(c.gotoI(it.sons[1])) - c.patch(elsePos) - else: - c.gen(it.sons[0]) - for endPos in endings: c.patch(endPos) + forkT: + c.gen(it.lastSon) + endings.add c.gotoI() + + if oldInteresting == c.interestingInstructions: + setLen c.code, oldLen + else: + for i in countdown(endings.high, 0): + c.patch(endings[i]) proc genAndOr(c: var Con; n: PNode) = # asgn dest, a - # fork L1 + # fork lab1 # asgn dest, b - # L1: - c.gen(n.sons[1]) - let L1 = c.forkI(n) - c.gen(n.sons[2]) - c.patch(L1) + # lab1: + c.gen(n[1]) + forkT: + c.gen(n[2]) proc genCase(c: var Con; n: PNode) = - # if (!expr1) goto L1; + # if (!expr1) goto lab1; # thenPart # goto LEnd - # L1: - # if (!expr2) goto L2; + # lab1: + # if (!expr2) goto lab2; # thenPart2 # goto LEnd - # L2: + # lab2: # elsePart # Lend: + let isExhaustive = skipTypes(n[0].typ, + abstractVarRange-{tyTypeDesc}).kind notin {tyFloat..tyFloat128, tyString, tyCstring} + var endings: seq[TPosition] = @[] - c.gen(n.sons[0]) - for i in 1 .. <n.len: - let it = n.sons[i] - if it.len == 1: - c.gen(it.sons[0]) - else: - let elsePos = c.forkI(it.lastSon) + c.gen(n[0]) + let oldInteresting = c.interestingInstructions + let oldLen = c.code.len + for i in 1..<n.len: + let it = n[i] + if it.len == 1 or (i == n.len-1 and isExhaustive): + # treat the last branch as 'else' if this is an exhaustive case statement. c.gen(it.lastSon) - if i < sonsLen(n)-1: - endings.add(c.gotoI(it.lastSon)) - c.patch(elsePos) - for endPos in endings: c.patch(endPos) + else: + forkT: + c.gen(it.lastSon) + endings.add c.gotoI() + + if oldInteresting == c.interestingInstructions: + setLen c.code, oldLen + else: + for i in countdown(endings.high, 0): + c.patch(endings[i]) + +proc genBlock(c: var Con; n: PNode) = + withBlock(n[0].sym): + c.gen(n[1]) + +proc genBreakOrRaiseAux(c: var Con, i: int, n: PNode) = + let lab1 = c.gotoI() + if c.blocks[i].isTryBlock: + c.blocks[i].raiseFixups.add lab1 + else: + var trailingFinales: seq[PNode] = @[] + if c.inTryStmt > 0: + # Ok, we are in a try, lets see which (if any) try's we break out from: + for b in countdown(c.blocks.high, i): + if c.blocks[b].isTryBlock: + trailingFinales.add c.blocks[b].finale + + c.blocks[i].breakFixups.add (lab1, trailingFinales) + +proc genBreak(c: var Con; n: PNode) = + inc c.interestingInstructions + if n[0].kind == nkSym: + for i in countdown(c.blocks.high, 0): + if not c.blocks[i].isTryBlock and c.blocks[i].label == n[0].sym: + genBreakOrRaiseAux(c, i, n) + return + #globalError(n.info, "VM problem: cannot find 'break' target") + else: + for i in countdown(c.blocks.high, 0): + if not c.blocks[i].isTryBlock: + genBreakOrRaiseAux(c, i, n) + return proc genTry(c: var Con; n: PNode) = var endings: seq[TPosition] = @[] - let elsePos = c.forkI(n) - c.gen(n.sons[0]) - c.patch(elsePos) - for i in 1 .. <n.len: - let it = n.sons[i] + + let oldLen = c.blocks.len + c.blocks.add TBlock(isTryBlock: true, finale: if n[^1].kind == nkFinally: n[^1] else: newNode(nkEmpty)) + + inc c.inTryStmt + c.gen(n[0]) + dec c.inTryStmt + + for f in c.blocks[oldLen].raiseFixups: + c.patch(f) + + c.blocks.setLen oldLen + + for i in 1..<n.len: + let it = n[i] if it.kind != nkFinally: - var blen = len(it) - let endExcept = c.forkI(it) - c.gen(it.lastSon) - if i < sonsLen(n)-1: - endings.add(c.gotoI(it)) - c.patch(endExcept) - for endPos in endings: c.patch(endPos) + forkT: + c.gen(it.lastSon) + endings.add c.gotoI() + for i in countdown(endings.high, 0): + c.patch(endings[i]) + let fin = lastSon(n) if fin.kind == nkFinally: - c.gen(fin.sons[0]) + c.gen(fin[0]) + +template genNoReturn(c: var Con) = + # leave the graph + c.code.add Instr(kind: goto, dest: high(int) - c.code.len) proc genRaise(c: var Con; n: PNode) = - gen(c, n.sons[0]) - c.code.add Instr(n: n, kind: goto, dest: high(int) - c.code.len) + inc c.interestingInstructions + gen(c, n[0]) + if c.inTryStmt > 0: + for i in countdown(c.blocks.high, 0): + if c.blocks[i].isTryBlock: + genBreakOrRaiseAux(c, i, n) + return + assert false # Unreachable + else: + genNoReturn(c) + +proc genImplicitReturn(c: var Con) = + if c.owner.kind in {skProc, skFunc, skMethod, skIterator, skConverter} and resultPos < c.owner.ast.len: + gen(c, c.owner.ast[resultPos]) proc genReturn(c: var Con; n: PNode) = - if n.sons[0].kind != nkEmpty: gen(c, n.sons[0]) - c.code.add Instr(n: n, kind: goto, dest: high(int) - c.code.len) + inc c.interestingInstructions + if n[0].kind != nkEmpty: + gen(c, n[0]) + else: + genImplicitReturn(c) + genBreakOrRaiseAux(c, 0, n) const - InterestingSyms = {skVar, skResult, skLet} - -proc genUse(c: var Con; n: PNode) = - var n = n - while n.kind in {nkDotExpr, nkCheckedFieldExpr, - nkBracketExpr, nkDerefExpr, nkHiddenDeref, - nkAddr, nkHiddenAddr}: - n = n[0] - if n.kind == nkSym and n.sym.kind in InterestingSyms: - if c.inCall > 0: - c.code.add Instr(n: n, kind: useWithinCall, sym: n.sym) - else: - c.code.add Instr(n: n, kind: use, sym: n.sym) + InterestingSyms = {skVar, skResult, skLet, skParam, skForVar, skTemp} + +proc skipTrivials(c: var Con, n: PNode): PNode = + result = n + while true: + case result.kind + of PathKinds0 - {nkBracketExpr}: + result = result[0] + of nkBracketExpr: + gen(c, result[1]) + result = result[0] + of PathKinds1: + result = result[1] + else: break + +proc genUse(c: var Con; orig: PNode) = + let n = c.skipTrivials(orig) + + if n.kind == nkSym: + if n.sym.kind in InterestingSyms and n.sym == c.root: + c.code.add Instr(kind: use, n: orig) + inc c.interestingInstructions + else: + gen(c, n) + +proc genDef(c: var Con; orig: PNode) = + let n = c.skipTrivials(orig) -proc genDef(c: var Con; n: PNode) = if n.kind == nkSym and n.sym.kind in InterestingSyms: - c.code.add Instr(n: n, kind: def, sym: n.sym) + if n.sym == c.root: + c.code.add Instr(kind: def, n: orig) + inc c.interestingInstructions proc genCall(c: var Con; n: PNode) = gen(c, n[0]) var t = n[0].typ if t != nil: t = t.skipTypes(abstractInst) - inc c.inCall for i in 1..<n.len: gen(c, n[i]) - if t != nil and i < t.len and t.sons[i].kind == tyVar: + if t != nil and i < t.signatureLen and isOutParam(t[i]): + # Pass by 'out' is a 'must def'. Good enough for a move optimizer. genDef(c, n[i]) - dec c.inCall + # every call can potentially raise: + if c.inTryStmt > 0 and canRaiseConservative(n[0]): + inc c.interestingInstructions + # we generate the instruction sequence: + # fork lab1 + # goto exceptionHandler (except or finally) + # lab1: + forkT: + for i in countdown(c.blocks.high, 0): + if c.blocks[i].isTryBlock: + genBreakOrRaiseAux(c, i, n) + break proc genMagic(c: var Con; n: PNode; m: TMagic) = case m @@ -277,163 +393,99 @@ proc genMagic(c: var Con; n: PNode; m: TMagic) = of mNew, mNewFinalize: genDef(c, n[1]) for i in 2..<n.len: gen(c, n[i]) - of mExit: - genCall(c, n) - c.code.add Instr(n: n, kind: goto, dest: high(int) - c.code.len) else: genCall(c, n) proc genVarSection(c: var Con; n: PNode) = for a in n: - if a.kind == nkCommentStmt: continue - if a.kind == nkVarTuple: + if a.kind == nkCommentStmt: + discard + elif a.kind == nkVarTuple: gen(c, a.lastSon) - for i in 0 .. a.len-3: genDef(c, a[i]) + for i in 0..<a.len-2: genDef(c, a[i]) else: gen(c, a.lastSon) if a.lastSon.kind != nkEmpty: - genDef(c, a.sons[0]) + genDef(c, a[0]) proc gen(c: var Con; n: PNode) = case n.kind of nkSym: genUse(c, n) of nkCallKinds: - if n.sons[0].kind == nkSym: - let s = n.sons[0].sym + if n[0].kind == nkSym: + let s = n[0].sym if s.magic != mNone: genMagic(c, n, s.magic) else: genCall(c, n) + if sfNoReturn in n[0].sym.flags: + genNoReturn(c) else: genCall(c, n) of nkCharLit..nkNilLit: discard - of nkAsgn, nkFastAsgn: + of nkAsgn, nkFastAsgn, nkSinkAsgn: gen(c, n[1]) + + if n[0].kind in PathKinds0: + let a = c.skipTrivials(n[0]) + if a.kind in nkCallKinds: + gen(c, a) + + # watch out: 'obj[i].f2 = value' sets 'f2' but + # "uses" 'i'. But we are only talking about builtin array indexing so + # it doesn't matter and 'x = 34' is NOT a usage of 'x'. genDef(c, n[0]) - of nkDotExpr, nkCheckedFieldExpr, nkBracketExpr, - nkDerefExpr, nkHiddenDeref, nkAddr, nkHiddenAddr: - gen(c, n[0]) + of PathKinds0 - {nkObjDownConv, nkObjUpConv}: + genUse(c, n) of nkIfStmt, nkIfExpr: genIf(c, n) of nkWhenStmt: # This is "when nimvm" node. Chose the first branch. - gen(c, n.sons[0].sons[1]) + gen(c, n[0][1]) of nkCaseStmt: genCase(c, n) of nkWhileStmt: genWhile(c, n) of nkBlockExpr, nkBlockStmt: genBlock(c, n) of nkReturnStmt: genReturn(c, n) of nkRaiseStmt: genRaise(c, n) of nkBreakStmt: genBreak(c, n) - of nkTryStmt: genTry(c, n) + of nkTryStmt, nkHiddenTryStmt: genTry(c, n) of nkStmtList, nkStmtListExpr, nkChckRangeF, nkChckRange64, nkChckRange, - nkBracket, nkCurly, nkPar, nkClosure, nkObjConstr: + nkBracket, nkCurly, nkPar, nkTupleConstr, nkClosure, nkObjConstr, nkYieldStmt: for x in n: gen(c, x) of nkPragmaBlock: gen(c, n.lastSon) - of nkDiscardStmt: gen(c, n.sons[0]) - of nkHiddenStdConv, nkHiddenSubConv, nkConv, nkExprColonExpr, nkExprEqExpr, - nkCast: - gen(c, n.sons[1]) - of nkObjDownConv, nkStringToCString, nkCStringToString: gen(c, n.sons[0]) + of nkDiscardStmt, nkObjDownConv, nkObjUpConv, nkStringToCString, nkCStringToString: + gen(c, n[0]) + of nkConv, nkExprColonExpr, nkExprEqExpr, nkCast, PathKinds1: + gen(c, n[1]) of nkVarSection, nkLetSection: genVarSection(c, n) + of nkDefer: raiseAssert "dfa construction pass requires the elimination of 'defer'" else: discard -proc dfa(code: seq[Instr]) = - # We aggressively push 'undef' values for every 'use v' instruction - # until they are eliminated via a 'def v' instructions. - # If we manage to push one 'undef' to a 'use' instruction, we produce - # an error: - var undef = initIntSet() - for i in 0..<code.len: - if code[i].kind == use: undef.incl(code[i].sym.id) - - var s = newSeq[IntSet](code.len) - for i in 0..<code.len: - assign(s[i], undef) - - # In the original paper, W := {0,...,n} is done. This is wasteful, we - # have no intention to analyse a program like - # - # return 3 - # echo a + b - # - # any further than necessary. - var w = @[0] - while w.len > 0: - var pc = w[^1] - # this simulates a single linear control flow execution: - while true: - # according to the paper, it is better to shrink the working set here - # in this inner loop: - let widx = w.find(pc) - if widx >= 0: w.del(widx) - # our interpretation ![I!]: - var sid = -1 - case code[pc].kind - of goto, fork: discard - of use, useWithinCall: - let sym = code[pc].sym - if s[pc].contains(sym.id): - localError(code[pc].n.info, "variable read before initialized: " & sym.name.s) - of def: - sid = code[pc].sym.id - - var pc2: int - if code[pc].kind == goto: - pc2 = pc + code[pc].dest - else: - pc2 = pc + 1 - if code[pc].kind == fork: - let l = pc + code[pc].dest - if sid >= 0 and s[l].missingOrExcl(sid): - w.add l - - if sid >= 0 and s[pc2].missingOrExcl(sid): - pc = pc2 - else: - break - if pc >= code.len: break - - when false: - case code[pc].kind - of use: - let s = code[pc].sym - if undefB.contains(s.id): - localError(code[pc].n.info, "variable read before initialized: " & s.name.s) - break - inc pc - of def: - let s = code[pc].sym - # exclude 'undef' for s for this path through the graph. - if not undefB.missingOrExcl(s.id): - inc pc - else: - break - #undefB.excl s.id - #inc pc - when false: - let prev = bindings.getOrDefault(s.id) - if prev != value: - # well now it has a value and we made progress, so - bindings[s.id] = value - inc pc - else: - break - of fork: - let diff = code[pc].dest - # we follow pc + 1 and remember the label for later: - w.add pc+diff - inc pc - of goto: - let diff = code[pc].dest - pc = pc + diff - if pc >= code.len: break - -proc dataflowAnalysis*(s: PSym; body: PNode) = - var c = Con(code: @[], blocks: @[]) - gen(c, body) - #echoCfg(c.code) - dfa(c.code) - -proc constructCfg*(s: PSym; body: PNode): ControlFlowGraph = +when false: + proc optimizeJumps(c: var ControlFlowGraph) = + for i in 0..<c.len: + case c[i].kind + of goto, fork: + var pc = i + c[i].dest + if pc < c.len and c[pc].kind == goto: + while pc < c.len and c[pc].kind == goto: + let newPc = pc + c[pc].dest + if newPc > pc: + pc = newPc + else: + break + c[i].dest = pc - i + of loop, def, use: discard + +proc constructCfg*(s: PSym; body: PNode; root: PSym): ControlFlowGraph = ## constructs a control flow graph for ``body``. - var c = Con(code: @[], blocks: @[]) - shallowCopy(result, c.code) + var c = Con(code: @[], blocks: @[], owner: s, root: root) + withBlock(s): + gen(c, body) + if root.kind == skResult: + genImplicitReturn(c) + when defined(gcArc) or defined(gcOrc) or defined(gcAtomicArc): + result = c.code # will move + else: + shallowCopy(result, c.code) + when false: + optimizeJumps result |