diff options
author | Andreas Rumpf <rumpf_a@web.de> | 2019-01-21 14:35:49 +0100 |
---|---|---|
committer | Andreas Rumpf <rumpf_a@web.de> | 2019-01-23 11:08:51 +0100 |
commit | 11022fea1b530aceced4c3f24d9295898d2eaf44 (patch) | |
tree | 08ddea8242a6d414b884f94a9be03fc20c907c9a | |
parent | 2fb8b1d1323ce2b683933e0a7d59044bf26caf99 (diff) | |
download | Nim-11022fea1b530aceced4c3f24d9295898d2eaf44.tar.gz |
control flow graphs: introduce 'join' points for easy analyses based on abstract interpretation
-rw-r--r-- | compiler/destroyer.nim | 4 | ||||
-rw-r--r-- | compiler/dfa.nim | 543 | ||||
-rw-r--r-- | compiler/sempass2.nim | 2 |
3 files changed, 395 insertions, 154 deletions
diff --git a/compiler/destroyer.nim b/compiler/destroyer.nim index 03ce9a5cf..ca2aa8558 100644 --- a/compiler/destroyer.nim +++ b/compiler/destroyer.nim @@ -184,7 +184,7 @@ proc isHarmlessVar*(s: PSym; c: Con): bool = inc usages #of useWithinCall: # if c.g[i].sym == s: return false - of goto, fork: + of goto, fork, InstrKind.join: discard "we do not perform an abstract interpretation yet" result = usages <= 1 @@ -246,6 +246,8 @@ proc isLastRead(n: PNode; c: var Con): bool = if not takenForks.containsOrIncl(pc): pcs.add pc + c.g[pc].dest inc pc + of InstrKind.join: + inc pc #echo c.graph.config $ n.info, " last read here!" return true diff --git a/compiler/dfa.nim b/compiler/dfa.nim index df9584576..f34981000 100644 --- a/compiler/dfa.nim +++ b/compiler/dfa.nim @@ -34,12 +34,12 @@ import ast, astalgo, types, intsets, tables, msgs, options, lineinfos type InstrKind* = enum - goto, fork, def, use + goto, fork, join, def, use Instr* = object n*: PNode case kind*: InstrKind of def, use: sym*: PSym - of goto, fork: dest*: int + of goto, fork, join: dest*: int ControlFlowGraph* = seq[Instr] @@ -56,6 +56,7 @@ type inCall, inTryStmt: int blocks: seq[TBlock] tryStmtFixups: seq[TPosition] + forks: seq[TPosition] owner: PSym proc debugInfo(info: TLineInfo): string = @@ -67,7 +68,7 @@ proc codeListing(c: ControlFlowGraph, result: var string, start=0; last = -1) = var jumpTargets = initIntSet() let last = if last < 0: c.len-1 else: min(last, c.len-1) for i in start..last: - if c[i].kind in {goto, fork}: + if c[i].kind in {goto, fork, join}: jumpTargets.incl(i+c[i].dest) var i = start while i <= last: @@ -78,7 +79,7 @@ proc codeListing(c: ControlFlowGraph, result: var string, start=0; last = -1) = case c[i].kind of def, use: result.add c[i].sym.name.s - of goto, fork: + of goto, fork, join: result.add "L" result.add c[i].dest+i result.add("\t#") @@ -98,11 +99,166 @@ proc echoCfg*(c: ControlFlowGraph; start=0; last = -1) {.deprecated.} = proc forkI(c: var Con; n: PNode): TPosition = result = TPosition(c.code.len) c.code.add Instr(n: n, kind: fork, dest: 0) + c.forks.add result proc gotoI(c: var Con; n: PNode): TPosition = result = TPosition(c.code.len) c.code.add Instr(n: n, kind: goto, dest: 0) +#[ + +Design of join +============== + +block: + if cond: break + def(x) + +use(x) + +Generates: + +L0: fork L1 + join L0 # patched. + goto Louter +L1: + def x + join L0 +Louter: + use x + + +block outer: + while a: + while b: + if foo: + if bar: + break outer # --> we need to 'join' every pushed 'fork' here + + +This works and then our abstract interpretation needs to deal with 'fork' +differently. It really causes a split in execution. Two threads are +"spawned" and both need to reach the 'join L' instruction. Afterwards +the abstract interpretations are joined and execution resumes single +threaded. + + +Abstract Interpretation +----------------------- + +proc interpret(pc, state, comesFrom): state = + result = state + # we need an explicit 'create' instruction (an explicit heap), in order + # to deal with 'var x = create(); var y = x; var z = y; destroy(z)' + while true: + case pc + of fork: + let a = interpret(pc+1, result, pc) + let b = interpret(forkTarget, result, pc) + result = a ++ b # ++ is a union operation + inc pc + of join: + if joinTarget == comesFrom: return result + else: inc pc + of use X: + if not result.contains(x): + error "variable not initialized " & x + inc pc + of def X: + if not result.contains(x): + result.incl X + else: + error "overwrite of variable causes memory leak " & x + inc pc + of destroy X: + result.excl X + +This is correct but still can lead to false positives: + +proc p(cond: bool) = + if cond: + new(x) + otherThings() + if cond: + destroy x + +Is not a leak. We should find a way to model *data* flow, not just +control flow. One solution is to rewrite the 'if' without a fork +instruction. The unstructured aspect can now be easily dealt with +the 'goto' and 'join' instructions. + +proc p(cond: bool) = + L0: fork Lend + new(x) + # do not 'join' here! + + Lend: + otherThings() + join L0 # SKIP THIS FOR new(x) SOMEHOW + destroy x + join L0 # but here. + + + +But if we follow 'goto Louter' we will never come to the join point. +We restore the bindings after popping pc from the stack then there +"no" problem?! + + +while cond: + prelude() + if not condB: break + postlude() + +---> +var setFlag = true +while cond and not setFlag: + prelude() + if not condB: + setFlag = true # BUT: Dependency + if not setFlag: # HERE + postlude() + +---> +var setFlag = true +while cond and not setFlag: + prelude() + if not condB: + postlude() + setFlag = true + + +------------------------------------------------- + +while cond: + prelude() + if more: + if not condB: break + stuffHere() + postlude() + +--> +var setFlag = true +while cond and not setFlag: + prelude() + if more: + if not condB: + setFlag = false + else: + stuffHere() + postlude() + else: + postlude() + +This is getting complicated. Instead we keep the whole 'join' idea but +duplicate the 'join' instructions on breaks and return exits! + +]# + +proc joinI(c: var Con; fromFork: TPosition; n: PNode) = + let dist = fromFork.int - c.code.len + c.code.add Instr(n: n, kind: join, dest: dist) + proc genLabel(c: Con): TPosition = result = TPosition(c.code.len) @@ -135,30 +291,97 @@ proc isTrue(n: PNode): bool = proc gen(c: var Con; n: PNode) # {.noSideEffect.} -proc genWhile(c: var Con; n: PNode) = - # L1: - # cond, tmp - # fork tmp, L2 - # body - # jmp L1 - # L2: - let L1 = c.genLabel - withBlock(nil): +when true: + proc genWhile(c: var Con; n: PNode) = + # We unroll every loop 3 times. We emulate 0, 1, 2 iterations + # through the loop. We need to prove this is correct for our + # purposes. But Herb Sutter claims it is. (Proof by authority.) + #[ + while cond: + body + + Becomes: + + if cond: + body + if cond: + body + if cond: + body + + We still need to ensure 'break' resolves properly, so an AST to AST + translation is impossible. + + So the code to generate is: + + cond + fork L4 # F1 + body + cond + fork L5 # F2 + body + cond + fork L6 # F3 + body + L6: + join F3 + L5: + join F2 + L4: + join F1 + ]# if isTrue(n.sons[0]): - c.gen(n.sons[1]) - c.jmpBack(n, L1) + # 'while true' is an idiom in Nim and so we produce + # better code for it: + for i in 0..2: + withBlock(nil): + c.gen(n.sons[1]) else: - c.gen(n.sons[0]) - let L2 = c.forkI(n) - c.gen(n.sons[1]) - c.jmpBack(n, L1) - c.patch(L2) + let oldForksLen = c.forks.len + var endings: array[3, TPosition] + for i in 0..2: + withBlock(nil): + c.gen(n.sons[0]) + endings[i] = c.forkI(n) + c.gen(n.sons[1]) + for i in countdown(endings.high, 0): + let endPos = endings[i] + c.patch(endPos) + c.joinI(c.forks.pop(), n) + doAssert(c.forks.len == oldForksLen) + +else: + + proc genWhile(c: var Con; n: PNode) = + # L1: + # cond, tmp + # fork tmp, L2 + # body + # jmp L1 + # L2: + let oldForksLen = c.forks.len + let L1 = c.genLabel + withBlock(nil): + if isTrue(n.sons[0]): + c.gen(n.sons[1]) + c.jmpBack(n, L1) + else: + c.gen(n.sons[0]) + let L2 = c.forkI(n) + c.gen(n.sons[1]) + c.jmpBack(n, L1) + c.patch(L2) + setLen(c.forks, oldForksLen) proc genBlock(c: var Con; n: PNode) = withBlock(n.sons[0].sym): c.gen(n.sons[1]) +proc genJoins(c: var Con; n: PNode) = + for i in countdown(c.forks.high, 0): joinI(c, c.forks[i], n) + proc genBreak(c: var Con; n: PNode) = + genJoins(c, n) let L1 = c.gotoI(n) if n.sons[0].kind == nkSym: #echo cast[int](n.sons[0].sym) @@ -170,28 +393,76 @@ proc genBreak(c: var Con; n: PNode) = else: c.blocks[c.blocks.high].fixups.add L1 +template forkT(n, body) = + let oldLen = c.forks.len + let L1 = c.forkI(n) + body + c.patch(L1) + c.joinI(L1, n) + setLen(c.forks, oldLen) + proc genIf(c: var Con, n: PNode) = + #[ + + if cond: + A + elif condB: + B + elif condC: + C + else: + D + + cond + fork L1 + A + goto Lend + L1: + condB + fork L2 + B + goto Lend2 + L2: + condC + fork L3 + C + goto Lend3 + L3: + D + goto Lend3 # not eliminated to simplify the join generation + Lend3: + join F3 + Lend2: + join F2 + Lend: + join F1 + + ]# + let oldLen = c.forks.len var endings: seq[TPosition] = @[] for i in countup(0, len(n) - 1): var it = n.sons[i] c.gen(it.sons[0]) if it.len == 2: - let elsePos = c.forkI(it.sons[1]) + let elsePos = forkI(c, it[1]) c.gen(it.sons[1]) - if i < sonsLen(n)-1: - endings.add(c.gotoI(it.sons[1])) + endings.add(c.gotoI(it.sons[1])) c.patch(elsePos) - for endPos in endings: c.patch(endPos) + for i in countdown(endings.high, 0): + let endPos = endings[i] + c.patch(endPos) + c.joinI(c.forks.pop(), n) + doAssert(c.forks.len == oldLen) proc genAndOr(c: var Con; n: PNode) = # asgn dest, a # fork L1 # asgn dest, b # L1: + # join F1 c.gen(n.sons[1]) - let L1 = c.forkI(n) - c.gen(n.sons[2]) - c.patch(L1) + forkT(n): + c.gen(n.sons[2]) proc genCase(c: var Con; n: PNode) = # if (!expr1) goto L1; @@ -204,56 +475,73 @@ proc genCase(c: var Con; n: PNode) = # L2: # elsePart # Lend: - when false: - # XXX Exhaustiveness is not yet mapped to the control flow graph as - # it seems to offer no benefits for the 'last read of' question. - let isExhaustive = skipTypes(n.sons[0].typ, - abstractVarRange-{tyTypeDesc}).kind in {tyFloat..tyFloat128, tyString} or - lastSon(n).kind == nkElse + let isExhaustive = skipTypes(n.sons[0].typ, + abstractVarRange-{tyTypeDesc}).kind notin {tyFloat..tyFloat128, tyString} var endings: seq[TPosition] = @[] + let oldLen = c.forks.len c.gen(n.sons[0]) for i in 1 ..< n.len: let it = n.sons[i] if it.len == 1: c.gen(it.sons[0]) + elif i == n.len-1 and isExhaustive: + # treat the last branch as 'else' if this is an exhaustive case statement. + c.gen(it.lastSon) else: let elsePos = c.forkI(it.lastSon) c.gen(it.lastSon) - if i < sonsLen(n)-1: - endings.add(c.gotoI(it.lastSon)) + endings.add(c.gotoI(it.lastSon)) c.patch(elsePos) - for endPos in endings: c.patch(endPos) + for i in countdown(endings.high, 0): + let endPos = endings[i] + c.patch(endPos) + c.joinI(c.forks.pop(), n) + doAssert(c.forks.len == oldLen) proc genTry(c: var Con; n: PNode) = + let oldLen = c.forks.len var endings: seq[TPosition] = @[] inc c.inTryStmt - var newFixups: seq[TPosition] - swap(newFixups, c.tryStmtFixups) + let oldFixups = c.tryStmtFixups.len - let elsePos = c.forkI(n) + #let elsePos = c.forkI(n) c.gen(n.sons[0]) dec c.inTryStmt - for f in newFixups: + for i in oldFixups..c.tryStmtFixups.high: + let f = c.tryStmtFixups[i] c.patch(f) - swap(newFixups, c.tryStmtFixups) + # we also need to produce join instructions + # for the 'fork' that might preceed the goto instruction + if f.int-1 >= 0 and c.code[f.int-1].kind == fork: + c.joinI(TPosition(f.int-1), n) + + setLen(c.tryStmtFixups, oldFixups) - c.patch(elsePos) + #c.patch(elsePos) for i in 1 ..< n.len: let it = n.sons[i] if it.kind != nkFinally: var blen = len(it) let endExcept = c.forkI(it) c.gen(it.lastSon) - if i < sonsLen(n)-1: - endings.add(c.gotoI(it)) + endings.add(c.gotoI(it)) c.patch(endExcept) - for endPos in endings: c.patch(endPos) + for i in countdown(endings.high, 0): + let endPos = endings[i] + c.patch(endPos) + c.joinI(c.forks.pop(), n) + + # join the 'elsePos' forkI instruction: + #c.joinI(c.forks.pop(), n) + let fin = lastSon(n) if fin.kind == nkFinally: c.gen(fin.sons[0]) + doAssert(c.forks.len == oldLen) proc genRaise(c: var Con; n: PNode) = + genJoins(c, n) gen(c, n.sons[0]) if c.inTryStmt > 0: c.tryStmtFixups.add c.gotoI(n) @@ -265,6 +553,7 @@ proc genImplicitReturn(c: var Con) = gen(c, c.owner.ast.sons[resultPos]) proc genReturn(c: var Con; n: PNode) = + genJoins(c, n) if n.sons[0].kind != nkEmpty: gen(c, n.sons[0]) else: @@ -287,6 +576,14 @@ proc genDef(c: var Con; n: PNode) = if n.kind == nkSym and n.sym.kind in InterestingSyms: c.code.add Instr(n: n, kind: def, sym: n.sym) +proc canRaise(fn: PNode): bool = + const magicsThatCanRaise = { + mNone, mSlurp, mStaticExec, mParseExprToAst, mParseStmtToAst} + if fn.kind == nkSym and fn.sym.magic notin magicsThatCanRaise: + result = false + else: + result = true + proc genCall(c: var Con; n: PNode) = gen(c, n[0]) var t = n[0].typ @@ -297,8 +594,16 @@ proc genCall(c: var Con; n: PNode) = if t != nil and i < t.len and t.sons[i].kind == tyVar: genDef(c, n[i]) # every call can potentially raise: - if c.inTryStmt > 0: - c.tryStmtFixups.add c.forkI(n) + if c.inTryStmt > 0 and canRaise(n[0]): + # we generate the instruction sequence: + # fork L1 + # goto exceptionHandler (except or finally) + # L1: + # join F1 + let endGoto = c.forkI(n) + c.tryStmtFixups.add c.gotoI(n) + c.patch(endGoto) + c.joinI(c.forks.pop(), n) dec c.inCall proc genMagic(c: var Con; n: PNode; m: TMagic) = @@ -368,114 +673,48 @@ proc gen(c: var Con; n: PNode) = doAssert false, "dfa construction pass requires the elimination of 'defer'" else: discard -proc dfa(code: seq[Instr]; conf: ConfigRef) = - var u = newSeq[IntSet](code.len) # usages - var d = newSeq[IntSet](code.len) # defs - var c = newSeq[IntSet](code.len) # consumed - var backrefs = initTable[int, int]() - for i in 0..<code.len: - u[i] = initIntSet() - d[i] = initIntSet() - c[i] = initIntSet() - case code[i].kind - of use: u[i].incl(code[i].sym.id) - of def: d[i].incl(code[i].sym.id) - of fork, goto: - let d = i+code[i].dest - backrefs.add(d, i) - - var w = @[0] - var maxIters = 50 - var someChange = true - var takenGotos = initIntSet() - var consuming = -1 - while w.len > 0 and maxIters > 0: # and someChange: - dec maxIters - var pc = w.pop() # w[^1] - var prevPc = -1 - # this simulates a single linear control flow execution: - while pc < code.len: - if prevPc >= 0: - someChange = false - # merge step and test for changes (we compute the fixpoints here): - # 'u' needs to be the union of prevPc, pc - # 'd' needs to be the intersection of 'pc' - for id in u[prevPc]: - if not u[pc].containsOrIncl(id): - someChange = true - # in (a; b) if ``a`` sets ``v`` so does ``b``. The intersection - # is only interesting on merge points: - for id in d[prevPc]: - if not d[pc].containsOrIncl(id): - someChange = true - # if this is a merge point, we take the intersection of the 'd' sets: - if backrefs.hasKey(pc): - var intersect = initIntSet() - assign(intersect, d[pc]) - var first = true - for prevPc in backrefs.allValues(pc): - for def in d[pc]: - if def notin d[prevPc]: - excl(intersect, def) - someChange = true - when defined(debugDfa): - echo "Excluding ", pc, " prev ", prevPc - assign d[pc], intersect - if consuming >= 0: - if not c[pc].containsOrIncl(consuming): - someChange = true - consuming = -1 - - # our interpretation ![I!]: - prevPc = pc - case code[pc].kind - of goto: - # we must leave endless loops eventually: - if not takenGotos.containsOrIncl(pc) or someChange: - pc = pc + code[pc].dest - else: - inc pc - of fork: - # we follow the next instruction but push the dest onto our "work" stack: - #if someChange: - w.add pc + code[pc].dest - inc pc - of use: - #if not d[prevPc].missingOrExcl(): - # someChange = true - consuming = code[pc].sym.id - inc pc - of def: - if not d[pc].containsOrIncl(code[pc].sym.id): - someChange = true - inc pc - - when defined(useDfa) and defined(debugDfa): - for i in 0..<code.len: - echo "PC ", i, ": defs: ", d[i], "; uses ", u[i], "; consumes ", c[i] - - # now check the condition we're interested in: - for i in 0..<code.len: - case code[i].kind - of use: - let s = code[i].sym - if s.id notin d[i]: - localError(conf, code[i].n.info, "usage of uninitialized variable: " & s.name.s) - if s.id in c[i]: - localError(conf, code[i].n.info, "usage of an already consumed variable: " & s.name.s) - - else: discard - -proc dataflowAnalysis*(s: PSym; body: PNode; conf: ConfigRef) = - var c = Con(code: @[], blocks: @[]) - gen(c, body) - genImplicitReturn(c) - when defined(useDfa) and defined(debugDfa): echoCfg(c.code) - dfa(c.code, conf) - proc constructCfg*(s: PSym; body: PNode): ControlFlowGraph = ## constructs a control flow graph for ``body``. var c = Con(code: @[], blocks: @[], owner: s) gen(c, body) genImplicitReturn(c) shallowCopy(result, c.code) + +proc interpret(code: ControlFlowGraph; pc: int, state: seq[PSym], comesFrom: int; threadId: int): (seq[PSym], int) = + var res = state + var pc = pc + while pc < code.len: + #echo threadId, " ", code[pc].kind + case code[pc].kind + of goto: + pc = pc + code[pc].dest + of fork: + let target = pc + code[pc].dest + let (branchA, pcA) = interpret(code, pc+1, res, pc, threadId+1) + let (branchB, _) = interpret(code, target, res, pc, threadId+2) + # we add vars if they are in both branches: + for v in branchB: + if v in branchA: + if v notin res: + res.add v + pc = pcA+1 + of join: + let target = pc + code[pc].dest + if comesFrom == target: return (res, pc) + inc pc + of use: + let v = code[pc].sym + if v notin res and v.kind != skParam: + echo "attempt to read uninitialized variable ", v.name.s + inc pc + of def: + let v = code[pc].sym + if v notin res: + res.add v + inc pc + return (res, pc) + +proc dataflowAnalysis*(s: PSym; body: PNode) = + let c = constructCfg(s, body) + #echoCfg c + discard interpret(c, 0, @[], -1, 1) diff --git a/compiler/sempass2.nim b/compiler/sempass2.nim index 75dea069f..81d637fee 100644 --- a/compiler/sempass2.nim +++ b/compiler/sempass2.nim @@ -1013,7 +1013,7 @@ proc trackProc*(g: ModuleGraph; s: PSym, body: PNode) = "declared lock level is $1, but real lock level is $2" % [$s.typ.lockLevel, $t.maxLockLevel]) when defined(useDfa): - if s.kind == skFunc: + if s.name.s == "testp": dataflowAnalysis(s, body) when false: trackWrites(s, body) |