# # # The Nim Compiler # (c) Copyright 2017 Andreas Rumpf # # See the file "copying.txt", included in this # distribution, for details about the copyright. # ## Data flow analysis for Nim. For now the task is to prove that every ## usage of a local variable 'v' is covered by an initialization to 'v' ## first. ## We transform the AST into a linear list of instructions first to ## make this easier to handle: There are only 2 different branching ## instructions: 'goto X' is an unconditional goto, 'fork X' ## is a conditional goto (either the next instruction or 'X' can be ## taken). Exhaustive case statements are translated ## so that the last branch is transformed into an 'else' branch. ## ``return`` and ``break`` are all covered by 'goto'. ## The case to detect is ``use v`` that is not dominated by ## a ``def v``. ## The data structures and algorithms used here are inspired by ## "A Graph–Free Approach to Data–Flow Analysis" by Markus Mohnen. ## https://link.springer.com/content/pdf/10.1007/3-540-45937-5_6.pdf import ast, astalgo, types, intsets, tables, msgs type InstrKind* = enum goto, fork, def, use, useWithinCall # this strange special case is used to get more # move optimizations out of regular code # XXX This is still overly pessimistic in # call(let x = foo; bar(x)) Instr* = object n*: PNode case kind*: InstrKind of def, use, useWithinCall: sym*: PSym of goto, fork: dest*: int ControlFlowGraph* = seq[Instr] TPosition = distinct int TBlock = object label: PSym fixups: seq[TPosition] ValueKind = enum undef, value, valueOrUndef Con = object code: ControlFlowGraph inCall: int blocks: seq[TBlock] proc debugInfo(info: TLineInfo): string = result = info.toFilename & ":" & $info.line proc codeListing(c: ControlFlowGraph, result: var string, start=0; last = -1) = # for debugging purposes # first iteration: compute all necessary labels: var jumpTargets = initIntSet() let last = if last < 0: c.len-1 else: min(last, c.len-1) for i in start..last: if c[i].kind in {goto, fork}: jumpTargets.incl(i+c[i].dest) var i = start while i <= last: if i in jumpTargets: result.add("L" & $i & ":\n") result.add "\t" result.add $c[i].kind result.add "\t" case c[i].kind of def, use, useWithinCall: result.add c[i].sym.name.s of goto, fork: result.add "L" result.add c[i].dest+i result.add("\t#") result.add(debugInfo(c[i].n.info)) result.add("\n") inc i if i in jumpTargets: result.add("L" & $i & ": End\n") proc echoCfg*(c: ControlFlowGraph; start=0; last = -1) {.deprecated.} = ## echos the ControlFlowGraph for debugging purposes. var buf = "" codeListing(c, buf, start, last) echo buf proc forkI(c: var Con; n: PNode): TPosition = result = TPosition(c.code.len) c.code.add Instr(n: n, kind: fork, dest: 0) proc gotoI(c: var Con; n: PNode): TPosition = result = TPosition(c.code.len) c.code.add Instr(n: n, kind: goto, dest: 0) proc genLabel(c: Con): TPosition = result = TPosition(c.code.len) proc jmpBack(c: var Con, n: PNode, p = TPosition(0)) = let dist = p.int - c.code.len internalAssert(-0x7fff < dist and dist < 0x7fff) c.code.add Instr(n: n, kind: goto, dest: dist) proc patch(c: var Con, p: TPosition) = # patch with current index let p = p.int let diff = c.code.len - p internalAssert(-0x7fff < diff and diff < 0x7fff) c.code[p].dest = diff proc popBlock(c: var Con; oldLen: int) = for f in c.blocks[oldLen].fixups: c.patch(f) c.blocks.setLen(oldLen) template withBlock(labl: PSym; body: untyped) {.dirty.} = var oldLen {.gensym.} = c.blocks.len c.blocks.add TBlock(label: labl, fixups: @[]) body popBlock(c, oldLen) proc isTrue(n: PNode): bool = n.kind == nkSym and n.sym.kind == skEnumField and n.sym.position != 0 or n.kind == nkIntLit and n.intVal != 0 proc gen(c: var Con; n: PNode) # {.noSideEffect.} proc genWhile(c: var Con; n: PNode) = # L1: # cond, tmp # fork tmp, L2 # body # jmp L1 # L2: let L1 = c.genLabel withBlock(nil): if isTrue(n.sons[0]): c.gen(n.sons[1]) c.jmpBack(n, L1) else: c.gen(n.sons[0]) let L2 = c.forkI(n) c.gen(n.sons[1]) c.jmpBack(n, L1) c.patch(L2) proc genBlock(c: var Con; n: PNode) = withBlock(n.sons[0].sym): c.gen(n.sons[1]) proc genBreak(c: var Con; n: PNode) = let L1 = c.gotoI(n) if n.sons[0].kind == nkSym: #echo cast[int](n.sons[0].sym) for i in countdown(c.blocks.len-1, 0): if c.blocks[i].label == n.sons[0].sym: c.blocks[i].fixups.add L1 return globalError(n.info, errGenerated, "VM problem: cannot find 'break' target") else: c.blocks[c.blocks.high].fixups.add L1 proc genIf(c: var Con, n: PNode) = var endings: seq[TPosition] = @[] for i in countup(0, len(n) - 1): var it = n.sons[i] c.gen(it.sons[0]) if it.len == 2: let elsePos = c.forkI(it.sons[1]) c.gen(it.sons[1]) if i < sonsLen(n)-1: endings.add(c.gotoI(it.sons[1])) c.patch(elsePos) for endPos in endings: c.patch(endPos) proc genAndOr(c: var Con; n: PNode) = # asgn dest, a # fork L1 # asgn dest, b # L1: c.gen(n.sons[1]) let L1 = c.forkI(n) c.gen(n.sons[2]) c.patch(L1) proc genCase(c: var Con; n: PNode) = # if (!expr1) goto L1; # thenPart # goto LEnd # L1: # if (!expr2) goto L2; # thenPart2 # goto LEnd # L2: # elsePart # Lend: var endings: seq[TPosition] = @[] c.gen(n.sons[0]) for i in 1 ..< n.len: let it = n.sons[i] if it.len == 1: c.gen(it.sons[0]) else: let elsePos = c.forkI(it.lastSon) c.gen(it.lastSon) if i < sonsLen(n)-1: endings.add(c.gotoI(it.lastSon)) c.patch(elsePos) for endPos in endings: c.patch(endPos) proc genTry(c: var Con; n: PNode) = var endings: seq[TPosition] = @[] let elsePos = c.forkI(n) c.gen(n.sons[0]) c.patch(elsePos) for i in 1 ..< n.len: let it = n.sons[i] if it.kind != nkFinally: var blen = len(it) let endExcept = c.forkI(it) c.gen(it.lastSon) if i < sonsLen(n)-1: endings.add(c.gotoI(it)) c.patch(endExcept) for endPos in endings: c.patch(endPos) let fin = lastSon(n) if fin.kind == nkFinally: c.gen(fin.sons[0]) proc genRaise(c: var Con; n: PNode) = gen(c, n.sons[0]) c.code.add Instr(n: n, kind: goto, dest: high(int) - c.code.len) proc genReturn(c: var Con; n: PNode) = if n.sons[0].kind != nkEmpty: gen(c, n.sons[0]) c.code.add Instr(n: n, kind: goto, dest: high(int) - c.code.len) const InterestingSyms = {skVar, skResult, skLet} proc genUse(c: var Con; n: PNode) = var n = n while n.kind in {nkDotExpr, nkCheckedFieldExpr, nkBracketExpr, nkDerefExpr, nkHiddenDeref, nkAddr, nkHiddenAddr}: n = n[0] if n.kind == nkSym and n.sym.kind in InterestingSyms: if c.inCall > 0: c.code.add Instr(n: n, kind: useWithinCall, sym: n.sym) else: c.code.add Instr(n: n, kind: use, sym: n.sym) proc genDef(c: var Con; n: PNode) = if n.kind == nkSym and n.sym.kind in InterestingSyms: c.code.add Instr(n: n, kind: def, sym: n.sym) proc genCall(c: var Con; n: PNode) = gen(c, n[0]) var t = n[0].typ if t != nil: t = t.skipTypes(abstractInst) inc c.inCall for i in 1.. 0 and maxIters > 0: # and someChange: dec maxIters var pc = w.pop() # w[^1] var prevPc = -1 # this simulates a single linear control flow execution: while pc < code.len: if prevPc >= 0: someChange = false # merge step and test for changes (we compute the fixpoints here): # 'u' needs to be the union of prevPc, pc # 'd' needs to be the intersection of 'pc' for id in u[prevPc]: if not u[pc].containsOrIncl(id): someChange = true # in (a; b) if ``a`` sets ``v`` so does ``b``. The intersection # is only interesting on merge points: for id in d[prevPc]: if not d[pc].containsOrIncl(id): someChange = true # if this is a merge point, we take the intersection of the 'd' sets: if backrefs.hasKey(pc): var intersect = initIntSet() assign(intersect, d[pc]) var first = true for prevPc in backrefs.allValues(pc): for def in d[pc]: if def notin d[prevPc]: excl(intersect, def) someChange = true when defined(debugDfa): echo "Excluding ", pc, " prev ", prevPc assign d[pc], intersect if consuming >= 0: if not c[pc].containsOrIncl(consuming): someChange = true consuming = -1 # our interpretation ![I!]: prevPc = pc case code[pc].kind of goto: # we must leave endless loops eventually: if not takenGotos.containsOrIncl(pc) or someChange: pc = pc + code[pc].dest else: inc pc of fork: # we follow the next instruction but push the dest onto our "work" stack: #if someChange: w.add pc + code[pc].dest inc pc of use, useWithinCall: #if not d[prevPc].missingOrExcl(): # someChange = true consuming = code[pc].sym.id inc pc of def: if not d[pc].containsOrIncl(code[pc].sym.id): someChange = true inc pc when defined(useDfa) and defined(debugDfa): for i in 0..