diff options
Diffstat (limited to 'compiler/semparallel.nim')
-rw-r--r-- | compiler/semparallel.nim | 504 |
1 files changed, 504 insertions, 0 deletions
diff --git a/compiler/semparallel.nim b/compiler/semparallel.nim new file mode 100644 index 000000000..23a8e6362 --- /dev/null +++ b/compiler/semparallel.nim @@ -0,0 +1,504 @@ +# +# +# The Nim Compiler +# (c) Copyright 2015 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## Semantic checking for 'parallel'. + +# - codegen needs to support mSlice (+) +# - lowerings must not perform unnecessary copies (+) +# - slices should become "nocopy" to openArray (+) +# - need to perform bound checks (+) +# +# - parallel needs to insert a barrier (+) +# - passed arguments need to be ensured to be "const" +# - what about 'f(a)'? --> f shouldn't have side effects anyway +# - passed arrays need to be ensured not to alias +# - passed slices need to be ensured to be disjoint (+) +# - output slices need special logic (+) + +import + ast, astalgo, idents, lowerings, magicsys, guards, msgs, + renderer, types, modulegraphs, options, spawn, lineinfos + +from trees import getMagic, getRoot +from std/strutils import `%` + +discard """ + +one major problem: + spawn f(a[i]) + inc i + spawn f(a[i]) +is valid, but + spawn f(a[i]) + spawn f(a[i]) + inc i +is not! However, + spawn f(a[i]) + if guard: inc i + spawn f(a[i]) +is not valid either! --> We need a flow dependent analysis here. + +However: + while foo: + spawn f(a[i]) + inc i + spawn f(a[i]) + +Is not valid either! --> We should really restrict 'inc' to loop endings? + +The heuristic that we implement here (that has no false positives) is: Usage +of 'i' in a slice *after* we determined the stride is invalid! +""" + +type + TDirection = enum + ascending, descending + MonotonicVar = object + v, alias: PSym # to support the ordinary 'countup' iterator + # we need to detect aliases + lower, upper, stride: PNode + dir: TDirection + blacklisted: bool # blacklisted variables that are not monotonic + AnalysisCtx = object + locals: seq[MonotonicVar] + slices: seq[tuple[x,a,b: PNode, spawnId: int, inLoop: bool]] + guards: TModel # nested guards + args: seq[PSym] # args must be deeply immutable + spawns: int # we can check that at last 1 spawn is used in + # the 'parallel' section + currentSpawnId: int + inLoop: int + graph: ModuleGraph + +proc initAnalysisCtx(g: ModuleGraph): AnalysisCtx = + result = AnalysisCtx(locals: @[], + slices: @[], + args: @[], + graph: g) + result.guards.s = @[] + result.guards.g = g + +proc lookupSlot(c: AnalysisCtx; s: PSym): int = + for i in 0..<c.locals.len: + if c.locals[i].v == s or c.locals[i].alias == s: return i + return -1 + +proc getSlot(c: var AnalysisCtx; v: PSym): ptr MonotonicVar = + let s = lookupSlot(c, v) + if s >= 0: return addr(c.locals[s]) + c.locals.setLen(c.locals.len+1) + c.locals[^1].v = v + return addr(c.locals[^1]) + +proc gatherArgs(c: var AnalysisCtx; n: PNode) = + for i in 0..<n.safeLen: + let root = getRoot n[i] + if root != nil: + block addRoot: + for r in items(c.args): + if r == root: break addRoot + c.args.add root + gatherArgs(c, n[i]) + +proc isSingleAssignable(n: PNode): bool = + n.kind == nkSym and (let s = n.sym; + s.kind in {skTemp, skForVar, skLet} and + {sfAddrTaken, sfGlobal} * s.flags == {}) + +proc isLocal(n: PNode): bool = + n.kind == nkSym and (let s = n.sym; + s.kind in {skResult, skTemp, skForVar, skVar, skLet} and + {sfAddrTaken, sfGlobal} * s.flags == {}) + +proc checkLocal(c: AnalysisCtx; n: PNode) = + if isLocal(n): + let s = c.lookupSlot(n.sym) + if s >= 0 and c.locals[s].stride != nil: + localError(c.graph.config, n.info, "invalid usage of counter after increment") + else: + for i in 0..<n.safeLen: checkLocal(c, n[i]) + +template `?`(x): untyped = x.renderTree + +proc checkLe(c: AnalysisCtx; a, b: PNode) = + case proveLe(c.guards, a, b) + of impUnknown: + message(c.graph.config, a.info, warnStaticIndexCheck, + "cannot prove: " & ?a & " <= " & ?b) + of impYes: discard + of impNo: + message(c.graph.config, a.info, warnStaticIndexCheck, + "can prove: " & ?a & " > " & ?b) + +proc checkBounds(c: AnalysisCtx; arr, idx: PNode) = + checkLe(c, lowBound(c.graph.config, arr), idx) + checkLe(c, idx, highBound(c.graph.config, arr, c.graph.operators)) + +proc addLowerBoundAsFacts(c: var AnalysisCtx) = + for v in c.locals: + if not v.blacklisted: + c.guards.addFactLe(v.lower, newSymNode(v.v)) + +proc addSlice(c: var AnalysisCtx; n: PNode; x, le, ri: PNode) = + checkLocal(c, n) + let le = le.canon(c.graph.operators) + let ri = ri.canon(c.graph.operators) + # perform static bounds checking here; and not later! + let oldState = c.guards.s.len + addLowerBoundAsFacts(c) + c.checkBounds(x, le) + c.checkBounds(x, ri) + c.guards.s.setLen(oldState) + c.slices.add((x, le, ri, c.currentSpawnId, c.inLoop > 0)) + +proc overlap(m: TModel; conf: ConfigRef; x,y,c,d: PNode) = + # X..Y and C..D overlap iff (X <= D and C <= Y) + case proveLe(m, c, y) + of impUnknown: + case proveLe(m, x, d) + of impNo: discard + of impUnknown, impYes: + message(conf, x.info, warnStaticIndexCheck, + "cannot prove: $# > $#; required for ($#)..($#) disjoint from ($#)..($#)" % + [?c, ?y, ?x, ?y, ?c, ?d]) + of impYes: + case proveLe(m, x, d) + of impUnknown: + message(conf, x.info, warnStaticIndexCheck, + "cannot prove: $# > $#; required for ($#)..($#) disjoint from ($#)..($#)" % + [?x, ?d, ?x, ?y, ?c, ?d]) + of impYes: + message(conf, x.info, warnStaticIndexCheck, "($#)..($#) not disjoint from ($#)..($#)" % + [?c, ?y, ?x, ?y, ?c, ?d]) + of impNo: discard + of impNo: discard + +proc stride(c: AnalysisCtx; n: PNode): BiggestInt = + if isLocal(n): + let s = c.lookupSlot(n.sym) + if s >= 0 and c.locals[s].stride != nil: + result = c.locals[s].stride.intVal + else: + result = 0 + else: + result = 0 + for i in 0..<n.safeLen: result += stride(c, n[i]) + +proc subStride(c: AnalysisCtx; n: PNode): PNode = + # substitute with stride: + if isLocal(n): + let s = c.lookupSlot(n.sym) + if s >= 0 and c.locals[s].stride != nil: + result = buildAdd(n, c.locals[s].stride.intVal, c.graph.operators) + else: + result = n + elif n.safeLen > 0: + result = shallowCopy(n) + for i in 0..<n.len: result[i] = subStride(c, n[i]) + else: + result = n + +proc checkSlicesAreDisjoint(c: var AnalysisCtx) = + # this is the only thing that we need to perform after we have traversed + # the whole tree so that the strides are available. + # First we need to add all the computed lower bounds: + addLowerBoundAsFacts(c) + # Every slice used in a loop needs to be disjoint with itself: + for x,a,b,id,inLoop in items(c.slices): + if inLoop: overlap(c.guards, c.graph.config, a,b, c.subStride(a), c.subStride(b)) + # Another tricky example is: + # while true: + # spawn f(a[i]) + # spawn f(a[i+1]) + # inc i # inc i, 2 would be correct here + # + # Or even worse: + # while true: + # spawn f(a[i+1..i+3]) + # spawn f(a[i+4..i+5]) + # inc i, 4 + # Prove that i*k*stride + 3 != i*k'*stride + 5 + # For the correct example this amounts to + # i*k*2 != i*k'*2 + 1 + # which is true. + # For now, we don't try to prove things like that at all, even though it'd + # be feasible for many useful examples. Instead we attach the slice to + # a spawn and if the attached spawns differ, we bail out: + for i in 0..high(c.slices): + for j in i+1..high(c.slices): + let x = c.slices[i] + let y = c.slices[j] + if x.spawnId != y.spawnId and guards.sameTree(x.x, y.x): + if not x.inLoop or not y.inLoop: + # XXX strictly speaking, 'or' is not correct here and it needs to + # be 'and'. However this prevents too many obviously correct programs + # like f(a[0..x]); for i in x+1..a.high: f(a[i]) + overlap(c.guards, c.graph.config, x.a, x.b, y.a, y.b) + elif (let k = simpleSlice(x.a, x.b); let m = simpleSlice(y.a, y.b); + k >= 0 and m >= 0): + # ah I cannot resist the temptation and add another sweet heuristic: + # if both slices have the form (i+k)..(i+k) and (i+m)..(i+m) we + # check they are disjoint and k < stride and m < stride: + overlap(c.guards, c.graph.config, x.a, x.b, y.a, y.b) + let stride = min(c.stride(x.a), c.stride(y.a)) + if k < stride and m < stride: + discard + else: + localError(c.graph.config, x.x.info, "cannot prove ($#)..($#) disjoint from ($#)..($#)" % + [?x.a, ?x.b, ?y.a, ?y.b]) + else: + localError(c.graph.config, x.x.info, "cannot prove ($#)..($#) disjoint from ($#)..($#)" % + [?x.a, ?x.b, ?y.a, ?y.b]) + +proc analyse(c: var AnalysisCtx; n: PNode) + +proc analyseSons(c: var AnalysisCtx; n: PNode) = + for i in 0..<n.safeLen: analyse(c, n[i]) + +proc min(a, b: PNode): PNode = + if a.isNil: result = b + elif a.intVal < b.intVal: result = a + else: result = b + +template pushSpawnId(c, body) {.dirty.} = + inc c.spawns + let oldSpawnId = c.currentSpawnId + c.currentSpawnId = c.spawns + body + c.currentSpawnId = oldSpawnId + +proc analyseCall(c: var AnalysisCtx; n: PNode; op: PSym) = + if op.magic == mSpawn: + pushSpawnId(c): + gatherArgs(c, n[1]) + analyseSons(c, n) + elif op.magic == mInc or (op.name.s == "+=" and op.fromSystem): + if n[1].isLocal: + let incr = n[2].skipConv + if incr.kind in {nkCharLit..nkUInt32Lit} and incr.intVal > 0: + let slot = c.getSlot(n[1].sym) + slot.stride = min(slot.stride, incr) + analyseSons(c, n) + elif op.name.s == "[]" and op.fromSystem: + let slice = n[2].skipStmtList + c.addSlice(n, n[1], slice[1], slice[2]) + analyseSons(c, n) + elif op.name.s == "[]=" and op.fromSystem: + let slice = n[2].skipStmtList + c.addSlice(n, n[1], slice[1], slice[2]) + analyseSons(c, n) + else: + analyseSons(c, n) + +proc analyseCase(c: var AnalysisCtx; n: PNode) = + analyse(c, n[0]) + let oldFacts = c.guards.s.len + for i in 1..<n.len: + let branch = n[i] + setLen(c.guards.s, oldFacts) + addCaseBranchFacts(c.guards, n, i) + for i in 0..<branch.len: + analyse(c, branch[i]) + setLen(c.guards.s, oldFacts) + +proc analyseIf(c: var AnalysisCtx; n: PNode) = + analyse(c, n[0][0]) + let oldFacts = c.guards.s.len + addFact(c.guards, canon(n[0][0], c.graph.operators)) + + analyse(c, n[0][1]) + for i in 1..<n.len: + let branch = n[i] + setLen(c.guards.s, oldFacts) + for j in 0..i-1: + addFactNeg(c.guards, canon(n[j][0], c.graph.operators)) + if branch.len > 1: + addFact(c.guards, canon(branch[0], c.graph.operators)) + for i in 0..<branch.len: + analyse(c, branch[i]) + setLen(c.guards.s, oldFacts) + +proc analyse(c: var AnalysisCtx; n: PNode) = + case n.kind + of nkAsgn, nkFastAsgn, nkSinkAsgn: + let y = n[1].skipConv + if n[0].isSingleAssignable and y.isLocal: + let slot = c.getSlot(y.sym) + slot.alias = n[0].sym + elif n[0].isLocal: + # since we already ensure sfAddrTaken is not in s.flags, we only need to + # prevent direct assignments to the monotonic variable: + let slot = c.getSlot(n[0].sym) + slot.blacklisted = true + invalidateFacts(c.guards, n[0]) + let value = n[1] + if getMagic(value) == mSpawn: + pushSpawnId(c): + gatherArgs(c, value[1]) + analyseSons(c, value[1]) + analyse(c, n[0]) + else: + analyseSons(c, n) + addAsgnFact(c.guards, n[0], y) + of nkCallKinds: + # direct call: + if n[0].kind == nkSym: analyseCall(c, n, n[0].sym) + else: analyseSons(c, n) + of nkBracketExpr: + if n[0].typ != nil and skipTypes(n[0].typ, abstractVar).kind != tyTuple: + c.addSlice(n, n[0], n[1], n[1]) + analyseSons(c, n) + of nkReturnStmt, nkRaiseStmt, nkTryStmt, nkHiddenTryStmt: + localError(c.graph.config, n.info, "invalid control flow for 'parallel'") + # 'break' that leaves the 'parallel' section is not valid either + # or maybe we should generate a 'try' XXX + of nkVarSection, nkLetSection: + for it in n: + let value = it.lastSon + let isSpawned = getMagic(value) == mSpawn + if isSpawned: + pushSpawnId(c): + gatherArgs(c, value[1]) + analyseSons(c, value[1]) + if value.kind != nkEmpty: + for j in 0..<it.len-2: + if it[j].isLocal: + let slot = c.getSlot(it[j].sym) + if slot.lower.isNil: slot.lower = value + else: internalError(c.graph.config, it.info, "slot already has a lower bound") + if not isSpawned: analyse(c, value) + of nkCaseStmt: analyseCase(c, n) + of nkWhen, nkIfStmt, nkIfExpr: analyseIf(c, n) + of nkWhileStmt: + analyse(c, n[0]) + # 'while true' loop? + inc c.inLoop + if isTrue(n[0]): + analyseSons(c, n[1]) + else: + # loop may never execute: + let oldState = c.locals.len + let oldFacts = c.guards.s.len + addFact(c.guards, canon(n[0], c.graph.operators)) + analyse(c, n[1]) + setLen(c.locals, oldState) + setLen(c.guards.s, oldFacts) + # we know after the loop the negation holds: + if not hasSubnodeWith(n[1], nkBreakStmt): + addFactNeg(c.guards, canon(n[0], c.graph.operators)) + dec c.inLoop + of nkTypeSection, nkProcDef, nkConverterDef, nkMethodDef, nkIteratorDef, + nkMacroDef, nkTemplateDef, nkConstSection, nkPragma, nkFuncDef, + nkMixinStmt, nkBindStmt, nkExportStmt: + discard + else: + analyseSons(c, n) + +proc transformSlices(g: ModuleGraph; idgen: IdGenerator; n: PNode): PNode = + if n.kind in nkCallKinds and n[0].kind == nkSym: + let op = n[0].sym + if op.name.s == "[]" and op.fromSystem: + result = copyNode(n) + var typ = newType(tyOpenArray, idgen, result.typ.owner) + typ.add result.typ.elementType + result.typ = typ + let opSlice = newSymNode(createMagic(g, idgen, "slice", mSlice)) + opSlice.typ = getSysType(g, n.info, tyInt) + result.add opSlice + result.add n[1] + let slice = n[2].skipStmtList + result.add slice[1] + result.add slice[2] + return result + if n.safeLen > 0: + result = shallowCopy(n) + for i in 0..<n.len: + result[i] = transformSlices(g, idgen, n[i]) + else: + result = n + +proc transformSpawn(g: ModuleGraph; idgen: IdGenerator; owner: PSym; n, barrier: PNode): PNode +proc transformSpawnSons(g: ModuleGraph; idgen: IdGenerator; owner: PSym; n, barrier: PNode): PNode = + result = shallowCopy(n) + for i in 0..<n.len: + result[i] = transformSpawn(g, idgen, owner, n[i], barrier) + +proc transformSpawn(g: ModuleGraph; idgen: IdGenerator; owner: PSym; n, barrier: PNode): PNode = + case n.kind + of nkVarSection, nkLetSection: + result = nil + for it in n: + let b = it.lastSon + if getMagic(b) == mSpawn: + if it.len != 3: localError(g.config, it.info, "invalid context for 'spawn'") + let m = transformSlices(g, idgen, b) + if result.isNil: + result = newNodeI(nkStmtList, n.info) + result.add n + let t = b[1][0].typ.returnType + if spawnResult(t, true) == srByVar: + result.add wrapProcForSpawn(g, idgen, owner, m, b.typ, barrier, it[0]) + it[^1] = newNodeI(nkEmpty, it.info) + else: + it[^1] = wrapProcForSpawn(g, idgen, owner, m, b.typ, barrier, nil) + if result.isNil: result = n + of nkAsgn, nkFastAsgn, nkSinkAsgn: + let b = n[1] + if getMagic(b) == mSpawn and (let t = b[1][0].typ.returnType; + spawnResult(t, true) == srByVar): + let m = transformSlices(g, idgen, b) + return wrapProcForSpawn(g, idgen, owner, m, b.typ, barrier, n[0]) + result = transformSpawnSons(g, idgen, owner, n, barrier) + of nkCallKinds: + if getMagic(n) == mSpawn: + result = transformSlices(g, idgen, n) + return wrapProcForSpawn(g, idgen, owner, result, n.typ, barrier, nil) + result = transformSpawnSons(g, idgen, owner, n, barrier) + elif n.safeLen > 0: + result = transformSpawnSons(g, idgen, owner, n, barrier) + else: + result = n + +proc checkArgs(a: var AnalysisCtx; n: PNode) = + discard "to implement" + +proc generateAliasChecks(a: AnalysisCtx; result: PNode) = + discard "to implement" + +proc liftParallel*(g: ModuleGraph; idgen: IdGenerator; owner: PSym; n: PNode): PNode = + # this needs to be called after the 'for' loop elimination + + # first pass: + # - detect monotonic local integer variables + # - detect used slices + # - detect used arguments + #echo "PAR ", renderTree(n) + + var a = initAnalysisCtx(g) + let body = n.lastSon + analyse(a, body) + if a.spawns == 0: + localError(g.config, n.info, "'parallel' section without 'spawn'") + checkSlicesAreDisjoint(a) + checkArgs(a, body) + + var varSection = newNodeI(nkVarSection, n.info) + var temp = newSym(skTemp, getIdent(g.cache, "barrier"), idgen, owner, n.info) + temp.typ = magicsys.getCompilerProc(g, "Barrier").typ + incl(temp.flags, sfFromGeneric) + let tempNode = newSymNode(temp) + varSection.addVar tempNode + + let barrier = genAddrOf(tempNode, idgen) + result = newNodeI(nkStmtList, n.info) + generateAliasChecks(a, result) + result.add varSection + result.add callCodegenProc(g, "openBarrier", barrier.info, barrier) + result.add transformSpawn(g, idgen, owner, body, barrier) + result.add callCodegenProc(g, "closeBarrier", barrier.info, barrier) |