# # # The Nim Compiler # (c) Copyright 2015 Andreas Rumpf # # See the file "copying.txt", included in this # distribution, for details about the copyright. # ## Semantic checking for 'parallel'. # - codegen needs to support mSlice (+) # - lowerings must not perform unnecessary copies (+) # - slices should become "nocopy" to openArray (+) # - need to perform bound checks (+) # # - parallel needs to insert a barrier (+) # - passed arguments need to be ensured to be "const" # - what about 'f(a)'? --> f shouldn't have side effects anyway # - passed arrays need to be ensured not to alias # - passed slices need to be ensured to be disjoint (+) # - output slices need special logic (+) import ast, astalgo, idents, lowerings, magicsys, guards, msgs, renderer, types, modulegraphs, options, spawn, lineinfos from trees import getMagic, getRoot from std/strutils import `%` discard """ one major problem: spawn f(a[i]) inc i spawn f(a[i]) is valid, but spawn f(a[i]) spawn f(a[i]) inc i is not! However, spawn f(a[i]) if guard: inc i spawn f(a[i]) is not valid either! --> We need a flow dependent analysis here. However: while foo: spawn f(a[i]) inc i spawn f(a[i]) Is not valid either! --> We should really restrict 'inc' to loop endings? The heuristic that we implement here (that has no false positives) is: Usage of 'i' in a slice *after* we determined the stride is invalid! """ type TDirection = enum ascending, descending MonotonicVar = object v, alias: PSym # to support the ordinary 'countup' iterator # we need to detect aliases lower, upper, stride: PNode dir: TDirection blacklisted: bool # blacklisted variables that are not monotonic AnalysisCtx = object locals: seq[MonotonicVar] slices: seq[tuple[x,a,b: PNode, spawnId: int, inLoop: bool]] guards: TModel # nested guards args: seq[PSym] # args must be deeply immutable spawns: int # we can check that at last 1 spawn is used in # the 'parallel' section currentSpawnId: int inLoop: int graph: ModuleGraph proc initAnalysisCtx(g: ModuleGraph): AnalysisCtx = result = AnalysisCtx(locals: @[], slices: @[], args: @[], graph: g) result.guards.s = @[] result.guards.g = g proc lookupSlot(c: AnalysisCtx; s: PSym): int = for i in 0..= 0: return addr(c.locals[s]) c.locals.setLen(c.locals.len+1) c.locals[^1].v = v return addr(c.locals[^1]) proc gatherArgs(c: var AnalysisCtx; n: PNode) = for i in 0..= 0 and c.locals[s].stride != nil: localError(c.graph.config, n.info, "invalid usage of counter after increment") else: for i in 0.. " & ?b) proc checkBounds(c: AnalysisCtx; arr, idx: PNode) = checkLe(c, lowBound(c.graph.config, arr), idx) checkLe(c, idx, highBound(c.graph.config, arr, c.graph.operators)) proc addLowerBoundAsFacts(c: var AnalysisCtx) = for v in c.locals: if not v.blacklisted: c.guards.addFactLe(v.lower, newSymNode(v.v)) proc addSlice(c: var AnalysisCtx; n: PNode; x, le, ri: PNode) = checkLocal(c, n) let le = le.canon(c.graph.operators) let ri = ri.canon(c.graph.operators) # perform static bounds checking here; and not later! let oldState = c.guards.s.len addLowerBoundAsFacts(c) c.checkBounds(x, le) c.checkBounds(x, ri) c.guards.s.setLen(oldState) c.slices.add((x, le, ri, c.currentSpawnId, c.inLoop > 0)) proc overlap(m: TModel; conf: ConfigRef; x,y,c,d: PNode) = # X..Y and C..D overlap iff (X <= D and C <= Y) case proveLe(m, c, y) of impUnknown: case proveLe(m, x, d) of impNo: discard of impUnknown, impYes: message(conf, x.info, warnStaticIndexCheck, "cannot prove: $# > $#; required for ($#)..($#) disjoint from ($#)..($#)" % [?c, ?y, ?x, ?y, ?c, ?d]) of impYes: case proveLe(m, x, d) of impUnknown: message(conf, x.info, warnStaticIndexCheck, "cannot prove: $# > $#; required for ($#)..($#) disjoint from ($#)..($#)" % [?x, ?d, ?x, ?y, ?c, ?d]) of impYes: message(conf, x.info, warnStaticIndexCheck, "($#)..($#) not disjoint from ($#)..($#)" % [?c, ?y, ?x, ?y, ?c, ?d]) of impNo: discard of impNo: discard proc stride(c: AnalysisCtx; n: PNode): BiggestInt = if isLocal(n): let s = c.lookupSlot(n.sym) if s >= 0 and c.locals[s].stride != nil: result = c.locals[s].stride.intVal else: result = 0 else: result = 0 for i in 0..= 0 and c.locals[s].stride != nil: result = buildAdd(n, c.locals[s].stride.intVal, c.graph.operators) else: result = n elif n.safeLen > 0: result = shallowCopy(n) for i in 0..= 0 and m >= 0): # ah I cannot resist the temptation and add another sweet heuristic: # if both slices have the form (i+k)..(i+k) and (i+m)..(i+m) we # check they are disjoint and k < stride and m < stride: overlap(c.guards, c.graph.config, x.a, x.b, y.a, y.b) let stride = min(c.stride(x.a), c.stride(y.a)) if k < stride and m < stride: discard else: localError(c.graph.config, x.x.info, "cannot prove ($#)..($#) disjoint from ($#)..($#)" % [?x.a, ?x.b, ?y.a, ?y.b]) else: localError(c.graph.config, x.x.info, "cannot prove ($#)..($#) disjoint from ($#)..($#)" % [?x.a, ?x.b, ?y.a, ?y.b]) proc analyse(c: var AnalysisCtx; n: PNode) proc analyseSons(c: var AnalysisCtx; n: PNode) = for i in 0.. 0: result = shallowCopy(n) for i in 0.. 0: result = transformSpawnSons(g, idgen, owner, n, barrier) else: result = n proc checkArgs(a: var AnalysisCtx; n: PNode) = discard "to implement" proc generateAliasChecks(a: AnalysisCtx; result: PNode) = discard "to implement" proc liftParallel*(g: ModuleGraph; idgen: IdGenerator; owner: PSym; n: PNode): PNode = # this needs to be called after the 'for' loop elimination # first pass: # - detect monotonic local integer variables # - detect used slices # - detect used arguments #echo "PAR ", renderTree(n) var a = initAnalysisCtx(g) let body = n.lastSon analyse(a, body) if a.spawns == 0: localError(g.config, n.info, "'parallel' section without 'spawn'") checkSlicesAreDisjoint(a) checkArgs(a, body) var varSection = newNodeI(nkVarSection, n.info) var temp = newSym(skTemp, getIdent(g.cache, "barrier"), idgen, owner, n.info) temp.typ = magicsys.getCompilerProc(g, "Barrier").typ incl(temp.flags, sfFromGeneric) let tempNode = newSymNode(temp) varSection.addVar tempNode let barrier = genAddrOf(tempNode, idgen) result = newNodeI(nkStmtList, n.info) generateAliasChecks(a, result) result.add varSection result.add callCodegenProc(g, "openBarrier", barrier.info, barrier) result.add transformSpawn(g, idgen, owner, body, barrier) result.add callCodegenProc(g, "closeBarrier", barrier.info, barrier)