diff options
-rw-r--r-- | compiler/ccgexprs.nim | 2 | ||||
-rw-r--r-- | compiler/guards.nim | 58 | ||||
-rw-r--r-- | compiler/lowerings.nim | 286 | ||||
-rw-r--r-- | compiler/semmagic.nim | 12 | ||||
-rw-r--r-- | compiler/semparallel.nim | 89 | ||||
-rw-r--r-- | doc/manual.txt | 2 | ||||
-rw-r--r-- | lib/pure/concurrency/threadpool.nim | 112 | ||||
-rw-r--r-- | lib/system/atomics.nim | 6 | ||||
-rw-r--r-- | tests/parallel/tdisjoint_slice1.nim | 16 | ||||
-rw-r--r-- | tests/parallel/tinvalid_array_bounds.nim | 2 |
10 files changed, 470 insertions, 115 deletions
diff --git a/compiler/ccgexprs.nim b/compiler/ccgexprs.nim index 7fb6af896..34fdf5bf1 100644 --- a/compiler/ccgexprs.nim +++ b/compiler/ccgexprs.nim @@ -1636,7 +1636,7 @@ proc genMagicExpr(p: BProc, e: PNode, d: var TLoc, op: TMagic) = of mSlurp..mQuoteAst: localError(e.info, errXMustBeCompileTime, e.sons[0].sym.name.s) of mSpawn: - let n = lowerings.wrapProcForSpawn(p.module.module, e.sons[1]) + let n = lowerings.wrapProcForSpawn(p.module.module, e[1], e.typ, nil, nil) expr(p, n, d) of mParallel: let n = semparallel.liftParallel(p.module.module, e) diff --git a/compiler/guards.nim b/compiler/guards.nim index de0ce1dcc..3df3bd1a8 100644 --- a/compiler/guards.nim +++ b/compiler/guards.nim @@ -672,12 +672,8 @@ proc simpleSlice*(a, b: PNode): BiggestInt = else: result = -1 -proc proveLe*(m: TModel; a, b: PNode): TImplication = - let res = canon(opLe.buildCall(a, b)) - #echo renderTree(res) - # we hardcode lots of axioms here: - let a = res[1] - let b = res[2] +proc ple(m: TModel; a, b: PNode): TImplication = + template `<=?`(a,b): expr = ple(m,a,b) == impYes # 0 <= 3 if a.isValue and b.isValue: return if leValue(a, b): impYes else: impNo @@ -692,26 +688,46 @@ proc proveLe*(m: TModel; a, b: PNode): TImplication = # x <= x if sameTree(a, b): return impYes - # x <= x+c iff 0 <= c - if b.getMagic in someAdd and sameTree(a, b[1]): - return proveLe(m, zero(), b[2]) + # 0 <= x.len + if b.getMagic in someLen and a.isValue: + if a.intVal <= 0: return impYes + + # x <= y+c if 0 <= c and x <= y + if b.getMagic in someAdd and zero() <=? b[2] and a <=? b[1]: return impYes + + # x+c <= y if c <= 0 and x <= y + if a.getMagic in someAdd and a[2] <=? zero() and a[1] <=? b: return impYes - # x+c <= x iff c <= 0 - if a.getMagic in someAdd and sameTree(b, a[1]): - return proveLe(m, a[2], zero()) + # x <= y*c if 1 <= c and x <= y and 0 <= y + if b.getMagic in someMul: + if a <=? b[1] and one() <=? b[2] and zero() <=? b[1]: return impYes - # x <= x*c if 1 <= c and 0 <= x: - if b.getMagic in someMul and sameTree(a, b[1]): - if proveLe(m, one(), b[2]) == impYes and proveLe(m, zero(), a) == impYes: - return impYes + # x div c <= y if 1 <= c and 0 <= y and x <= y: + if a.getMagic in someDiv: + if one() <=? a[2] and zero() <=? b and a[1] <=? b: return impYes - # x div c <= x if 1 <= c and 0 <= x: - if a.getMagic in someDiv and sameTree(a[1], b): - if proveLe(m, one(), a[2]) == impYes and proveLe(m, zero(), b) == impYes: - return impYes + # slightly subtle: + # x <= max(y, z) iff x <= y or x <= z + # note that 'x <= max(x, z)' is a special case of the above rule + if b.getMagic in someMax: + if a <=? b[1] or a <=? b[2]: return impYes + + # min(x, y) <= z iff x <= z or y <= z + if a.getMagic in someMin: + if a[1] <=? b or a[2] <=? b: return impYes # use the knowledge base: - return doesImply(m, res) + return doesImply(m, opLe.buildCall(a, b)) + +proc proveLe*(m: TModel; a, b: PNode): TImplication = + #echo "ROOT ", renderTree(a), " <=? ", b.rendertree + let x = canon(opLe.buildCall(a, b)) + #echo renderTree(res) + result = ple(m, x[1], x[2]) + if result == impUnknown: + # try an alternative: a <= b iff not (b < a) iff not (b+1 <= a): + let y = canon(opLe.buildCall(opAdd.buildCall(b, one()), a)) + result = ~ple(m, y[1], y[2]) proc addFactLe*(m: var TModel; a, b: PNode) = m.add canon(opLe.buildCall(a, b)) diff --git a/compiler/lowerings.nim b/compiler/lowerings.nim index 704cfbcdd..2a1a8e577 100644 --- a/compiler/lowerings.nim +++ b/compiler/lowerings.nim @@ -13,6 +13,8 @@ const genPrefix* = ":tmp" # prefix for generated names import ast, astalgo, types, idents, magicsys, msgs, options +from guards import createMagic +from trees import getMagic proc newTupleAccess*(tup: PNode, i: int): PNode = result = newNodeIT(nkBracketExpr, tup.info, tup.typ.skipTypes( @@ -80,19 +82,23 @@ proc newDotExpr(obj, b: PSym): PNode = addSon(result, newSymNode(field)) result.typ = field.typ -proc indirectAccess*(a: PNode, b: PSym, info: TLineInfo): PNode = +proc indirectAccess*(a: PNode, b: string, info: TLineInfo): PNode = # returns a[].b as a node var deref = newNodeI(nkHiddenDeref, info) - deref.typ = a.typ.sons[0] + deref.typ = a.typ.skipTypes(abstractInst).sons[0] assert deref.typ.kind == tyObject - let field = getSymFromList(deref.typ.n, getIdent(b.name.s & $b.id)) - assert field != nil, b.name.s + let field = getSymFromList(deref.typ.n, getIdent(b)) + assert field != nil, b addSon(deref, a) result = newNodeI(nkDotExpr, info) addSon(result, deref) addSon(result, newSymNode(field)) result.typ = field.typ +proc indirectAccess*(a: PNode, b: PSym, info: TLineInfo): PNode = + # returns a[].b as a node + result = indirectAccess(a, b.name.s & $b.id, info) + proc indirectAccess*(a, b: PSym, info: TLineInfo): PNode = result = indirectAccess(newSymNode(a), b, info) @@ -102,6 +108,11 @@ proc genAddrOf*(n: PNode): PNode = result.typ = newType(tyPtr, n.typ.owner) result.typ.rawAddSon(n.typ) +proc genDeref*(n: PNode): PNode = + result = newNodeIT(nkHiddenDeref, n.info, + n.typ.skipTypes(abstractInst).sons[0]) + result.add n + proc callCodegenProc*(name: string, arg1: PNode; arg2, arg3: PNode = nil): PNode = result = newNodeI(nkCall, arg1.info) @@ -114,14 +125,83 @@ proc callCodegenProc*(name: string, arg1: PNode; if arg2 != nil: result.add arg2 if arg3 != nil: result.add arg3 +# we have 4 cases to consider: +# - a void proc --> nothing to do +# - a proc returning GC'ed memory --> requires a future +# - a proc returning non GC'ed memory --> pass as hidden 'var' parameter +# - not in a parallel environment --> requires a future for memory safety +type + TSpawnResult = enum + srVoid, srFuture, srByVar + TFutureKind = enum + futInvalid # invalid type T for 'Future[T]' + futGC # Future of a GC'ed type + futBlob # Future of a blob type + +proc spawnResult(t: PType; inParallel: bool): TSpawnResult = + if t.isEmptyType: srVoid + elif inParallel and not containsGarbageCollectedRef(t): srByVar + else: srFuture + +proc futureKind(t: PType): TFutureKind = + if t.skipTypes(abstractInst).kind in {tyRef, tyString, tySequence}: futGC + elif containsGarbageCollectedRef(t): futInvalid + else: futBlob + +discard """ +We generate roughly this: + +proc f_wrapper(args) = + var a = args.a # copy strings/seqs; thread transfer; not generated for + # the 'parallel' statement + var b = args.b + + args.fut = createFuture(thread, sizeof(T)) # optional + nimArgsPassingDone() # signal parent that the work is done + args.fut.blob = f(a, b, ...) + # - or - + f(a, b, ...) + +stmtList: + var scratchObj + scratchObj.a = a + scratchObj.b = b + + nimSpawn(f_wrapper, addr scratchObj) + scratchObj.fut # optional + +""" + +proc createNimCreateFutureCall(fut, threadParam: PNode): PNode = + let size = newNodeIT(nkCall, fut.info, getSysType(tyInt)) + size.add newSymNode(createMagic("sizeof", mSizeOf)) + assert fut.typ.kind == tyGenericInst + size.add newNodeIT(nkType, fut.info, fut.typ.sons[1]) + + let castExpr = newNodeIT(nkCast, fut.info, fut.typ) + castExpr.add emptyNode + castExpr.add callCodeGenProc("nimCreateFuture", threadParam, size) + result = newFastAsgnStmt(fut, castExpr) + proc createWrapperProc(f: PNode; threadParam, argsParam: PSym; - varSection, call, barrier: PNode): PSym = + varSection, call, barrier, fut: PNode): PSym = var body = newNodeI(nkStmtList, f.info) body.add varSection if barrier != nil: body.add callCodeGenProc("barrierEnter", barrier) - body.add callCodeGenProc("nimArgsPassingDone", newSymNode(threadParam)) - body.add call + if fut != nil: + body.add createNimCreateFutureCall(fut, threadParam.newSymNode) + if barrier == nil: + body.add callCodeGenProc("nimFutureCreateCondVar", fut) + + body.add callCodeGenProc("nimArgsPassingDone", threadParam.newSymNode) + if fut != nil: + body.add newAsgnStmt(indirectAccess(fut, + if fut.typ.futureKind==futGC: "data" else: "blob", fut.info), call) + if barrier == nil: + body.add callCodeGenProc("nimFutureSignal", fut) + else: + body.add call if barrier != nil: body.add callCodeGenProc("barrierLeave", barrier) @@ -151,10 +231,148 @@ proc createCastExpr(argsParam: PSym; objType: PType): PNode = result.typ = newType(tyPtr, objType.owner) result.typ.rawAddSon(objType) -proc wrapProcForSpawn*(owner: PSym; n: PNode; barrier: PNode = nil): PNode = - result = newNodeI(nkStmtList, n.info) - if n.kind notin nkCallKinds or not n.typ.isEmptyType: - localError(n.info, "'spawn' takes a call expression of type void") +proc setupArgsForConcurrency(n: PNode; objType: PType; scratchObj: PSym, + castExpr, call, varSection, result: PNode) = + let formals = n[0].typ.n + let tmpName = getIdent(genPrefix) + for i in 1 .. <n.len: + # we pick n's type here, which hopefully is 'tyArray' and not + # 'tyOpenArray': + var argType = n[i].typ.skipTypes(abstractInst) + if i < formals.len and formals[i].typ.kind == tyVar: + localError(n[i].info, "'spawn'ed function cannot have a 'var' parameter") + elif containsTyRef(argType): + localError(n[i].info, "'spawn'ed function cannot refer to 'ref'/closure") + + let fieldname = if i < formals.len: formals[i].sym.name else: tmpName + var field = newSym(skField, fieldname, objType.owner, n.info) + field.typ = argType + objType.addField(field) + result.add newFastAsgnStmt(newDotExpr(scratchObj, field), n[i]) + + var temp = newSym(skTemp, tmpName, objType.owner, n.info) + temp.typ = argType + incl(temp.flags, sfFromGeneric) + + var vpart = newNodeI(nkIdentDefs, n.info, 3) + vpart.sons[0] = newSymNode(temp) + vpart.sons[1] = ast.emptyNode + vpart.sons[2] = indirectAccess(castExpr, field, n.info) + varSection.add vpart + + call.add(newSymNode(temp)) + +proc getRoot*(n: PNode): PSym = + ## ``getRoot`` takes a *path* ``n``. A path is an lvalue expression + ## like ``obj.x[i].y``. The *root* of a path is the symbol that can be + ## determined as the owner; ``obj`` in the example. + case n.kind + of nkSym: + if n.sym.kind in {skVar, skResult, skTemp, skLet, skForVar}: + result = n.sym + of nkDotExpr, nkBracketExpr, nkHiddenDeref, nkDerefExpr, + nkObjUpConv, nkObjDownConv, nkCheckedFieldExpr: + result = getRoot(n.sons[0]) + of nkHiddenStdConv, nkHiddenSubConv, nkConv: + result = getRoot(n.sons[1]) + of nkCallKinds: + if getMagic(n) == mSlice: result = getRoot(n.sons[1]) + else: discard + +proc newIntLit(value: BiggestInt): PNode = + result = nkIntLit.newIntNode(value) + result.typ = getSysType(tyInt) + +proc genHigh(n: PNode): PNode = + if skipTypes(n.typ, abstractVar).kind in {tyArrayConstr, tyArray}: + result = newIntLit(lastOrd(skipTypes(n.typ, abstractVar))) + else: + result = newNodeI(nkCall, n.info, 2) + result.typ = getSysType(tyInt) + result.sons[0] = newSymNode(createMagic("high", mHigh)) + result.sons[1] = n + +proc setupArgsForParallelism(n: PNode; objType: PType; scratchObj: PSym; + castExpr, call, result: PNode) = + let formals = n[0].typ.n + let tmpName = getIdent(genPrefix) + for i in 1 .. <n.len: + let n = n[i] + let argType = skipTypes(if i < formals.len: formals[i].typ else: n.typ, + abstractInst) + if containsTyRef(argType): + localError(n.info, "'spawn'ed function cannot refer to 'ref'/closure") + + let fieldname = if i < formals.len: formals[i].sym.name else: tmpName + var field = newSym(skField, fieldname, objType.owner, n.info) + + if argType.kind in {tyVarargs, tyOpenArray}: + # important special case: we always create a zero-copy slice: + let slice = newNodeI(nkCall, n.info, 4) + slice.typ = n.typ + slice.sons[0] = newSymNode(createMagic("slice", mSlice)) + var fieldB = newSym(skField, tmpName, objType.owner, n.info) + fieldB.typ = getSysType(tyInt) + objType.addField(fieldB) + + if getMagic(n) == mSlice: + let a = genAddrOf(n[0]) + field.typ = a.typ + objType.addField(field) + result.add newFastAsgnStmt(newDotExpr(scratchObj, field), a) + + var fieldA = newSym(skField, tmpName, objType.owner, n.info) + fieldA.typ = getSysType(tyInt) + objType.addField(fieldA) + result.add newFastAsgnStmt(newDotExpr(scratchObj, fieldA), n[2]) + result.add newFastAsgnStmt(newDotExpr(scratchObj, fieldB), n[3]) + + slice.sons[2] = indirectAccess(castExpr, fieldA, n.info) + else: + let a = genAddrOf(n) + field.typ = a.typ + objType.addField(field) + result.add newFastAsgnStmt(newDotExpr(scratchObj, field), a) + result.add newFastAsgnStmt(newDotExpr(scratchObj, fieldB), genHigh(n)) + + slice.sons[2] = newIntLit(0) + + slice.sons[1] = genDeref(indirectAccess(castExpr, field, n.info)) + slice.sons[3] = indirectAccess(castExpr, fieldB, n.info) + call.add slice + elif (let size = computeSize(argType); size < 0 or size > 16) and + n.getRoot != nil: + # it is more efficient to pass a pointer instead: + let a = genAddrOf(n) + field.typ = a.typ + objType.addField(field) + result.add newFastAsgnStmt(newDotExpr(scratchObj, field), a) + call.add(genDeref(indirectAccess(castExpr, field, n.info))) + else: + # boring case + field.typ = argType + objType.addField(field) + result.add newFastAsgnStmt(newDotExpr(scratchObj, field), n) + call.add(indirectAccess(castExpr, field, n.info)) + +proc wrapProcForSpawn*(owner: PSym; n: PNode; retType: PType; + barrier, dest: PNode = nil): PNode = + # if 'barrier' != nil, then it is in a 'parallel' section and we + # generate quite different code + let spawnKind = spawnResult(retType, barrier!=nil) + case spawnKind + of srVoid: + internalAssert dest == nil + result = newNodeI(nkStmtList, n.info) + of srFuture: + internalAssert dest == nil + result = newNodeIT(nkStmtListExpr, n.info, retType) + of srByVar: + if dest == nil: localError(n.info, "'spawn' must not be discarded") + result = newNodeI(nkStmtList, n.info) + + if n.kind notin nkCallKinds: + localError(n.info, "'spawn' takes a call expression") return if optThreadAnalysis in gGlobalOptions: if {tfThread, tfNoSideEffect} * n[0].typ.flags == {}: @@ -180,7 +398,7 @@ proc wrapProcForSpawn*(owner: PSym; n: PNode; barrier: PNode = nil): PNode = varSectionB.addVar(scratchObj.newSymNode) result.add varSectionB - var call = newNodeI(nkCall, n.info) + var call = newNodeIT(nkCall, n.info, n.typ) var fn = n.sons[0] # templates and macros are in fact valid here due to the nature of # the transformation: @@ -200,34 +418,10 @@ proc wrapProcForSpawn*(owner: PSym; n: PNode; barrier: PNode = nil): PNode = call.add(fn) var varSection = newNodeI(nkVarSection, n.info) - let formals = n[0].typ.n - let tmpName = getIdent(genPrefix) - for i in 1 .. <n.len: - # we pick n's type here, which hopefully is 'tyArray' and not - # 'tyOpenArray': - var argType = n[i].typ.skipTypes(abstractInst) - if i < formals.len and formals[i].typ.kind == tyVar: - localError(n[i].info, "'spawn'ed function cannot have a 'var' parameter") - elif containsTyRef(argType): - localError(n[i].info, "'spawn'ed function cannot refer to 'ref'/closure") - - let fieldname = if i < formals.len: formals[i].sym.name else: tmpName - var field = newSym(skField, fieldname, owner, n.info) - field.typ = argType - objType.addField(field) - result.add newFastAsgnStmt(newDotExpr(scratchObj, field), n[i]) - - var temp = newSym(skTemp, tmpName, owner, n.info) - temp.typ = argType - incl(temp.flags, sfFromGeneric) - - var vpart = newNodeI(nkIdentDefs, n.info, 3) - vpart.sons[0] = newSymNode(temp) - vpart.sons[1] = ast.emptyNode - vpart.sons[2] = indirectAccess(castExpr, field, n.info) - varSection.add vpart - - call.add(newSymNode(temp)) + if barrier.isNil: + setupArgsForConcurrency(n, objType, scratchObj, castExpr, call, varSection, result) + else: + setupArgsForParallelism(n, objType, scratchObj, castExpr, call, result) var barrierAsExpr: PNode = nil if barrier != nil: @@ -239,7 +433,17 @@ proc wrapProcForSpawn*(owner: PSym; n: PNode; barrier: PNode = nil): PNode = result.add newFastAsgnStmt(newDotExpr(scratchObj, field), barrier) barrierAsExpr = indirectAccess(castExpr, field, n.info) + var futField, futAsExpr: PNode = nil + if spawnKind == srFuture: + var field = newSym(skField, getIdent"fut", owner, n.info) + field.typ = retType + objType.addField(field) + futField = newDotExpr(scratchObj, field) + futAsExpr = indirectAccess(castExpr, field, n.info) + let wrapper = createWrapperProc(fn, threadParam, argsParam, varSection, call, - barrierAsExpr) + barrierAsExpr, futAsExpr) result.add callCodeGenProc("nimSpawn", wrapper.newSymNode, genAddrOf(scratchObj.newSymNode)) + + if spawnKind == srFuture: result.add futField diff --git a/compiler/semmagic.nim b/compiler/semmagic.nim index 80e70b8c0..3a6bfcf67 100644 --- a/compiler/semmagic.nim +++ b/compiler/semmagic.nim @@ -115,6 +115,12 @@ proc semLocals(c: PContext, n: PNode): PNode = if it.typ.skipTypes({tyGenericInst}).kind == tyVar: a = newDeref(a) result.add(a) +proc createFuture(c: PContext; t: PType; info: TLineInfo): PType = + result = newType(tyGenericInvokation, c.module) + addSonSkipIntLit(result, magicsys.getCompilerProc("Future").typ) + addSonSkipIntLit(result, t) + result = instGenericContainer(c, info, result, allowMetaTypes = false) + proc semShallowCopy(c: PContext, n: PNode, flags: TExprFlags): PNode proc magicsAfterOverloadResolution(c: PContext, n: PNode, flags: TExprFlags): PNode = @@ -130,5 +136,9 @@ proc magicsAfterOverloadResolution(c: PContext, n: PNode, of mShallowCopy: result = semShallowCopy(c, n, flags) of mNBindSym: result = semBindSym(c, n) of mLocals: result = semLocals(c, n) + of mSpawn: + result = n + # later passes may transform the type 'Future[T]' back into 'T' + if not n[1].typ.isEmptyType: + result.typ = createFuture(c, n[1].typ, n.info) else: result = n - diff --git a/compiler/semparallel.nim b/compiler/semparallel.nim index 7917cab90..b13542038 100644 --- a/compiler/semparallel.nim +++ b/compiler/semparallel.nim @@ -9,8 +9,8 @@ ## Semantic checking for 'parallel'. -# - codegen needs to support mSlice -# - lowerings must not perform unnecessary copies +# - codegen needs to support mSlice (+) +# - lowerings must not perform unnecessary copies (+) # - slices should become "nocopy" to openArray (+) # - need to perform bound checks (+) # @@ -19,7 +19,7 @@ # - what about 'f(a)'? --> f shouldn't have side effects anyway # - passed arrays need to be ensured not to alias # - passed slices need to be ensured to be disjoint (+) -# - output slices need special logic +# - output slices need special logic (+) import ast, astalgo, idents, lowerings, magicsys, guards, sempass2, msgs, @@ -94,23 +94,6 @@ proc getSlot(c: var AnalysisCtx; v: PSym): ptr MonotonicVar = c.locals[L].v = v return addr(c.locals[L]) -proc getRoot(n: PNode): PSym = - ## ``getRoot`` takes a *path* ``n``. A path is an lvalue expression - ## like ``obj.x[i].y``. The *root* of a path is the symbol that can be - ## determined as the owner; ``obj`` in the example. - case n.kind - of nkSym: - if n.sym.kind in {skVar, skResult, skTemp, skLet, skForVar}: - result = n.sym - of nkDotExpr, nkBracketExpr, nkHiddenDeref, nkDerefExpr, - nkObjUpConv, nkObjDownConv, nkCheckedFieldExpr: - result = getRoot(n.sons[0]) - of nkHiddenStdConv, nkHiddenSubConv, nkConv: - result = getRoot(n.sons[1]) - of nkCallKinds: - if getMagic(n) == mSlice: result = getRoot(n.sons[1]) - else: discard - proc gatherArgs(c: var AnalysisCtx; n: PNode) = for i in 0.. <n.safeLen: let root = getRoot n[i] @@ -184,8 +167,6 @@ proc overlap(m: TModel; x,y,c,d: PNode) = of impNo: discard proc stride(c: AnalysisCtx; n: PNode): BiggestInt = - # note: 0 if it cannot be determined is just right because then - # we analyse 'i..i' and 'i+0 .. i+0' and these are not disjoint! if isLocal(n): let s = c.lookupSlot(n.sym) if s >= 0 and c.locals[s].stride != nil: @@ -193,6 +174,20 @@ proc stride(c: AnalysisCtx; n: PNode): BiggestInt = else: for i in 0 .. <n.safeLen: result += stride(c, n.sons[i]) +proc subStride(c: AnalysisCtx; n: PNode): PNode = + # substitute with stride: + if isLocal(n): + let s = c.lookupSlot(n.sym) + if s >= 0 and c.locals[s].stride != nil: + result = n +@ c.locals[s].stride.intVal + else: + result = n + elif n.safeLen > 0: + result = shallowCopy(n) + for i in 0 .. <n.len: result.sons[i] = subStride(c, n.sons[i]) + else: + result = n + proc checkSlicesAreDisjoint(c: var AnalysisCtx) = # this is the only thing that we need to perform after we have traversed # the whole tree so that the strides are available. @@ -200,7 +195,7 @@ proc checkSlicesAreDisjoint(c: var AnalysisCtx) = addLowerBoundAsFacts(c) # Every slice used in a loop needs to be disjoint with itself: for x,a,b,id,inLoop in items(c.slices): - if inLoop: overlap(c.guards, a,b, a+@c.stride(a), b+@c.stride(b)) + if inLoop: overlap(c.guards, a,b, c.subStride(a), c.subStride(b)) # Another tricky example is: # while true: # spawn f(a[i]) @@ -283,23 +278,19 @@ proc analyseCall(c: var AnalysisCtx; n: PNode; op: PSym) = proc analyseCase(c: var AnalysisCtx; n: PNode) = analyse(c, n.sons[0]) - #let oldState = c.locals.len let oldFacts = c.guards.len for i in 1.. <n.len: let branch = n.sons[i] - #setLen(c.locals, oldState) setLen(c.guards, oldFacts) addCaseBranchFacts(c.guards, n, i) for i in 0 .. <branch.len: analyse(c, branch.sons[i]) - #setLen(c.locals, oldState) setLen(c.guards, oldFacts) proc analyseIf(c: var AnalysisCtx; n: PNode) = analyse(c, n.sons[0].sons[0]) let oldFacts = c.guards.len addFact(c.guards, n.sons[0].sons[0]) - #let oldState = c.locals.len analyse(c, n.sons[0].sons[1]) for i in 1.. <n.len: @@ -309,10 +300,8 @@ proc analyseIf(c: var AnalysisCtx; n: PNode) = addFactNeg(c.guards, n.sons[j].sons[0]) if branch.len > 1: addFact(c.guards, branch.sons[0]) - #setLen(c.locals, oldState) for i in 0 .. <branch.len: analyse(c, branch.sons[i]) - #setLen(c.locals, oldState) setLen(c.guards, oldFacts) proc analyse(c: var AnalysisCtx; n: PNode) = @@ -390,17 +379,40 @@ proc transformSlices(n: PNode): PNode = else: result = n +proc transformSpawn(owner: PSym; n, barrier: PNode): PNode +proc transformSpawnSons(owner: PSym; n, barrier: PNode): PNode = + result = shallowCopy(n) + for i in 0 .. < n.len: + result.sons[i] = transformSpawn(owner, n.sons[i], barrier) + proc transformSpawn(owner: PSym; n, barrier: PNode): PNode = - if n.kind in nkCallKinds: - if n[0].kind == nkSym: - let op = n[0].sym - if op.magic == mSpawn: - result = transformSlices(n) - return wrapProcForSpawn(owner, result[1], barrier) + case n.kind + of nkVarSection: + result = nil + for it in n: + let b = it.lastSon + if getMagic(b) == mSpawn: + if it.len != 3: localError(it.info, "invalid context for 'spawn'") + let m = transformSlices(b) + if result.isNil: + result = newNodeI(nkStmtList, n.info) + result.add n + result.add wrapProcForSpawn(owner, m[1], b.typ, barrier, it[0]) + it.sons[it.len-1] = emptyNode + if result.isNil: result = n + of nkAsgn, nkFastAsgn: + let b = n[1] + if getMagic(b) == mSpawn: + let m = transformSlices(b) + return wrapProcForSpawn(owner, m[1], b.typ, barrier, n[0]) + result = transformSpawnSons(owner, n, barrier) + of nkCallKinds: + if getMagic(n) == mSpawn: + result = transformSlices(n) + return wrapProcForSpawn(owner, result[1], n.typ, barrier, nil) + result = transformSpawnSons(owner, n, barrier) elif n.safeLen > 0: - result = shallowCopy(n) - for i in 0 .. < n.len: - result.sons[i] = transformSpawn(owner, n.sons[i], barrier) + result = transformSpawnSons(owner, n, barrier) else: result = n @@ -440,3 +452,4 @@ proc liftParallel*(owner: PSym; n: PNode): PNode = result.add callCodeGenProc("openBarrier", barrier) result.add transformSpawn(owner, body, barrier) result.add callCodeGenProc("closeBarrier", barrier) + diff --git a/doc/manual.txt b/doc/manual.txt index 39e2bad2a..b2e008969 100644 --- a/doc/manual.txt +++ b/doc/manual.txt @@ -2748,7 +2748,7 @@ The following builtin procs cannot be overloaded for reasons of implementation simplicity (they require specialized semantic checking):: defined, definedInScope, compiles, low, high, sizeOf, - is, of, echo, shallowCopy, getAst + is, of, echo, shallowCopy, getAst, spawn Thus they act more like keywords than like ordinary identifiers; unlike a keyword however, a redefinition may `shadow`:idx: the definition in diff --git a/lib/pure/concurrency/threadpool.nim b/lib/pure/concurrency/threadpool.nim index 86819d25a..583c60c66 100644 --- a/lib/pure/concurrency/threadpool.nim +++ b/lib/pure/concurrency/threadpool.nim @@ -65,6 +65,30 @@ proc closeBarrier*(b: ptr Barrier) {.compilerProc.} = # ---------------------------------------------------------------------------- type + AwaitInfo = object + cv: CondVar + idx: int + + RawFuture* = ptr RawFutureObj ## untyped base class for 'Future[T]' + RawFutureObj {.inheritable.} = object # \ + # we allocate this with the thread local allocator; this + # is possible since we already need to do the GC_unref + # on the owning thread + ready, usesCondVar: bool + cv: CondVar #\ + # for 'awaitAny' support + ai: ptr AwaitInfo + idx: int + data: PObject # we incRef and unref it to keep it alive + owner: ptr Worker + next: RawFuture + align: float64 # a float for proper alignment + + Future* {.compilerProc.} [T] = ptr object of RawFutureObj + blob: T ## the underlying value, if available. Note that usually + ## you should not access this field directly! However it can + ## sometimes be more efficient than getting the value via ``^``. + WorkerProc = proc (thread, args: pointer) {.nimcall, gcsafe.} Worker = object taskArrived: CondVar @@ -75,6 +99,92 @@ type ready: bool # put it here for correct alignment! initialized: bool # whether it has even been initialized shutdown: bool # the pool requests to shut down this worker thread + futureLock: TLock + head: RawFuture + +proc finished*(fut: RawFuture) = + ## This MUST be called for every created future to free its associated + ## resources. Note that the default reading operation ``^`` is destructive + ## and calls ``finished``. + doAssert fut.ai.isNil, "future is still attached to an 'awaitAny'" + assert fut.next == nil + let w = fut.owner + acquire(w.futureLock) + fut.next = w.head + w.head = fut + release(w.futureLock) + +proc cleanFutures(w: ptr Worker) = + var it = w.head + acquire(w.futureLock) + while it != nil: + let nxt = it.next + if it.usesCondVar: destroyCondVar(it.cv) + if it.data != nil: GC_unref(it.data) + dealloc(it) + it = nxt + w.head = nil + release(w.futureLock) + +proc nimCreateFuture(owner: pointer; blobSize: int): RawFuture {. + compilerProc.} = + result = cast[RawFuture](alloc0(RawFutureObj.sizeof + blobSize)) + result.owner = cast[ptr Worker](owner) + +proc nimFutureCreateCondVar(fut: RawFuture) {.compilerProc.} = + fut.cv = createCondVar() + fut.usesCondVar = true + +proc nimFutureSignal(fut: RawFuture) {.compilerProc.} = + assert fut.usesCondVar + signal(fut.cv) + +proc await*[T](fut: Future[T]) = + ## waits until the value for the future arrives. + if fut.usesCondVar: await(fut.cv) + +proc `^`*[T](fut: Future[T]): T = + ## blocks until the value is available and then returns this value. Note + ## this reading is destructive for reasons of efficiency and convenience. + ## This calls ``finished(fut)``. + await(fut) + when T is string or T is seq or T is ref: + result = cast[T](fut.data) + else: + result = fut.payload + finished(fut) + +proc notify*(fut: RawFuture) {.compilerproc.} = + if fut.ai != nil: + acquire(fut.ai.cv.L) + fut.ai.idx = fut.idx + inc fut.ai.cv.counter + release(fut.ai.cv.L) + signal(fut.ai.cv.c) + if fut.usesCondVar: signal(fut.cv) + +proc awaitAny*(futures: openArray[RawFuture]): int = + # awaits any of the given futures. Returns the index of one future for which + ## a value arrived. A future only supports one call to 'awaitAny' at the + ## same time. That means if you await([a,b]) and await([b,c]) the second + ## call will only await 'c'. If there is no future left to be able to wait + ## on, -1 is returned. + var ai: AwaitInfo + ai.cv = createCondVar() + var conflicts = 0 + for i in 0 .. futures.high: + if cas(addr futures[i].ai, nil, addr ai): + futures[i].idx = i + else: + inc conflicts + if conflicts < futures.len: + await(ai.cv) + result = ai.idx + for i in 0 .. futures.high: + discard cas(addr futures[i].ai, addr ai, nil) + else: + result = -1 + destroyCondVar(ai.cv) proc nimArgsPassingDone(p: pointer) {.compilerProc.} = let w = cast[ptr Worker](p) @@ -99,6 +209,7 @@ proc slave(w: ptr Worker) {.thread.} = await(w.taskArrived) assert(not w.ready) w.f(w, w.data) + if w.head != nil: w.cleanFutures if w.shutdown: w.shutdown = false atomicDec currentPoolSize @@ -119,6 +230,7 @@ var proc activateThread(i: int) {.noinline.} = workersData[i].taskArrived = createCondVar() workersData[i].taskStarted = createCondVar() + initLock workersData[i].futureLock workersData[i].initialized = true createThread(workers[i], slave, addr(workersData[i])) diff --git a/lib/system/atomics.nim b/lib/system/atomics.nim index c6c603b19..96246ba01 100644 --- a/lib/system/atomics.nim +++ b/lib/system/atomics.nim @@ -209,12 +209,12 @@ when defined(windows) and not defined(gcc): proc interlockedCompareExchange(p: pointer; exchange, comparand: int32): int32 {.importc: "InterlockedCompareExchange", header: "<windows.h>", cdecl.} - proc cas*[T: bool|int](p: ptr T; oldValue, newValue: T): bool = + proc cas*[T: bool|int|ptr](p: ptr T; oldValue, newValue: T): bool = interlockedCompareExchange(p, newValue.int32, oldValue.int32) != 0 - + # XXX fix for 64 bit build else: # this is valid for GCC and Intel C++ - proc cas*[T: bool|int](p: ptr T; oldValue, newValue: T): bool + proc cas*[T: bool|int|ptr](p: ptr T; oldValue, newValue: T): bool {.importc: "__sync_bool_compare_and_swap", nodecl.} # XXX is this valid for 'int'? diff --git a/tests/parallel/tdisjoint_slice1.nim b/tests/parallel/tdisjoint_slice1.nim index 2ca96d6ae..c1d0e52f8 100644 --- a/tests/parallel/tdisjoint_slice1.nim +++ b/tests/parallel/tdisjoint_slice1.nim @@ -1,20 +1,20 @@ +discard """ + outputsub: "EVEN 28" +""" import threadpool -proc f(a: openArray[int]) = - for x in a: echo x - -proc f(a: int) = echo a +proc odd(a: int) = echo "ODD ", a +proc even(a: int) = echo "EVEN ", a proc main() = var a: array[0..30, int] + for i in low(a)..high(a): a[i] = i parallel: - #spawn f(a[0..15]) - #spawn f(a[16..30]) var i = 0 while i <= 29: - spawn f(a[i]) - spawn f(a[i+1]) + spawn even(a[i]) + spawn odd(a[i+1]) inc i, 2 # is correct here diff --git a/tests/parallel/tinvalid_array_bounds.nim b/tests/parallel/tinvalid_array_bounds.nim index 337fae729..4c6065fd6 100644 --- a/tests/parallel/tinvalid_array_bounds.nim +++ b/tests/parallel/tinvalid_array_bounds.nim @@ -1,5 +1,5 @@ discard """ - errormsg: "cannot prove: i + 1 <= 30" + errormsg: "can prove: i + 1 > 30" line: 21 """ |