diff options
author | Araq <rumpf_a@web.de> | 2014-05-12 11:12:37 +0200 |
---|---|---|
committer | Araq <rumpf_a@web.de> | 2014-05-12 11:12:37 +0200 |
commit | 6195dbe491ccd864c5dcb59f87826291ac1f1ff4 (patch) | |
tree | d2a549857984a296ab4b3e2be72745d413da005a | |
parent | bdb2d21f276c10aee122218384e568ef843690fa (diff) | |
download | Nim-6195dbe491ccd864c5dcb59f87826291ac1f1ff4.tar.gz |
initial non-compiling version of 'parallel'
-rw-r--r-- | compiler/guards.nim | 191 | ||||
-rw-r--r-- | compiler/lowerings.nim | 22 | ||||
-rw-r--r-- | compiler/semparallel.nim | 414 | ||||
-rw-r--r-- | compiler/sempass2.nim | 4 | ||||
-rw-r--r-- | compiler/vm.nim | 5 | ||||
-rw-r--r-- | config/nimrod.cfg | 1 | ||||
-rw-r--r-- | lib/pure/concurrency/cpuinfo.nim | 58 | ||||
-rw-r--r-- | lib/pure/concurrency/cpuload.nim | 96 | ||||
-rw-r--r-- | lib/pure/concurrency/threadpool.nim | 210 | ||||
-rw-r--r-- | lib/pure/osproc.nim | 38 | ||||
-rw-r--r-- | lib/system.nim | 3 | ||||
-rw-r--r-- | lib/system/atomics.nim | 31 | ||||
-rw-r--r-- | lib/system/sysspawn.nim | 47 | ||||
-rw-r--r-- | tests/system/tsysspawn.nim | 10 | ||||
-rw-r--r-- | tests/system/tsysspawnbadarg.nim | 2 | ||||
-rw-r--r-- | web/news.txt | 17 |
16 files changed, 1058 insertions, 91 deletions
diff --git a/compiler/guards.nim b/compiler/guards.nim index f475f5068..57cd73b11 100644 --- a/compiler/guards.nim +++ b/compiler/guards.nim @@ -9,7 +9,8 @@ ## This module implements the 'implies' relation for guards. -import ast, astalgo, msgs, magicsys, nimsets, trees, types, renderer, idents +import ast, astalgo, msgs, magicsys, nimsets, trees, types, renderer, idents, + saturate const someEq = {mEqI, mEqI64, mEqF64, mEqEnum, mEqCh, mEqB, mEqRef, mEqProc, @@ -25,6 +26,17 @@ const someIn = {mInRange, mInSet} + someHigh = {mHigh} + # we don't list unsigned here because wrap around semantics suck for + # proving anything: + someAdd = {mAddI, mAddI64, mAddF64, mSucc} + someSub = {mSubI, mSubI64, mSubF64, mPred} + someMul = {mMulI, mMulI64, mMulF64} + someDiv = {mDivI, mDivI64, mDivF64} + someMod = {mModI, mModI64} + someMax = {mMaxI, mMaxI64, mMaxF64} + someMin = {mMinI, mMinI64, mMinF64} + proc isValue(n: PNode): bool = n.kind in {nkCharLit..nkNilLit} proc isLocation(n: PNode): bool = not n.isValue @@ -69,19 +81,24 @@ proc isLetLocation(m: PNode, isApprox: bool): bool = proc interestingCaseExpr*(m: PNode): bool = isLetLocation(m, true) -proc getMagicOp(name: string, m: TMagic): PSym = +proc createMagic*(name: string, m: TMagic): PSym = result = newSym(skProc, getIdent(name), nil, unknownLineInfo()) result.magic = m let - opLe = getMagicOp("<=", mLeI) - opLt = getMagicOp("<", mLtI) - opAnd = getMagicOp("and", mAnd) - opOr = getMagicOp("or", mOr) - opNot = getMagicOp("not", mNot) - opIsNil = getMagicOp("isnil", mIsNil) - opContains = getMagicOp("contains", mInSet) - opEq = getMagicOp("==", mEqI) + opLe = createMagic("<=", mLeI) + opLt = createMagic("<", mLtI) + opAnd = createMagic("and", mAnd) + opOr = createMagic("or", mOr) + opNot = createMagic("not", mNot) + opIsNil = createMagic("isnil", mIsNil) + opContains = createMagic("contains", mInSet) + opEq = createMagic("==", mEqI) + opAdd = createMagic("+", mAddI) + opSub = createMagic("-", mSubI) + opMul = createMagic("*", mMulI) + opDiv = createMagic("div", mDivI) + opLen = createMagic("len", mLengthSeq) proc swapArgs(fact: PNode, newOp: PSym): PNode = result = newNodeI(nkCall, fact.info, 3) @@ -137,17 +154,118 @@ proc neg(n: PNode): PNode = result.sons[0] = newSymNode(opNot) result.sons[1] = n -proc buildIsNil(arg: PNode): PNode = - result = newNodeI(nkCall, arg.info, 2) - result.sons[0] = newSymNode(opIsNil) - result.sons[1] = arg +proc buildCall(op: PSym; a: PNode): PNode = + result = newNodeI(nkCall, a.info, 2) + result.sons[0] = newSymNode(op) + result.sons[1] = a + +proc buildCall(op: PSym; a, b: PNode): PNode = + result = newNodeI(nkCall, a.info, 3) + result.sons[0] = newSymNode(op) + result.sons[1] = a + result.sons[2] = b + +proc `+@`*(a: PNode; b: BiggestInt): PNode = + opAdd.buildCall(a, nkIntLit.newIntNode(b)) + +proc `|+|`(a, b: PNode): PNode = + result = copyNode(a) + if a.kind in {nkCharLit..nkUInt64Lit}: result.intVal = a.intVal |+| b.intVal + else: result.floatVal = a.floatVal + b.floatVal + +proc `|*|`(a, b: PNode): PNode = + result = copyNode(a) + if a.kind in {nkCharLit..nkUInt64Lit}: result.intVal = a.intVal |*| b.intVal + else: result.floatVal = a.floatVal * b.floatVal + +proc zero(): PNode = nkIntLit.newIntNode(0) +proc one(): PNode = nkIntLit.newIntNode(1) +proc minusOne(): PNode = nkIntLit.newIntNode(-1) + +proc lowBound*(x: PNode): PNode = nkIntLit.newIntNode(firstOrd(x.typ)) +proc highBound*(x: PNode): PNode = + if x.typ.skipTypes(abstractInst).kind == tyArray: + nkIntLit.newIntNode(lastOrd(x.typ)) + else: + opAdd.buildCall(opLen.buildCall(x), minusOne()) + +proc canon*(n: PNode): PNode = + # XXX for now only the new code in 'semparallel' uses this + if n.safeLen >= 1: + result = newNodeI(n.kind, n.info, n.len) + for i in 0 .. < n.safeLen: + result.sons[i] = canon(n.sons[i]) + else: + result = n + case result.getMagic + of someEq, someAdd, someMul, someMin, someMax: + # these are symmetric; put value as last: + if result.sons[1].isValue and not result.sons[2].isValue: + result = swapArgs(result, result.sons[0].sym) + # (4 + foo) + 2 --> (foo + 4) + 2 + of someHigh: + # high == len+(-1) + result = opAdd.buildCall(opLen.buildCall(result[1]), minusOne()) + of mUnaryMinusI, mUnaryMinusI64: + result = buildCall(opAdd, result[1], newIntNode(nkIntLit, -1)) + of someSub: + # x - 4 --> x + (-4) + var b = result[2] + if b.kind in {nkCharLit..nkUInt64Lit} and b.intVal != low(BiggestInt): + b = copyNode(b) + b.intVal = -b.intVal + result = buildCall(opAdd, result[1], b) + elif b.kind in {nkFloatLit..nkFloat64Lit}: + b = copyNode(b) + b.floatVal = -b.floatVal + result = buildCall(opAdd, result[1], b) + of someLen: + result.sons[0] = opLen.newSymNode + else: discard + + # re-association: + # (foo+5)+5 --> foo+10; same for '*' + case result.getMagic + of someAdd: + if result[2].isValue and + result[1].getMagic in someAdd and result[1][2].isValue: + result = opAdd.buildCall(result[1][1], result[1][2] |+| result[2]) + of someMul: + if result[2].isValue and + result[1].getMagic in someMul and result[1][2].isValue: + result = opAdd.buildCall(result[1][1], result[1][2] |*| result[2]) + else: discard + + # most important rule: (x-4) < a.len --> x < a.len+4 + case result.getMagic + of someLe, someLt: + let x = result[1] + let y = result[2] + if x.kind in nkCallKinds and x.len == 3 and x[2].isValue and + isLetLocation(x[1], true): + case x.getMagic + of someSub: + result = buildCall(result[0].sym, x[1], opAdd.buildCall(y, x[2])) + of someAdd: + result = buildCall(result[0].sym, x[1], opSub.buildCall(y, x[2])) + else: discard + elif y.kind in nkCallKinds and y.len == 3 and y[2].isValue and + isLetLocation(y[1], true): + # a.len < x-3 + case y.getMagic + of someSub: + result = buildCall(result[0].sym, y[1], opAdd.buildCall(x, y[2])) + of someAdd: + result = buildCall(result[0].sym, y[1], opSub.buildCall(x, y[2])) + else: discard + else: discard proc usefulFact(n: PNode): PNode = case n.getMagic of someEq: if skipConv(n.sons[2]).kind == nkNilLit and ( isLetLocation(n.sons[1], false) or isVar(n.sons[1])): - result = buildIsNil(n.sons[1]) + result = opIsNil.buildCall(n.sons[1]) else: if isLetLocation(n.sons[1], true) or isLetLocation(n.sons[2], true): # XXX algebraic simplifications! 'i-1 < a.len' --> 'i < a.len+1' @@ -217,7 +335,7 @@ proc addFactNeg*(m: var TModel, n: PNode) = let n = n.neg if n != nil: addFact(m, n) -proc sameTree(a, b: PNode): bool = +proc sameTree*(a, b: PNode): bool = result = false if a == b: result = true @@ -519,7 +637,46 @@ proc doesImply*(facts: TModel, prop: PNode): TImplication = if result != impUnknown: return proc impliesNotNil*(facts: TModel, arg: PNode): TImplication = - result = doesImply(facts, buildIsNil(arg).neg) + result = doesImply(facts, opIsNil.buildCall(arg).neg) + +proc proveLe*(m: TModel; a, b: PNode): TImplication = + let res = canon(opLe.buildCall(a, b)) + # we hardcode lots of axioms here: + let a = res[1] + let b = res[2] + # 0 <= 3 + if a.isValue and b.isValue: + return if leValue(a, b): impYes else: impNo + + # use type information too: x <= 4 iff high(x) <= 4 + if b.isValue and a.typ != nil and a.typ.isOrdinalType: + if lastOrd(a.typ) <= b.intVal: return impYes + # 3 <= x iff low(x) <= 3 + if a.isValue and b.typ != nil and b.typ.isOrdinalType: + if firstOrd(b.typ) <= a.intVal: return impYes + + # x <= x + if sameTree(a, b): return impYes + + # x <= x+c iff 0 <= c + if b.getMagic in someAdd and sameTree(a, b[1]): + return proveLe(m, zero(), b[2]) + + # x <= x*c if 1 <= c and 0 <= x: + if b.getMagic in someMul and sameTree(a, b[1]): + if proveLe(m, one(), b[2]) == impYes and proveLe(m, zero(), a) == impYes: + return impYes + + # x div c <= x if 1 <= c and 0 <= x: + if a.getMagic in someDiv and sameTree(a[1], b): + if proveLe(m, one(), a[2]) == impYes and proveLe(m, zero(), b) == impYes: + return impYes + + # use the knowledge base: + return doesImply(m, res) + +proc addFactLe*(m: var TModel; a, b: PNode) = + m.add canon(opLe.buildCall(a, b)) proc settype(n: PNode): PType = result = newType(tySet, n.typ.owner) diff --git a/compiler/lowerings.nim b/compiler/lowerings.nim index 1b9e5fe0f..93bfd8425 100644 --- a/compiler/lowerings.nim +++ b/compiler/lowerings.nim @@ -114,11 +114,15 @@ proc callCodegenProc*(name: string, arg1: PNode; if arg3 != nil: result.add arg3 proc createWrapperProc(f: PNode; threadParam, argsParam: PSym; - varSection, call: PNode): PSym = + varSection, call, barrier: PNode): PSym = var body = newNodeI(nkStmtList, f.info) body.add varSection + if barrier != nil: + body.add callCodeGenProc("barrierEnter", barrier) body.add callCodeGenProc("nimArgsPassingDone", newSymNode(threadParam)) body.add call + if barrier != nil: + body.add callCodeGenProc("barrierLeave", barrier) var params = newNodeI(nkFormalParams, f.info) params.add emptyNode @@ -146,7 +150,7 @@ proc createCastExpr(argsParam: PSym; objType: PType): PNode = result.typ = newType(tyPtr, objType.owner) result.typ.rawAddSon(objType) -proc wrapProcForSpawn*(owner: PSym; n: PNode): PNode = +proc wrapProcForSpawn*(owner: PSym; n: PNode; barrier: PNode = nil): PNode = result = newNodeI(nkStmtList, n.info) if n.kind notin nkCallKinds or not n.typ.isEmptyType: localError(n.info, "'spawn' takes a call expression of type void") @@ -162,6 +166,7 @@ proc wrapProcForSpawn*(owner: PSym; n: PNode): PNode = threadParam.typ = ptrType argsParam.typ = ptrType argsParam.position = 1 + var objType = createObj(owner, n.info) incl(objType.flags, tfFinal) let castExpr = createCastExpr(argsParam, objType) @@ -223,6 +228,17 @@ proc wrapProcForSpawn*(owner: PSym; n: PNode): PNode = call.add(newSymNode(temp)) - let wrapper = createWrapperProc(fn, threadParam, argsParam, varSection, call) + var barrierAsExpr: PNode = nil + if barrier != nil: + let typ = newType(tyPtr, owner) + typ.rawAddSon(magicsys.getCompilerProc("Barrier").typ) + var field = newSym(skField, getIdent"barrier", owner, n.info) + field.typ = typ + objType.addField(field) + result.add newFastAsgnStmt(newDotExpr(scratchObj, field), barrier) + barrierAsExpr = indirectAccess(castExpr, field, n.info) + + let wrapper = createWrapperProc(fn, threadParam, argsParam, varSection, call, + barrierAsExpr) result.add callCodeGenProc("nimSpawn", wrapper.newSymNode, genAddrOf(scratchObj.newSymNode)) diff --git a/compiler/semparallel.nim b/compiler/semparallel.nim new file mode 100644 index 000000000..34a1f3af8 --- /dev/null +++ b/compiler/semparallel.nim @@ -0,0 +1,414 @@ +# +# +# The Nimrod Compiler +# (c) Copyright 2014 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## Semantic checking for 'parallel'. + +# - slices should become "nocopy" to openArray (+) +# - need to perform bound checks (+) +# +# - parallel needs to insert a barrier (+) +# - passed arguments need to be ensured to be "const" +# - what about 'f(a)'? --> f shouldn't have side effects anyway +# - passed arrays need to be ensured not to alias +# - passed slices need to be ensured to be disjoint (+) +# - output slices need special logic + +import lowerings, guards, sempass2 + +discard """ + +one major problem: + spawn f(a[i]) + inc i + spawn f(a[i]) +is valid, but + spawn f(a[i]) + spawn f(a[i]) + inc i +is not! However, + spawn f(a[i]) + if guard: inc i + spawn f(a[i]) +is not valid either! --> We need a flow dependent analysis here. + +However: + while foo: + spawn f(a[i]) + inc i + spawn f(a[i]) + +Is not valid either! --> We should really restrict 'inc' to loop endings? + +The heuristic that we implement here (that has no false positives) is: Usage +of 'i' in a slice *after* we determined the stride is invalid! +""" + +type + TDirection = enum + ascending, descending + MonotonicVar = object + v: PSym + lower, upper, stride: PNode + dir: TDirection + blacklisted: bool # blacklisted variables that are not monotonic + AnalysisCtx = object + locals: seq[MonotonicVar] + slices: seq[tuple[x,a,b: PNode, spawnId: int, inLoop: bool]] + guards: TModel # nested guards + args: seq[PSym] # args must be deeply immutable + spawns: int # we can check that at last 1 spawn is used in + # the 'parallel' section + currentSpawnId: int + inLoop: int + +let opSlice = createMagic("slice", mSlice) + +proc initAnalysisCtx(): AnalysisCtx = + result.locals = @[] + result.slices = @[] + result.args = @[] + result.guards = @[] + +proc getSlot(c: var AnalysisCtx; s: PSym): ptr MonotonicVar = + var L = c.locals.len + for i in 0.. <L: + if c.locals[i].v == s: return addr(c.locals[i]) + c.locals.setLen(L+1) + c.locals[L].v = s + return addr(c.locals[L]) + +proc getRoot(n: PNode): PSym = + ## ``getRoot`` takes a *path* ``n``. A path is an lvalue expression + ## like ``obj.x[i].y``. The *root* of a path is the symbol that can be + ## determined as the owner; ``obj`` in the example. + case n.kind + of nkSym: + if n.sym.kind in {skVar, skResult, skTemp, skLet, skForVar}: + result = n.sym + of nkDotExpr, nkBracketExpr, nkHiddenDeref, nkDerefExpr, + nkObjUpConv, nkObjDownConv, nkCheckedFieldExpr: + result = getRoot(n.sons[0]) + of nkHiddenStdConv, nkHiddenSubConv, nkConv: + result = getRoot(n.sons[1]) + of nkCallKinds: + if getMagic(n) == mSlice: result = getRoot(n.sons[1]) + else: discard + +proc gatherArgs(c: var AnalysisCtx; n: PNode) = + for i in 0.. <n.safeLen: + let root = getRoot n[i] + if root != nil: + block addRoot: + for r in items(c.args): + if r == root: break addRoot + c.args.add root + gatherArgs(c, n[i]) + +proc isLocal(s: PSym): bool = + s.kind in {skResult, skTemp, skForVar, skVar, skLet} and + {sfAddrTaken, sfGlobal} * s.flags == {} + +proc checkLocal(c: var AnalysisCtx; n: PNode) = + if n.kind == nkSym and isLocal(n.sym): + let slot = c.getSlot(n[1].sym) + if slot.stride != nil: + localError(n.info, "invalid usage of counter after increment") + else: + for i in 0 .. <n.safeLen: checkLocal(c, n.sons[i]) + +proc checkLe(c: AnalysisCtx; a, b: PNode) = + case proveLe(c.guards, a, b) + of impUnkown: + localError(n.info, "cannot prove: " & a.renderTree & " <= " & b.renderTree) + of impYes: discard + of impNo: + localError(n.info, "can prove: " & a.renderTree & " > " & b.renderTree) + +proc checkBounds(c: AnalysisCtx; arr, idx: PNode) = + checkLe(c, arr.lowBound, idx) + checkLe(c, idx, arr.highBound) + +proc addLowerBoundAsFacts(c: var AnalysisCtx) = + for v in c.locals: + if not v.blacklisted: + c.guards.addFactLe(v.lower, newSymNode(v.v)) + +proc addSlice(c: var AnalysisCtx; n: PNode; x, le, ri: int) = + checkLocal(c, n) + let le = n.sons[le] + let ri = n.sons[ri] + let x = n.sons[x] + # perform static bounds checking here; and not later! + let oldState = c.guards.len + addLowerBoundAsFacts(c) + c.checkBounds(x, le) + c.checkBounds(x, ri) + c.guards.setLen(oldState) + c.slices.add((x, le, ri, c.currentSpawnId, c.inLoop > 0)) + +template `?`(x): expr = x.renderTree + +proc overlap(m: TModel; x,y,c,d: PNode) = + # X..Y and C..D overlap iff (X <= D and Y >= C) + case proveLe(m, x, d) + of impUnkown: + localError(x.info, + "cannot prove: $# > $#; required for $#..$# disjoint from $#..$#" % + [?x, ?d, ?x, ?y, ?c, ?d]) + of impYes: + case proveLe(m, y, c) + of impUnknown: + localError(x.info, + "cannot prove: $# > $#; required for $#..$# disjoint from $#..$#" % + [?y, ?d, ?x, ?y, ?c, ?d]) + of impYes: + localError(x.info, "$#..$# not disjoint from $#..$#" % [?x, ?y, ?c, ?d]) + of impNo: discard + of impNo: discard + +proc stride(c: AnalysisCtx; n: PNode): BiggestInt = + # note: 0 if it cannot be determined is just right because then + # we analyse 'i..i' and 'i+0 .. i+0' and these are not disjoint! + if n.kind == nkSym and isLocal(n.sym): + let slot = c.getSlot(n[1].sym) + if slot.stride != nil: + result = slot.stride.intVal + else: + for i in 0 .. <n.safeLen: inc(result, stride(c, n.sons[i])) + +proc checkSlicesAreDisjoint(c: var AnalysisCtx) = + # this is the only thing that we need to perform after we have traversed + # the whole tree so that the strides are available. + # First we need to add all the computed lower bounds: + addLowerBoundAsFacts(c) + # Every slice used in a loop needs to be disjoint with itself: + for x,a,b,id,inLoop in items(c.slices): + if inLoop: overlap(c.guards, a,b, a+@c.stride(a), b+@c.stride(b)) + # Another tricky example is: + # while true: + # spawn f(a[i]) + # spawn f(a[i+1]) + # inc i # inc i, 2 would be correct here + # + # Or even worse: + # while true: + # spawn f(a[i+1 .. i+3]) + # spawn f(a[i+4 .. i+5]) + # inc i, 4 + # Prove that i*k*stride + 3 != i*k'*stride + 5 + # For the correct example this amounts to + # i*k*2 != i*k'*2 + 1 + # which is true. + # For now, we don't try to prove things like that at all, even though it'd + # be feasible for many useful examples. Instead we attach the slice to + # a spawn and if the attached spawns differ, we bail out: + for i in 0 .. high(c.slices): + for j in 0 .. high(c.slices): + let x = c.slices[i] + let y = c.slices[j] + if i != j and x.spawnId != y.spawnId and guards.sameTree(x.x, y.x): + if not x.inLoop and not y.inLoop: + overlap(c.guards, x.a, x.b, y.a, y.b) + else: + # ah I cannot resists the temptation and add another sweet heuristic: + # if both slices have the form (i+c)..(i+c) and (i+d)..(i+d) we + # check they are disjoint and c <= stride and d <= stride: + # XXX + localError(x.x.info, "cannot prove $#..$# disjoint from $#..$#" % + [?x.a, ?x.b, ?y.a, ?y.b]) + +proc analyse(c: var AnalysisCtx; n: PNode) + +proc analyseSons(c: var AnalysisCtx; n: PNode) = + for i in 0 .. <safeLen(n): analyse(c, n[i]) + +proc min(a, b: PNode): PNode = + if a.isNil: result = b + elif a.intVal < b.intVal: result = a + else: result = b + +proc analyseCall(c: var AnalysisCtx; n: PNode; op: PSym) = + if op.magic == mSpawn: + inc c.spawns + let oldSpawnId = c.currentSpawnId + c.currentSpawnId = c.spawns + gatherArgs(c, n[1]) + analyseSons(c, n) + c.currentSpawnId = oldSpawnId + elif op.magic == mInc or (op.name.s == "+=" and sfSystemModule in op.owner.flags): + if n[1].kind == nkSym and n[1].isLocal: + let incr = n[1].skipConv + if incr.kind in {nkCharLit..nkUInt32Lit} and incr.intVal > 0: + let slot = c.getSlot(n[1].sym) + slot.stride = min(slot.stride, incr) + analyseSons(c, n) + elif op.name.s == "[]" and sfSystemModule in op.owner.flags: + c.addSlice(n, 1, 2, 3) + analyseSons(c, n) + elif op.name.s == "[]=" and sfSystemModule in op.owner.flags: + c.addSlice(n, 1, 2, 3) + analyseSons(c, n) + else: + analyseSons(c, n) + +proc analyseCase(c: var AnalysisCtx; n: PNode) = + analyse(c, n.sons[0]) + #let oldState = c.locals.len + let oldFacts = c.guards.len + for i in 1.. <n.len: + let branch = n.sons[i] + #setLen(c.locals, oldState) + setLen(c.guards, oldFacts) + addCaseBranchFacts(c.guards, n, i) + for i in 0 .. <branch.len: + analyse(c, branch.sons[i]) + #setLen(c.locals, oldState) + setLen(c.guards, oldFacts) + +proc analyseIf(c: var AnalysisCtx; n: PNode) = + analyse(c, n.sons[0].sons[0]) + let oldFacts = c.guards.len + addFact(c.guards, n.sons[0].sons[0]) + #let oldState = c.locals.len + + analyse(c, n.sons[0].sons[1]) + for i in 1.. <n.len: + let branch = n.sons[i] + setLen(c.guards, oldFacts) + for j in 0..i-1: + addFactNeg(c.guards, n.sons[j].sons[0]) + if branch.len > 1: + addFact(c.guards, branch.sons[0]) + #setLen(c.locals, oldState) + for i in 0 .. <branch.len: + analyse(c, branch.sons[i]) + #setLen(c.locals, oldState) + setLen(c.guards, oldFacts) + +proc analyse(c: var AnalysisCtx; n: PNode) = + case n.kind + of nkAsgn, nkFastAsgn: + # since we already ensure sfAddrTaken is not in s.flags, we only need to + # prevent direct assignments to the monotonic variable: + if n[0].kind == nkSym and n[0].isLocal: + let slot = c.getSlot(it[j].sym) + slot.blackListed = true + invalidateFacts(c.guards, n.sons[0]) + analyseSons(c, n) + addAsgnFact(c.guards, n.sons[0], n.sons[1]) + of nkCallKinds: + # direct call: + if n[0].kind == nkSym: analyseCall(c, n, n[0].sym) + else: analyseSons(c, n) + of nkBracket: + c.addSlice(n, 0, 1, 1) + analyseSons(c, n) + of nkReturnStmt, nkRaiseStmt, nkTryStmt: + localError(n.info, "invalid control flow for 'parallel'") + # 'break' that leaves the 'parallel' section is not valid either + # or maybe we should generate a 'try' XXX + of nkVarSection: + for it in n: + if it.sons[it.len-1].kind != nkEmpty: + for j in 0 .. it.len-3: + if it[j].kind == nkSym and it[j].isLocal: + let slot = c.getSlot(it[j].sym) + if slot.lower.isNil: slot.lower = it.sons[it.len-1] + else: internalError(it.info, "slot already has a lower bound") + analyseSons(c, n) + + of nkCaseStmt: analyseCase(c, n) + of nkIfStmt, nkIfExpr: analyseIf(c, n) + of nkWhileStmt: + analyse(c, n.sons[0]) + # 'while true' loop? + inc c.inLoop + if isTrue(n.sons[0]): + analyseSons(c, n.sons[1]) + else: + # loop may never execute: + let oldState = c.locals.len + let oldFacts = c.guards.len + addFact(c.guards, n.sons[0]) + analyse(c, n.sons[1]) + setLen(c.locals, oldState) + setLen(c.guards, oldFacts) + # we know after the loop the negation holds: + if not containsNode(n.sons[1], nkBreakStmt): + addFactNeg(c.guards, n.sons[0]) + dec c.inLoop + of nkTypeSection, nkProcDef, nkConverterDef, nkMethodDef, nkIteratorDef, + nkMacroDef, nkTemplateDef, nkConstSection, nkPragma: + discard + else: + analyseSons(c, n) + +proc transformSlices(n: PNode): PNode = + if n.kind in nkCalls and n[0].kind == nkSym: + let op = n[0].sym + if op.name.s == "[]" and sfSystemModule in op.owner.flags: + result = copyTree(n) + result.sons[0] = opSlice + return result + if n.safeLen > 0: + result = copyNode(n.kind, n.info, n.len) + for i in 0 .. < n.len: + result.sons[i] = transformSlices(n.sons[i]) + else: + result = n + +proc transformSpawn(owner: PSym; n, barrier: PNode): PNode = + if n.kind in nkCalls: + if n[0].kind == nkSym: + let op = n[0].sym + if op.magic == mSpawn: + result = transformSlices(n) + return wrapProcForSpawn(owner, result, barrier) + elif n.safeLen > 0: + result = copyNode(n.kind, n.info, n.len) + for i in 0 .. < n.len: + result.sons[i] = transformSpawn(owner, n.sons[i], barrier) + else: + result = n + +proc liftParallel*(owner: PSym; n: PNode): PNode = + # this needs to be called after the 'for' loop elimination + + # first pass: + # - detect monotonic local integer variables + # - detect used slices + # - detect used arguments + + var a = initAnalysisCtx() + let body = n.lastSon + analyse(a, body) + if a.spawns == 0: + localError(n.info, "'parallel' section without 'spawn'") + checkSlices(a) + checkArgs(a, body) + + var varSection = newNodeI(nkVarSection, n.info) + var temp = newSym(skTemp, "barrier", owner, n.info) + temp.typ = magicsys.getCompilerProc("Barrier").typ + incl(temp.flags, sfFromGeneric) + + var vpart = newNodeI(nkIdentDefs, n.info, 3) + vpart.sons[0] = newSymNode(temp) + vpart.sons[1] = ast.emptyNode + vpart.sons[2] = indirectAccess(castExpr, field, n.info) + varSection.add vpart + + barrier = genAddrOf(vpart[0]) + + result = newNodeI(nkStmtList, n.info) + generateAliasChecks(a, result) + result.add varSection + result.add callCodeGenProc("openBarrier", barrier) + result.add transformSpawn(owner, body, barrier) + result.add callCodeGenProc("closeBarrier", barrier) diff --git a/compiler/sempass2.nim b/compiler/sempass2.nim index 6afde5f05..c8ce5e787 100644 --- a/compiler/sempass2.nim +++ b/compiler/sempass2.nim @@ -89,7 +89,7 @@ proc initVarViaNew(a: PEffects, n: PNode) = if n.kind != nkSym: return let s = n.sym if {tfNeedsInit, tfNotNil} * s.typ.flags <= {tfNotNil}: - # 'x' is not nil, but that doesn't mean it's not nil children + # 'x' is not nil, but that doesn't mean its "not nil" children # are initialized: initVar(a, n) @@ -478,7 +478,7 @@ proc trackBlock(tracked: PEffects, n: PNode) = else: track(tracked, n) -proc isTrue(n: PNode): bool = +proc isTrue*(n: PNode): bool = n.kind == nkSym and n.sym.kind == skEnumField and n.sym.position != 0 or n.kind == nkIntLit and n.intVal != 0 diff --git a/compiler/vm.nim b/compiler/vm.nim index 218369fa1..0c2c23987 100644 --- a/compiler/vm.nim +++ b/compiler/vm.nim @@ -131,8 +131,9 @@ proc createStrKeepNode(x: var TFullReg) = nfAllConst in x.node.flags: # XXX this is hacky; tests/txmlgen triggers it: x.node = newNode(nkStrLit) - # debug x.node - #assert x.node.kind in {nkStrLit..nkTripleStrLit} + # It not only hackey, it is also wrong for tgentemplate. The primary + # cause of bugs like these is that the VM does not properly distinguish + # between variable defintions (var foo = e) and variable updates (foo = e). template createStr(x) = x.node = newNode(nkStrLit) diff --git a/config/nimrod.cfg b/config/nimrod.cfg index 2817eac55..df3835ace 100644 --- a/config/nimrod.cfg +++ b/config/nimrod.cfg @@ -16,6 +16,7 @@ arm.linux.gcc.linkerexe = "arm-linux-gcc" path="$lib/core" path="$lib/pure" path="$lib/pure/collections" +path="$lib/pure/concurrency" path="$lib/impure" path="$lib/wrappers" # path="$lib/wrappers/cairo" diff --git a/lib/pure/concurrency/cpuinfo.nim b/lib/pure/concurrency/cpuinfo.nim new file mode 100644 index 000000000..dfa819f64 --- /dev/null +++ b/lib/pure/concurrency/cpuinfo.nim @@ -0,0 +1,58 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2014 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## This module implements procs to determine the number of CPUs / cores. + +include "system/inclrtl" + +import strutils, os + +when not defined(windows): + import posix + +when defined(linux): + import linux + +when defined(macosx) or defined(bsd): + const + CTL_HW = 6 + HW_AVAILCPU = 25 + HW_NCPU = 3 + proc sysctl(x: ptr array[0..3, cint], y: cint, z: pointer, + a: var csize, b: pointer, c: int): cint {. + importc: "sysctl", header: "<sys/sysctl.h>".} + +proc countProcessors*(): int {.rtl, extern: "ncpi$1".} = + ## returns the numer of the processors/cores the machine has. + ## Returns 0 if it cannot be detected. + when defined(windows): + var x = getEnv("NUMBER_OF_PROCESSORS") + if x.len > 0: result = parseInt(x.string) + elif defined(macosx) or defined(bsd): + var + mib: array[0..3, cint] + numCPU: int + len: csize + mib[0] = CTL_HW + mib[1] = HW_AVAILCPU + len = sizeof(numCPU) + discard sysctl(addr(mib), 2, addr(numCPU), len, nil, 0) + if numCPU < 1: + mib[1] = HW_NCPU + discard sysctl(addr(mib), 2, addr(numCPU), len, nil, 0) + result = numCPU + elif defined(hpux): + result = mpctl(MPC_GETNUMSPUS, nil, nil) + elif defined(irix): + var SC_NPROC_ONLN {.importc: "_SC_NPROC_ONLN", header: "<unistd.h>".}: cint + result = sysconf(SC_NPROC_ONLN) + else: + result = sysconf(SC_NPROCESSORS_ONLN) + if result <= 0: result = 1 + diff --git a/lib/pure/concurrency/cpuload.nim b/lib/pure/concurrency/cpuload.nim new file mode 100644 index 000000000..3cf6a7392 --- /dev/null +++ b/lib/pure/concurrency/cpuload.nim @@ -0,0 +1,96 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2014 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## This module implements a helper for a thread pool to determine whether +## creating a thread is a good idea. + +when defined(windows): + import winlean, os, strutils, math + + proc `-`(a, b: TFILETIME): int64 = a.rdFileTime - b.rdFileTime +elif defined(linux): + from cpuinfo import countProcessors + +type + ThreadPoolAdvice* = enum + doNothing, + doCreateThread, # create additional thread for throughput + doShutdownThread # too many threads are busy, shutdown one + + ThreadPoolState* = object + when defined(windows): + prevSysKernel, prevSysUser, prevProcKernel, prevProcUser: TFILETIME + calls*: int + +proc advice*(s: var ThreadPoolState): ThreadPoolAdvice = + when defined(windows): + var + sysIdle, sysKernel, sysUser, + procCreation, procExit, procKernel, procUser: TFILETIME + if getSystemTimes(sysIdle, sysKernel, sysUser) == 0 or + getProcessTimes(THandle(-1), procCreation, procExit, + procKernel, procUser) == 0: + return doNothing + if s.calls > 0: + let + sysKernelDiff = sysKernel - s.prevSysKernel + sysUserDiff = sysUser - s.prevSysUser + + procKernelDiff = procKernel - s.prevProcKernel + procUserDiff = procUser - s.prevProcUser + + sysTotal = int(sysKernelDiff + sysUserDiff) + procTotal = int(procKernelDiff + procUserDiff) + # total CPU usage < 85% --> create a new worker thread. + # Measurements show that 100% and often even 90% is not reached even + # if all my cores are busy. + if sysTotal == 0 or procTotal / sysTotal < 0.85: + result = doCreateThread + s.prevSysKernel = sysKernel + s.prevSysUser = sysUser + s.prevProcKernel = procKernel + s.prevProcUser = procUser + elif defined(linux): + proc fscanf(c: TFile, frmt: cstring) {.varargs, importc, + header: "<stdio.h>".} + + var f = open("/proc/loadavg") + var b: float + var busy, total: int + fscanf(f,"%lf %lf %lf %ld/%ld", + addr b, addr b, addr b, addr busy, addr total) + f.close() + let cpus = countProcessors() + if busy-1 < cpus: + result = doCreateThread + elif busy-1 >= cpus*2: + result = doShutdownThread + else: + result = doNothing + else: + # XXX implement this for other OSes + result = doNothing + inc s.calls + +when isMainModule: + proc busyLoop() = + while true: + discard random(80) + os.sleep(100) + + spawn busyLoop() + spawn busyLoop() + spawn busyLoop() + spawn busyLoop() + + var s: ThreadPoolState + + for i in 1 .. 70: + echo advice(s) + os.sleep(1000) diff --git a/lib/pure/concurrency/threadpool.nim b/lib/pure/concurrency/threadpool.nim new file mode 100644 index 000000000..856820c6e --- /dev/null +++ b/lib/pure/concurrency/threadpool.nim @@ -0,0 +1,210 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2014 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## Implements Nimrod's 'spawn'. + +import cpuinfo, cpuload, locks + +{.push stackTrace:off.} + +type + CondVar = object + c: TCond + L: TLock + counter: int + +proc createCondVar(): CondVar = + initCond(result.c) + initLock(result.L) + +proc destroyCondVar(cv: var CondVar) {.inline.} = + deinitCond(cv.c) + deinitLock(cv.L) + +proc await(cv: var CondVar) = + acquire(cv.L) + while cv.counter <= 0: + wait(cv.c, cv.L) + dec cv.counter + release(cv.L) + +proc signal(cv: var CondVar) = + acquire(cv.L) + inc cv.counter + release(cv.L) + signal(cv.c) + +type + Barrier* {.compilerProc.} = object + counter: int + cv: CondVar + +proc barrierEnter*(b: ptr Barrier) {.compilerProc.} = + atomicInc b.counter + +proc barrierLeave*(b: ptr Barrier) {.compilerProc.} = + atomicDec b.counter + if b.counter <= 0: signal(b.cv) + +proc openBarrier*(b: ptr Barrier) {.compilerProc.} = + b.counter = 0 + b.cv = createCondVar() + +proc closeBarrier*(b: ptr Barrier) {.compilerProc.} = + await(b.cv) + destroyCondVar(b.cv) + +{.pop.} + +# ---------------------------------------------------------------------------- + +type + WorkerProc = proc (thread, args: pointer) {.nimcall, gcsafe.} + Worker = object + taskArrived: CondVar + taskStarted: CondVar #\ + # task data: + f: WorkerProc + data: pointer + ready: bool # put it here for correct alignment! + initialized: bool # whether it has even been initialized + +proc nimArgsPassingDone(p: pointer) {.compilerProc.} = + let w = cast[ptr Worker](p) + signal(w.taskStarted) + +var + gSomeReady = createCondVar() + readyWorker: ptr Worker + +proc slave(w: ptr Worker) {.thread.} = + while true: + w.ready = true + readyWorker = w + signal(gSomeReady) + await(w.taskArrived) + assert(not w.ready) + w.f(w, w.data) + +const + MaxThreadPoolSize* = 256 ## maximal size of the thread pool. 256 threads + ## should be good enough for anybody ;-) + +var + currentPoolSize: int + maxPoolSize = MaxThreadPoolSize + minPoolSize = 4 + +proc setMinPoolSize*(size: range[1..MaxThreadPoolSize]) = + ## sets the minimal thread pool size. The default value of this is 4. + minPoolSize = size + +proc setMaxPoolSize*(size: range[1..MaxThreadPoolSize]) = + ## sets the minimal thread pool size. The default value of this + ## is ``MaxThreadPoolSize``. + maxPoolSize = size + +var + workers: array[MaxThreadPoolSize, TThread[ptr Worker]] + workersData: array[MaxThreadPoolSize, Worker] + +proc activateThread(i: int) {.noinline.} = + workersData[i].taskArrived = createCondVar() + workersData[i].taskStarted = createCondVar() + workersData[i].initialized = true + createThread(workers[i], slave, addr(workersData[i])) + +proc setup() = + currentPoolSize = min(countProcessors(), MaxThreadPoolSize) + readyWorker = addr(workersData[0]) + for i in 0.. <currentPoolSize: activateThread(i) + +proc preferSpawn*(): bool = + ## Use this proc to determine quickly if a 'spawn' or a direct call is + ## preferable. If it returns 'true' a 'spawn' may make sense. In general + ## it is not necessary to call this directly; use 'spawnX' instead. + result = gSomeReady.counter > 0 + +proc spawn*(call: stmt) {.magic: "Spawn".} + ## always spawns a new task, so that the 'call' is never executed on + ## the calling thread. 'call' has to be proc call 'p(...)' where 'p' + ## is gcsafe and has 'void' as the return type. + +template spawnX*(call: stmt) = + ## spawns a new task if a CPU core is ready, otherwise executes the + ## call in the calling thread. Usually it is advised to + ## use 'spawn' in order to not block the producer for an unknown + ## amount of time. 'call' has to be proc call 'p(...)' where 'p' + ## is gcsafe and has 'void' as the return type. + if preferSpawn(): spawn call + else: call + +proc parallel*(body: stmt) {.magic: "Parallel".} + ## a parallel section can be used to execute a block in parallel. ``body`` + ## has to be in a DSL that is a particular subset of the language. Please + ## refer to the manual for further information. + +var + state: ThreadPoolState + stateLock: TLock + +initLock stateLock + +proc selectWorker(w: ptr Worker; fn: WorkerProc; data: pointer): bool = + if cas(addr w.ready, true, false): + w.data = data + w.f = fn + signal(w.taskArrived) + await(w.taskStarted) + result = true + +proc nimSpawn(fn: WorkerProc; data: pointer) {.compilerProc.} = + # implementation of 'spawn' that is used by the code generator. + while true: + if selectWorker(readyWorker, fn, data): return + for i in 0.. <currentPoolSize: + if selectWorker(addr(workersData[i]), fn, data): return + # determine what to do, but keep in mind this is expensive too: + # state.calls < maxPoolSize: warmup phase + # (state.calls and 127) == 0: periodic check + if state.calls < maxPoolSize or (state.calls and 127) == 0: + # ensure the call to 'advice' is atomic: + if tryAcquire(stateLock): + case advice(state) + of doNothing: discard + of doCreateThread: + if currentPoolSize < maxPoolSize: + if not workersData[currentPoolSize].initialized: + activateThread(currentPoolSize) + let w = addr(workersData[currentPoolSize]) + inc currentPoolSize + if selectWorker(w, fn, data): + release(stateLock) + return + # else we didn't succeed but some other thread, so do nothing. + of doShutdownThread: + if currentPoolSize > minPoolSize: dec currentPoolSize + # we don't free anything here. Too dangerous. + release(stateLock) + # else the acquire failed, but this means some + # other thread succeeded, so we don't need to do anything here. + await(gSomeReady) + +proc sync*() = + ## a simple barrier to wait for all spawn'ed tasks. If you need more elaborate + ## waiting, you have to use an explicit barrier. + while true: + var allReady = true + for i in 0 .. <currentPoolSize: + if not allReady: break + allReady = allReady and workersData[i].ready + if allReady: break + await(gSomeReady) + +setup() diff --git a/lib/pure/osproc.nim b/lib/pure/osproc.nim index 6e250f9d5..6250ce257 100644 --- a/lib/pure/osproc.nim +++ b/lib/pure/osproc.nim @@ -1,7 +1,7 @@ # # # Nimrod's Runtime Library -# (c) Copyright 2013 Andreas Rumpf +# (c) Copyright 2014 Andreas Rumpf # # See the file "copying.txt", included in this # distribution, for details about the copyright. @@ -13,7 +13,7 @@ include "system/inclrtl" import - strutils, os, strtabs, streams + strutils, os, strtabs, streams, cpuinfo when defined(windows): import winlean @@ -225,42 +225,10 @@ proc errorHandle*(p: PProcess): TFileHandle {.rtl, extern: "nosp$1", ## it is closed when closing the PProcess ``p``. result = p.errHandle -when defined(macosx) or defined(bsd): - const - CTL_HW = 6 - HW_AVAILCPU = 25 - HW_NCPU = 3 - proc sysctl(x: ptr array[0..3, cint], y: cint, z: pointer, - a: var csize, b: pointer, c: int): cint {. - importc: "sysctl", header: "<sys/sysctl.h>".} - proc countProcessors*(): int {.rtl, extern: "nosp$1".} = ## returns the numer of the processors/cores the machine has. ## Returns 0 if it cannot be detected. - when defined(windows): - var x = getEnv("NUMBER_OF_PROCESSORS") - if x.len > 0: result = parseInt(x.string) - elif defined(macosx) or defined(bsd): - var - mib: array[0..3, cint] - numCPU: int - len: csize - mib[0] = CTL_HW - mib[1] = HW_AVAILCPU - len = sizeof(numCPU) - discard sysctl(addr(mib), 2, addr(numCPU), len, nil, 0) - if numCPU < 1: - mib[1] = HW_NCPU - discard sysctl(addr(mib), 2, addr(numCPU), len, nil, 0) - result = numCPU - elif defined(hpux): - result = mpctl(MPC_GETNUMSPUS, nil, nil) - elif defined(irix): - var SC_NPROC_ONLN {.importc: "_SC_NPROC_ONLN", header: "<unistd.h>".}: cint - result = sysconf(SC_NPROC_ONLN) - else: - result = sysconf(SC_NPROCESSORS_ONLN) - if result <= 0: result = 1 + result = cpuinfo.countProcessors() proc execProcesses*(cmds: openArray[string], options = {poStdErrToStdOut, poParentStreams}, diff --git a/lib/system.nim b/lib/system.nim index ad98540a7..fbd905afa 100644 --- a/lib/system.nim +++ b/lib/system.nim @@ -2934,6 +2934,3 @@ when not defined(booting): template isStatic*(x): expr = compiles(static(x)) # checks whether `x` is a value known at compile-time - -when hasThreadSupport: - when hostOS != "standalone": include "system/sysspawn" diff --git a/lib/system/atomics.nim b/lib/system/atomics.nim index b1a96b209..c6c603b19 100644 --- a/lib/system/atomics.nim +++ b/lib/system/atomics.nim @@ -1,13 +1,14 @@ # # # Nimrod's Runtime Library -# (c) Copyright 2012 Andreas Rumpf +# (c) Copyright 2014 Andreas Rumpf # # See the file "copying.txt", included in this # distribution, for details about the copyright. # ## Atomic operations for Nimrod. +{.push stackTrace:off.} when (defined(gcc) or defined(llvm_gcc)) and hasThreadSupport: type @@ -203,3 +204,31 @@ proc atomicDec*(memLoc: var int, x: int = 1): int = else: dec(memLoc, x) result = memLoc + +when defined(windows) and not defined(gcc): + proc interlockedCompareExchange(p: pointer; exchange, comparand: int32): int32 + {.importc: "InterlockedCompareExchange", header: "<windows.h>", cdecl.} + + proc cas*[T: bool|int](p: ptr T; oldValue, newValue: T): bool = + interlockedCompareExchange(p, newValue.int32, oldValue.int32) != 0 + +else: + # this is valid for GCC and Intel C++ + proc cas*[T: bool|int](p: ptr T; oldValue, newValue: T): bool + {.importc: "__sync_bool_compare_and_swap", nodecl.} + # XXX is this valid for 'int'? + + +when (defined(x86) or defined(amd64)) and defined(gcc): + proc cpuRelax {.inline.} = + {.emit: """asm volatile("pause" ::: "memory");""".} +elif (defined(x86) or defined(amd64)) and defined(vcc): + proc cpuRelax {.importc: "YieldProcessor", header: "<windows.h>".} +elif defined(intelc): + proc cpuRelax {.importc: "_mm_pause", header: "xmmintrin.h".} +elif false: + from os import sleep + + proc cpuRelax {.inline.} = os.sleep(1) + +{.pop.} diff --git a/lib/system/sysspawn.nim b/lib/system/sysspawn.nim index dabf35a3e..95cdba65d 100644 --- a/lib/system/sysspawn.nim +++ b/lib/system/sysspawn.nim @@ -14,30 +14,6 @@ when not defined(NimString): {.push stackTrace:off.} -when (defined(x86) or defined(amd64)) and defined(gcc): - proc cpuRelax {.inline.} = - {.emit: """asm volatile("pause" ::: "memory");""".} -elif (defined(x86) or defined(amd64)) and defined(vcc): - proc cpuRelax {.importc: "YieldProcessor", header: "<windows.h>".} -elif defined(intelc): - proc cpuRelax {.importc: "_mm_pause", header: "xmmintrin.h".} -elif false: - from os import sleep - - proc cpuRelax {.inline.} = os.sleep(1) - -when defined(windows) and not defined(gcc): - proc interlockedCompareExchange(p: pointer; exchange, comparand: int32): int32 - {.importc: "InterlockedCompareExchange", header: "<windows.h>", cdecl.} - - proc cas(p: ptr bool; oldValue, newValue: bool): bool = - interlockedCompareExchange(p, newValue.int32, oldValue.int32) != 0 - -else: - # this is valid for GCC and Intel C++ - proc cas(p: ptr bool; oldValue, newValue: bool): bool - {.importc: "__sync_bool_compare_and_swap", nodecl.} - # We declare our own condition variables here to get rid of the dummy lock # on Windows: @@ -54,6 +30,9 @@ proc createCondVar(): CondVar = initSysLock(result.stupidLock) #acquireSys(result.stupidLock) +proc destroyCondVar(c: var CondVar) {.inline.} = + deinitSysCond(c.c) + proc await(cv: var CondVar) = when defined(posix): acquireSys(cv.stupidLock) @@ -100,6 +79,26 @@ proc signal(cv: var FastCondVar) = #if cas(addr cv.slowPath, true, false): signal(cv.slow) +type + Barrier* {.compilerProc.} = object + counter: int + cv: CondVar + +proc barrierEnter*(b: ptr Barrier) {.compilerProc.} = + atomicInc b.counter + +proc barrierLeave*(b: ptr Barrier) {.compilerProc.} = + atomicDec b.counter + if b.counter <= 0: signal(b.cv) + +proc openBarrier*(b: ptr Barrier) {.compilerProc.} = + b.counter = 0 + b.cv = createCondVar() + +proc closeBarrier*(b: ptr Barrier) {.compilerProc.} = + await(b.cv) + destroyCondVar(b.cv) + {.pop.} # ---------------------------------------------------------------------------- diff --git a/tests/system/tsysspawn.nim b/tests/system/tsysspawn.nim index 0388918aa..fc7921b0e 100644 --- a/tests/system/tsysspawn.nim +++ b/tests/system/tsysspawn.nim @@ -4,20 +4,22 @@ discard """ cmd: "nimrod $target --threads:on $options $file" """ +import threadpool + var x, y = 0 proc p1 = - for i in 0 .. 1_000_000: + for i in 0 .. 10_000: discard - inc x + atomicInc x proc p2 = - for i in 0 .. 1_000_000: + for i in 0 .. 10_000: discard - inc y, 2 + atomicInc y, 2 for i in 0.. 3: spawn(p1()) diff --git a/tests/system/tsysspawnbadarg.nim b/tests/system/tsysspawnbadarg.nim index ace074602..ce3c5611b 100644 --- a/tests/system/tsysspawnbadarg.nim +++ b/tests/system/tsysspawnbadarg.nim @@ -4,4 +4,6 @@ discard """ cmd: "nimrod $target --threads:on $options $file" """ +import threadpool + spawn(1) diff --git a/web/news.txt b/web/news.txt index 0bbae7b7b..b7403a3c7 100644 --- a/web/news.txt +++ b/web/news.txt @@ -2,6 +2,23 @@ News ==== +.. + 2014-06-29 Version 0.9.6 released + ================================= + + Changes affecting backwards compatibility + ----------------------------------------- + + - ``spawn`` now uses an elaborate self-adapting thread pool and as such + has been moved into its own module. So to use it, you now have to import + ``threadpool``. + + + Library Additions + ----------------- + + - Added module ``cpuinfo``. + - Added module ``threadpool``. 2014-04-21 Version 0.9.4 released |