diff options
50 files changed, 2155 insertions, 215 deletions
diff --git a/compiler/ast.nim b/compiler/ast.nim index d74818e1f..7ff22e184 100644 --- a/compiler/ast.nim +++ b/compiler/ast.nim @@ -561,7 +561,7 @@ type mFloat, mFloat32, mFloat64, mFloat128, mBool, mChar, mString, mCstring, mPointer, mEmptySet, mIntSetBaseType, mNil, mExpr, mStmt, mTypeDesc, - mVoidType, mPNimrodNode, mShared, mGuarded, mLock, mSpawn, + mVoidType, mPNimrodNode, mShared, mGuarded, mLock, mSpawn, mDeepCopy, mIsMainModule, mCompileDate, mCompileTime, mNimrodVersion, mNimrodMajor, mNimrodMinor, mNimrodPatch, mCpuEndian, mHostOS, mHostCPU, mAppType, mNaN, mInf, mNegInf, @@ -606,9 +606,9 @@ const # thus cannot be overloaded (also documented in the spec!): SpecialSemMagics* = { mDefined, mDefinedInScope, mCompiles, mLow, mHigh, mSizeOf, mIs, mOf, - mEcho, mShallowCopy, mExpandToAst} + mEcho, mShallowCopy, mExpandToAst, mParallel, mSpawn} -type +type PNode* = ref TNode TNodeSeq* = seq[PNode] PType* = ref TType @@ -886,6 +886,8 @@ const nkCallKinds* = {nkCall, nkInfix, nkPrefix, nkPostfix, nkCommand, nkCallStrLit, nkHiddenCallConv} + nkIdentKinds* = {nkIdent, nkSym, nkAccQuoted, nkOpenSymChoice, + nkClosedSymChoice} nkLiterals* = {nkCharLit..nkTripleStrLit} nkLambdaKinds* = {nkLambda, nkDo} diff --git a/compiler/ccgcalls.nim b/compiler/ccgcalls.nim index a7840305d..71e23aa1d 100644 --- a/compiler/ccgcalls.nim +++ b/compiler/ccgcalls.nim @@ -86,7 +86,7 @@ proc openArrayLoc(p: BProc, n: PNode): PRope = initLocExpr(p, q[2], b) initLocExpr(p, q[3], c) let fmt = - case skipTypes(a.t, abstractVar).kind + case skipTypes(a.t, abstractVar+{tyPtr}).kind of tyOpenArray, tyVarargs, tyArray, tyArrayConstr: "($1)+($2), ($3)-($2)+1" of tyString, tySequence: diff --git a/compiler/ccgexprs.nim b/compiler/ccgexprs.nim index 13b3091fc..e6b22819f 100644 --- a/compiler/ccgexprs.nim +++ b/compiler/ccgexprs.nim @@ -1652,7 +1652,10 @@ proc genMagicExpr(p: BProc, e: PNode, d: var TLoc, op: TMagic) = of mSlurp..mQuoteAst: localError(e.info, errXMustBeCompileTime, e.sons[0].sym.name.s) of mSpawn: - let n = lowerings.wrapProcForSpawn(p.module.module, e.sons[1]) + let n = lowerings.wrapProcForSpawn(p.module.module, e, e.typ, nil, nil) + expr(p, n, d) + of mParallel: + let n = semparallel.liftParallel(p.module.module, e) expr(p, n, d) else: internalError(e.info, "genMagicExpr: " & $op) diff --git a/compiler/cgen.nim b/compiler/cgen.nim index 7094990f7..a5852c735 100644 --- a/compiler/cgen.nim +++ b/compiler/cgen.nim @@ -14,7 +14,8 @@ import options, intsets, nversion, nimsets, msgs, crc, bitsets, idents, lists, types, ccgutils, os, times, ropes, math, passes, rodread, wordrecg, treetab, cgmeth, - rodutils, renderer, idgen, cgendata, ccgmerge, semfold, aliases, lowerings + rodutils, renderer, idgen, cgendata, ccgmerge, semfold, aliases, lowerings, + semparallel when options.hasTinyCBackend: import tccgen diff --git a/compiler/guards.nim b/compiler/guards.nim index 2b69024d2..4cf06fe02 100644 --- a/compiler/guards.nim +++ b/compiler/guards.nim @@ -1,7 +1,7 @@ # # # The Nimrod Compiler -# (c) Copyright 2013 Andreas Rumpf +# (c) Copyright 2014 Andreas Rumpf # # See the file "copying.txt", included in this # distribution, for details about the copyright. @@ -9,7 +9,8 @@ ## This module implements the 'implies' relation for guards. -import ast, astalgo, msgs, magicsys, nimsets, trees, types, renderer, idents +import ast, astalgo, msgs, magicsys, nimsets, trees, types, renderer, idents, + saturate const someEq = {mEqI, mEqI64, mEqF64, mEqEnum, mEqCh, mEqB, mEqRef, mEqProc, @@ -25,6 +26,17 @@ const someIn = {mInRange, mInSet} + someHigh = {mHigh} + # we don't list unsigned here because wrap around semantics suck for + # proving anything: + someAdd = {mAddI, mAddI64, mAddF64, mSucc} + someSub = {mSubI, mSubI64, mSubF64, mPred} + someMul = {mMulI, mMulI64, mMulF64} + someDiv = {mDivI, mDivI64, mDivF64} + someMod = {mModI, mModI64} + someMax = {mMaxI, mMaxI64, mMaxF64} + someMin = {mMinI, mMinI64, mMinF64} + proc isValue(n: PNode): bool = n.kind in {nkCharLit..nkNilLit} proc isLocation(n: PNode): bool = not n.isValue @@ -69,19 +81,24 @@ proc isLetLocation(m: PNode, isApprox: bool): bool = proc interestingCaseExpr*(m: PNode): bool = isLetLocation(m, true) -proc getMagicOp(name: string, m: TMagic): PSym = +proc createMagic*(name: string, m: TMagic): PSym = result = newSym(skProc, getIdent(name), nil, unknownLineInfo()) result.magic = m let - opLe = getMagicOp("<=", mLeI) - opLt = getMagicOp("<", mLtI) - opAnd = getMagicOp("and", mAnd) - opOr = getMagicOp("or", mOr) - opNot = getMagicOp("not", mNot) - opIsNil = getMagicOp("isnil", mIsNil) - opContains = getMagicOp("contains", mInSet) - opEq = getMagicOp("==", mEqI) + opLe = createMagic("<=", mLeI) + opLt = createMagic("<", mLtI) + opAnd = createMagic("and", mAnd) + opOr = createMagic("or", mOr) + opNot = createMagic("not", mNot) + opIsNil = createMagic("isnil", mIsNil) + opContains = createMagic("contains", mInSet) + opEq = createMagic("==", mEqI) + opAdd = createMagic("+", mAddI) + opSub = createMagic("-", mSubI) + opMul = createMagic("*", mMulI) + opDiv = createMagic("div", mDivI) + opLen = createMagic("len", mLengthSeq) proc swapArgs(fact: PNode, newOp: PSym): PNode = result = newNodeI(nkCall, fact.info, 3) @@ -137,17 +154,141 @@ proc neg(n: PNode): PNode = result.sons[0] = newSymNode(opNot) result.sons[1] = n -proc buildIsNil(arg: PNode): PNode = - result = newNodeI(nkCall, arg.info, 2) - result.sons[0] = newSymNode(opIsNil) - result.sons[1] = arg +proc buildCall(op: PSym; a: PNode): PNode = + result = newNodeI(nkCall, a.info, 2) + result.sons[0] = newSymNode(op) + result.sons[1] = a + +proc buildCall(op: PSym; a, b: PNode): PNode = + result = newNodeI(nkInfix, a.info, 3) + result.sons[0] = newSymNode(op) + result.sons[1] = a + result.sons[2] = b + +proc `|+|`(a, b: PNode): PNode = + result = copyNode(a) + if a.kind in {nkCharLit..nkUInt64Lit}: result.intVal = a.intVal |+| b.intVal + else: result.floatVal = a.floatVal + b.floatVal + +proc `|*|`(a, b: PNode): PNode = + result = copyNode(a) + if a.kind in {nkCharLit..nkUInt64Lit}: result.intVal = a.intVal |*| b.intVal + else: result.floatVal = a.floatVal * b.floatVal + +proc negate(a, b, res: PNode): PNode = + if b.kind in {nkCharLit..nkUInt64Lit} and b.intVal != low(BiggestInt): + var b = copyNode(b) + b.intVal = -b.intVal + if a.kind in {nkCharLit..nkUInt64Lit}: + b.intVal = b.intVal |+| a.intVal + result = b + else: + result = buildCall(opAdd, a, b) + elif b.kind in {nkFloatLit..nkFloat64Lit}: + var b = copyNode(b) + b.floatVal = -b.floatVal + result = buildCall(opAdd, a, b) + else: + result = res + +proc zero(): PNode = nkIntLit.newIntNode(0) +proc one(): PNode = nkIntLit.newIntNode(1) +proc minusOne(): PNode = nkIntLit.newIntNode(-1) + +proc lowBound*(x: PNode): PNode = + result = nkIntLit.newIntNode(firstOrd(x.typ)) + result.info = x.info + +proc highBound*(x: PNode): PNode = + result = if x.typ.skipTypes(abstractInst).kind == tyArray: + nkIntLit.newIntNode(lastOrd(x.typ)) + else: + opAdd.buildCall(opLen.buildCall(x), minusOne()) + result.info = x.info + +proc reassociation(n: PNode): PNode = + result = n + # (foo+5)+5 --> foo+10; same for '*' + case result.getMagic + of someAdd: + if result[2].isValue and + result[1].getMagic in someAdd and result[1][2].isValue: + result = opAdd.buildCall(result[1][1], result[1][2] |+| result[2]) + of someMul: + if result[2].isValue and + result[1].getMagic in someMul and result[1][2].isValue: + result = opAdd.buildCall(result[1][1], result[1][2] |*| result[2]) + else: discard + +proc canon*(n: PNode): PNode = + # XXX for now only the new code in 'semparallel' uses this + if n.safeLen >= 1: + result = shallowCopy(n) + for i in 0 .. < n.len: + result.sons[i] = canon(n.sons[i]) + else: + result = n + case result.getMagic + of someEq, someAdd, someMul, someMin, someMax: + # these are symmetric; put value as last: + if result.sons[1].isValue and not result.sons[2].isValue: + result = swapArgs(result, result.sons[0].sym) + # (4 + foo) + 2 --> (foo + 4) + 2 + of someHigh: + # high == len+(-1) + result = opAdd.buildCall(opLen.buildCall(result[1]), minusOne()) + of mUnaryMinusI, mUnaryMinusI64: + result = buildCall(opAdd, result[1], newIntNode(nkIntLit, -1)) + of someSub: + # x - 4 --> x + (-4) + result = negate(result[1], result[2], result) + of someLen: + result.sons[0] = opLen.newSymNode + else: discard + + result = skipConv(result) + result = reassociation(result) + # most important rule: (x-4) < a.len --> x < a.len+4 + case result.getMagic + of someLe, someLt: + let x = result[1] + let y = result[2] + if x.kind in nkCallKinds and x.len == 3 and x[2].isValue and + isLetLocation(x[1], true): + case x.getMagic + of someSub: + result = buildCall(result[0].sym, x[1], + reassociation(opAdd.buildCall(y, x[2]))) + of someAdd: + # Rule A: + let plus = negate(y, x[2], nil).reassociation + if plus != nil: result = buildCall(result[0].sym, x[1], plus) + else: discard + elif y.kind in nkCallKinds and y.len == 3 and y[2].isValue and + isLetLocation(y[1], true): + # a.len < x-3 + case y.getMagic + of someSub: + result = buildCall(result[0].sym, y[1], + reassociation(opAdd.buildCall(x, y[2]))) + of someAdd: + let plus = negate(x, y[2], nil).reassociation + # ensure that Rule A will not trigger afterwards with the + # additional 'not isLetLocation' constraint: + if plus != nil and not isLetLocation(x, true): + result = buildCall(result[0].sym, plus, y[1]) + else: discard + else: discard + +proc `+@`*(a: PNode; b: BiggestInt): PNode = + canon(if b != 0: opAdd.buildCall(a, nkIntLit.newIntNode(b)) else: a) proc usefulFact(n: PNode): PNode = case n.getMagic of someEq: if skipConv(n.sons[2]).kind == nkNilLit and ( isLetLocation(n.sons[1], false) or isVar(n.sons[1])): - result = buildIsNil(n.sons[1]) + result = opIsNil.buildCall(n.sons[1]) else: if isLetLocation(n.sons[1], true) or isLetLocation(n.sons[2], true): # XXX algebraic simplifications! 'i-1 < a.len' --> 'i < a.len+1' @@ -217,7 +358,7 @@ proc addFactNeg*(m: var TModel, n: PNode) = let n = n.neg if n != nil: addFact(m, n) -proc sameTree(a, b: PNode): bool = +proc sameTree*(a, b: PNode): bool = result = false if a == b: result = true @@ -519,7 +660,144 @@ proc doesImply*(facts: TModel, prop: PNode): TImplication = if result != impUnknown: return proc impliesNotNil*(facts: TModel, arg: PNode): TImplication = - result = doesImply(facts, buildIsNil(arg).neg) + result = doesImply(facts, opIsNil.buildCall(arg).neg) + +proc simpleSlice*(a, b: PNode): BiggestInt = + # returns 'c' if a..b matches (i+c)..(i+c), -1 otherwise. (i)..(i) is matched + # as if it is (i+0)..(i+0). + if guards.sameTree(a, b): + if a.getMagic in someAdd and a[2].kind in {nkCharLit..nkUInt64Lit}: + result = a[2].intVal + else: + result = 0 + else: + result = -1 + +proc pleViaModel(model: TModel; aa, bb: PNode): TImplication + +proc ple(m: TModel; a, b: PNode): TImplication = + template `<=?`(a,b): expr = ple(m,a,b) == impYes + # 0 <= 3 + if a.isValue and b.isValue: + return if leValue(a, b): impYes else: impNo + + # use type information too: x <= 4 iff high(x) <= 4 + if b.isValue and a.typ != nil and a.typ.isOrdinalType: + if lastOrd(a.typ) <= b.intVal: return impYes + # 3 <= x iff low(x) <= 3 + if a.isValue and b.typ != nil and b.typ.isOrdinalType: + if firstOrd(b.typ) <= a.intVal: return impYes + + # x <= x + if sameTree(a, b): return impYes + + # 0 <= x.len + if b.getMagic in someLen and a.isValue: + if a.intVal <= 0: return impYes + + # x <= y+c if 0 <= c and x <= y + if b.getMagic in someAdd and zero() <=? b[2] and a <=? b[1]: return impYes + + # x+c <= y if c <= 0 and x <= y + if a.getMagic in someAdd and a[2] <=? zero() and a[1] <=? b: return impYes + + # x <= y*c if 1 <= c and x <= y and 0 <= y + if b.getMagic in someMul: + if a <=? b[1] and one() <=? b[2] and zero() <=? b[1]: return impYes + + # x div c <= y if 1 <= c and 0 <= y and x <= y: + if a.getMagic in someDiv: + if one() <=? a[2] and zero() <=? b and a[1] <=? b: return impYes + + # slightly subtle: + # x <= max(y, z) iff x <= y or x <= z + # note that 'x <= max(x, z)' is a special case of the above rule + if b.getMagic in someMax: + if a <=? b[1] or a <=? b[2]: return impYes + + # min(x, y) <= z iff x <= z or y <= z + if a.getMagic in someMin: + if a[1] <=? b or a[2] <=? b: return impYes + + # use the knowledge base: + return pleViaModel(m, a, b) + #return doesImply(m, opLe.buildCall(a, b)) + +type TReplacements = seq[tuple[a,b: PNode]] + +proc replaceSubTree(n, x, by: PNode): PNode = + if sameTree(n, x): + result = by + elif hasSubTree(n, x): + result = shallowCopy(n) + for i in 0 .. safeLen(n)-1: + result.sons[i] = replaceSubTree(n.sons[i], x, by) + else: + result = n + +proc applyReplacements(n: PNode; rep: TReplacements): PNode = + result = n + for x in rep: result = result.replaceSubTree(x.a, x.b) + +proc pleViaModelRec(m: var TModel; a, b: PNode): TImplication = + # now check for inferrable facts: a <= b and b <= c implies a <= c + for i in 0..m.high: + let fact = m[i] + if fact != nil and fact.getMagic in someLe: + # x <= y implies a <= b if a <= x and y <= b + let x = fact[1] + let y = fact[2] + # mark as used: + m[i] = nil + if ple(m, a, x) == impYes: + if ple(m, y, b) == impYes: return impYes + #if pleViaModelRec(m, y, b): return impYes + # fact: 16 <= i + # x y + # question: i <= 15? no! + result = impliesLe(fact, a, b) + if result != impUnknown: return result + if sameTree(y, a): + result = ple(m, x, b) + if result != impUnknown: return result + +proc pleViaModel(model: TModel; aa, bb: PNode): TImplication = + # compute replacements: + var replacements: TReplacements = @[] + for fact in model: + if fact != nil and fact.getMagic in someEq: + let a = fact[1] + let b = fact[2] + if a.kind == nkSym: replacements.add((a,b)) + else: replacements.add((b,a)) + var m: TModel + var a = aa + var b = bb + if replacements.len > 0: + m = @[] + # make the other facts consistent: + for fact in model: + if fact != nil and fact.getMagic notin someEq: + # XXX 'canon' should not be necessary here, but it is + m.add applyReplacements(fact, replacements).canon + a = applyReplacements(aa, replacements) + b = applyReplacements(bb, replacements) + else: + # we have to make a copy here, because the model will be modified: + m = model + result = pleViaModelRec(m, a, b) + +proc proveLe*(m: TModel; a, b: PNode): TImplication = + let x = canon(opLe.buildCall(a, b)) + #echo "ROOT ", renderTree(x[1]), " <=? ", renderTree(x[2]) + result = ple(m, x[1], x[2]) + if result == impUnknown: + # try an alternative: a <= b iff not (b < a) iff not (b+1 <= a): + let y = canon(opLe.buildCall(opAdd.buildCall(b, one()), a)) + result = ~ple(m, y[1], y[2]) + +proc addFactLe*(m: var TModel; a, b: PNode) = + m.add canon(opLe.buildCall(a, b)) proc settype(n: PNode): PType = result = newType(tySet, n.typ.owner) diff --git a/compiler/lowerings.nim b/compiler/lowerings.nim index 1b9e5fe0f..e2afa4362 100644 --- a/compiler/lowerings.nim +++ b/compiler/lowerings.nim @@ -13,6 +13,8 @@ const genPrefix* = ":tmp" # prefix for generated names import ast, astalgo, types, idents, magicsys, msgs, options +from guards import createMagic +from trees import getMagic proc newTupleAccess*(tup: PNode, i: int): PNode = result = newNodeIT(nkBracketExpr, tup.info, tup.typ.skipTypes( @@ -68,6 +70,7 @@ proc addField*(obj: PType; s: PSym) = var field = newSym(skField, getIdent(s.name.s & $s.id), s.owner, s.info) let t = skipIntLit(s.typ) field.typ = t + assert t.kind != tyStmt field.position = sonsLen(obj.n) addSon(obj.n, newSymNode(field)) @@ -79,19 +82,30 @@ proc newDotExpr(obj, b: PSym): PNode = addSon(result, newSymNode(field)) result.typ = field.typ -proc indirectAccess*(a: PNode, b: PSym, info: TLineInfo): PNode = +proc indirectAccess*(a: PNode, b: string, info: TLineInfo): PNode = # returns a[].b as a node var deref = newNodeI(nkHiddenDeref, info) - deref.typ = a.typ.sons[0] - assert deref.typ.kind == tyObject - let field = getSymFromList(deref.typ.n, getIdent(b.name.s & $b.id)) - assert field != nil, b.name.s + deref.typ = a.typ.skipTypes(abstractInst).sons[0] + var t = deref.typ.skipTypes(abstractInst) + var field: PSym + while true: + assert t.kind == tyObject + field = getSymFromList(t.n, getIdent(b)) + if field != nil: break + t = t.sons[0] + if t == nil: break + t = t.skipTypes(abstractInst) + assert field != nil, b addSon(deref, a) result = newNodeI(nkDotExpr, info) addSon(result, deref) addSon(result, newSymNode(field)) result.typ = field.typ +proc indirectAccess*(a: PNode, b: PSym, info: TLineInfo): PNode = + # returns a[].b as a node + result = indirectAccess(a, b.name.s & $b.id, info) + proc indirectAccess*(a, b: PSym, info: TLineInfo): PNode = result = indirectAccess(newSymNode(a), b, info) @@ -101,6 +115,11 @@ proc genAddrOf*(n: PNode): PNode = result.typ = newType(tyPtr, n.typ.owner) result.typ.rawAddSon(n.typ) +proc genDeref*(n: PNode): PNode = + result = newNodeIT(nkHiddenDeref, n.info, + n.typ.skipTypes(abstractInst).sons[0]) + result.add n + proc callCodegenProc*(name: string, arg1: PNode; arg2, arg3: PNode = nil): PNode = result = newNodeI(nkCall, arg1.info) @@ -112,13 +131,120 @@ proc callCodegenProc*(name: string, arg1: PNode; result.add arg1 if arg2 != nil: result.add arg2 if arg3 != nil: result.add arg3 + result.typ = sym.typ.sons[0] + +proc callProc(a: PNode): PNode = + result = newNodeI(nkCall, a.info) + result.add a + result.typ = a.typ.sons[0] + +# we have 4 cases to consider: +# - a void proc --> nothing to do +# - a proc returning GC'ed memory --> requires a flowVar +# - a proc returning non GC'ed memory --> pass as hidden 'var' parameter +# - not in a parallel environment --> requires a flowVar for memory safety +type + TSpawnResult = enum + srVoid, srFlowVar, srByVar + TFlowVarKind = enum + fvInvalid # invalid type T for 'FlowVar[T]' + fvGC # FlowVar of a GC'ed type + fvBlob # FlowVar of a blob type + +proc spawnResult(t: PType; inParallel: bool): TSpawnResult = + if t.isEmptyType: srVoid + elif inParallel and not containsGarbageCollectedRef(t): srByVar + else: srFlowVar + +proc flowVarKind(t: PType): TFlowVarKind = + if t.skipTypes(abstractInst).kind in {tyRef, tyString, tySequence}: fvGC + elif containsGarbageCollectedRef(t): fvInvalid + else: fvBlob + +proc addLocalVar(varSection: PNode; owner: PSym; typ: PType; v: PNode): PSym = + result = newSym(skTemp, getIdent(genPrefix), owner, varSection.info) + result.typ = typ + incl(result.flags, sfFromGeneric) + + var vpart = newNodeI(nkIdentDefs, varSection.info, 3) + vpart.sons[0] = newSymNode(result) + vpart.sons[1] = ast.emptyNode + vpart.sons[2] = v + varSection.add vpart + +discard """ +We generate roughly this: + +proc f_wrapper(thread, args) = + barrierEnter(args.barrier) # for parallel statement + var a = args.a # thread transfer; deepCopy or shallowCopy or no copy + # depending on whether we're in a 'parallel' statement + var b = args.b + var fv = args.fv + + fv.owner = thread # optional + nimArgsPassingDone() # signal parent that the work is done + # + args.fv.blob = f(a, b, ...) + nimFlowVarSignal(args.fv) + + # - or - + f(a, b, ...) + barrierLeave(args.barrier) # for parallel statement + +stmtList: + var scratchObj + scratchObj.a = a + scratchObj.b = b + + nimSpawn(f_wrapper, addr scratchObj) + scratchObj.fv # optional + +""" proc createWrapperProc(f: PNode; threadParam, argsParam: PSym; - varSection, call: PNode): PSym = + varSection, call, barrier, fv: PNode; + spawnKind: TSpawnResult): PSym = var body = newNodeI(nkStmtList, f.info) + var threadLocalBarrier: PSym + if barrier != nil: + var varSection = newNodeI(nkVarSection, barrier.info) + threadLocalBarrier = addLocalVar(varSection, argsParam.owner, + barrier.typ, barrier) + body.add varSection + body.add callCodeGenProc("barrierEnter", threadLocalBarrier.newSymNode) + var threadLocalProm: PSym + if spawnKind == srByVar: + threadLocalProm = addLocalVar(varSection, argsParam.owner, fv.typ, fv) + elif fv != nil: + internalAssert fv.typ.kind == tyGenericInst + threadLocalProm = addLocalVar(varSection, argsParam.owner, fv.typ, fv) + body.add varSection - body.add callCodeGenProc("nimArgsPassingDone", newSymNode(threadParam)) - body.add call + if fv != nil and spawnKind != srByVar: + # generate: + # fv.owner = threadParam + body.add newAsgnStmt(indirectAccess(threadLocalProm.newSymNode, + "owner", fv.info), threadParam.newSymNode) + + body.add callCodeGenProc("nimArgsPassingDone", threadParam.newSymNode) + if spawnKind == srByVar: + body.add newAsgnStmt(genDeref(threadLocalProm.newSymNode), call) + elif fv != nil: + let fk = fv.typ.sons[1].flowVarKind + if fk == fvInvalid: + localError(f.info, "cannot create a flowVar of type: " & + typeToString(fv.typ.sons[1])) + body.add newAsgnStmt(indirectAccess(threadLocalProm.newSymNode, + if fk == fvGC: "data" else: "blob", fv.info), call) + if barrier == nil: + # by now 'fv' is shared and thus might have beeen overwritten! we need + # to use the thread-local view instead: + body.add callCodeGenProc("nimFlowVarSignal", threadLocalProm.newSymNode) + else: + body.add call + if barrier != nil: + body.add callCodeGenProc("barrierLeave", threadLocalBarrier.newSymNode) var params = newNodeI(nkFormalParams, f.info) params.add emptyNode @@ -146,10 +272,152 @@ proc createCastExpr(argsParam: PSym; objType: PType): PNode = result.typ = newType(tyPtr, objType.owner) result.typ.rawAddSon(objType) -proc wrapProcForSpawn*(owner: PSym; n: PNode): PNode = - result = newNodeI(nkStmtList, n.info) - if n.kind notin nkCallKinds or not n.typ.isEmptyType: - localError(n.info, "'spawn' takes a call expression of type void") +proc setupArgsForConcurrency(n: PNode; objType: PType; scratchObj: PSym, + castExpr, call, varSection, result: PNode) = + let formals = n[0].typ.n + let tmpName = getIdent(genPrefix) + for i in 1 .. <n.len: + # we pick n's type here, which hopefully is 'tyArray' and not + # 'tyOpenArray': + var argType = n[i].typ.skipTypes(abstractInst) + if i < formals.len and formals[i].typ.kind == tyVar: + localError(n[i].info, "'spawn'ed function cannot have a 'var' parameter") + elif containsTyRef(argType): + localError(n[i].info, "'spawn'ed function cannot refer to 'ref'/closure") + + let fieldname = if i < formals.len: formals[i].sym.name else: tmpName + var field = newSym(skField, fieldname, objType.owner, n.info) + field.typ = argType + objType.addField(field) + result.add newFastAsgnStmt(newDotExpr(scratchObj, field), n[i]) + + let temp = addLocalVar(varSection, objType.owner, argType, + indirectAccess(castExpr, field, n.info)) + call.add(newSymNode(temp)) + +proc getRoot*(n: PNode): PSym = + ## ``getRoot`` takes a *path* ``n``. A path is an lvalue expression + ## like ``obj.x[i].y``. The *root* of a path is the symbol that can be + ## determined as the owner; ``obj`` in the example. + case n.kind + of nkSym: + if n.sym.kind in {skVar, skResult, skTemp, skLet, skForVar}: + result = n.sym + of nkDotExpr, nkBracketExpr, nkHiddenDeref, nkDerefExpr, + nkObjUpConv, nkObjDownConv, nkCheckedFieldExpr: + result = getRoot(n.sons[0]) + of nkHiddenStdConv, nkHiddenSubConv, nkConv: + result = getRoot(n.sons[1]) + of nkCallKinds: + if getMagic(n) == mSlice: result = getRoot(n.sons[1]) + else: discard + +proc newIntLit(value: BiggestInt): PNode = + result = nkIntLit.newIntNode(value) + result.typ = getSysType(tyInt) + +proc genHigh(n: PNode): PNode = + if skipTypes(n.typ, abstractVar).kind in {tyArrayConstr, tyArray}: + result = newIntLit(lastOrd(skipTypes(n.typ, abstractVar))) + else: + result = newNodeI(nkCall, n.info, 2) + result.typ = getSysType(tyInt) + result.sons[0] = newSymNode(createMagic("high", mHigh)) + result.sons[1] = n + +proc setupArgsForParallelism(n: PNode; objType: PType; scratchObj: PSym; + castExpr, call, varSection, result: PNode) = + let formals = n[0].typ.n + let tmpName = getIdent(genPrefix) + # we need to copy the foreign scratch object fields into local variables + # for correctness: These are called 'threadLocal' here. + for i in 1 .. <n.len: + let n = n[i] + let argType = skipTypes(if i < formals.len: formals[i].typ else: n.typ, + abstractInst) + if containsTyRef(argType): + localError(n.info, "'spawn'ed function cannot refer to 'ref'/closure") + + let fieldname = if i < formals.len: formals[i].sym.name else: tmpName + var field = newSym(skField, fieldname, objType.owner, n.info) + + if argType.kind in {tyVarargs, tyOpenArray}: + # important special case: we always create a zero-copy slice: + let slice = newNodeI(nkCall, n.info, 4) + slice.typ = n.typ + slice.sons[0] = newSymNode(createMagic("slice", mSlice)) + var fieldB = newSym(skField, tmpName, objType.owner, n.info) + fieldB.typ = getSysType(tyInt) + objType.addField(fieldB) + + if getMagic(n) == mSlice: + let a = genAddrOf(n[1]) + field.typ = a.typ + objType.addField(field) + result.add newFastAsgnStmt(newDotExpr(scratchObj, field), a) + + var fieldA = newSym(skField, tmpName, objType.owner, n.info) + fieldA.typ = getSysType(tyInt) + objType.addField(fieldA) + result.add newFastAsgnStmt(newDotExpr(scratchObj, fieldA), n[2]) + result.add newFastAsgnStmt(newDotExpr(scratchObj, fieldB), n[3]) + + let threadLocal = addLocalVar(varSection, objType.owner, fieldA.typ, + indirectAccess(castExpr, fieldA, n.info)) + slice.sons[2] = threadLocal.newSymNode + else: + let a = genAddrOf(n) + field.typ = a.typ + objType.addField(field) + result.add newFastAsgnStmt(newDotExpr(scratchObj, field), a) + result.add newFastAsgnStmt(newDotExpr(scratchObj, fieldB), genHigh(n)) + + slice.sons[2] = newIntLit(0) + # the array itself does not need to go through a thread local variable: + slice.sons[1] = genDeref(indirectAccess(castExpr, field, n.info)) + + let threadLocal = addLocalVar(varSection, objType.owner, fieldB.typ, + indirectAccess(castExpr, fieldB, n.info)) + slice.sons[3] = threadLocal.newSymNode + call.add slice + elif (let size = computeSize(argType); size < 0 or size > 16) and + n.getRoot != nil: + # it is more efficient to pass a pointer instead: + let a = genAddrOf(n) + field.typ = a.typ + objType.addField(field) + result.add newFastAsgnStmt(newDotExpr(scratchObj, field), a) + let threadLocal = addLocalVar(varSection, objType.owner, field.typ, + indirectAccess(castExpr, field, n.info)) + call.add(genDeref(threadLocal.newSymNode)) + else: + # boring case + field.typ = argType + objType.addField(field) + result.add newFastAsgnStmt(newDotExpr(scratchObj, field), n) + let threadLocal = addLocalVar(varSection, objType.owner, field.typ, + indirectAccess(castExpr, field, n.info)) + call.add(threadLocal.newSymNode) + +proc wrapProcForSpawn*(owner: PSym; spawnExpr: PNode; retType: PType; + barrier, dest: PNode = nil): PNode = + # if 'barrier' != nil, then it is in a 'parallel' section and we + # generate quite different code + let n = spawnExpr[1] + let spawnKind = spawnResult(retType, barrier!=nil) + case spawnKind + of srVoid: + internalAssert dest == nil + result = newNodeI(nkStmtList, n.info) + of srFlowVar: + internalAssert dest == nil + result = newNodeIT(nkStmtListExpr, n.info, retType) + of srByVar: + if dest == nil: localError(n.info, "'spawn' must not be discarded") + result = newNodeI(nkStmtList, n.info) + + if n.kind notin nkCallKinds: + localError(n.info, "'spawn' takes a call expression") return if optThreadAnalysis in gGlobalOptions: if {tfThread, tfNoSideEffect} * n[0].typ.flags == {}: @@ -162,6 +430,7 @@ proc wrapProcForSpawn*(owner: PSym; n: PNode): PNode = threadParam.typ = ptrType argsParam.typ = ptrType argsParam.position = 1 + var objType = createObj(owner, n.info) incl(objType.flags, tfFinal) let castExpr = createCastExpr(argsParam, objType) @@ -174,7 +443,7 @@ proc wrapProcForSpawn*(owner: PSym; n: PNode): PNode = varSectionB.addVar(scratchObj.newSymNode) result.add varSectionB - var call = newNodeI(nkCall, n.info) + var call = newNodeIT(nkCall, n.info, n.typ) var fn = n.sons[0] # templates and macros are in fact valid here due to the nature of # the transformation: @@ -194,35 +463,44 @@ proc wrapProcForSpawn*(owner: PSym; n: PNode): PNode = call.add(fn) var varSection = newNodeI(nkVarSection, n.info) - let formals = n[0].typ.n - let tmpName = getIdent(genPrefix) - for i in 1 .. <n.len: - # we pick n's type here, which hopefully is 'tyArray' and not - # 'tyOpenArray': - var argType = n[i].typ.skipTypes(abstractInst) - if i < formals.len and formals[i].typ.kind == tyVar: - localError(n[i].info, "'spawn'ed function cannot have a 'var' parameter") - elif containsTyRef(argType): - localError(n[i].info, "'spawn'ed function cannot refer to 'ref'/closure") + if barrier.isNil: + setupArgsForConcurrency(n, objType, scratchObj, castExpr, call, varSection, result) + else: + setupArgsForParallelism(n, objType, scratchObj, castExpr, call, varSection, result) - let fieldname = if i < formals.len: formals[i].sym.name else: tmpName - var field = newSym(skField, fieldname, owner, n.info) - field.typ = argType + var barrierAsExpr: PNode = nil + if barrier != nil: + let typ = newType(tyPtr, owner) + typ.rawAddSon(magicsys.getCompilerProc("Barrier").typ) + var field = newSym(skField, getIdent"barrier", owner, n.info) + field.typ = typ objType.addField(field) - result.add newFastAsgnStmt(newDotExpr(scratchObj, field), n[i]) - - var temp = newSym(skTemp, tmpName, owner, n.info) - temp.typ = argType - incl(temp.flags, sfFromGeneric) + result.add newFastAsgnStmt(newDotExpr(scratchObj, field), barrier) + barrierAsExpr = indirectAccess(castExpr, field, n.info) - var vpart = newNodeI(nkIdentDefs, n.info, 3) - vpart.sons[0] = newSymNode(temp) - vpart.sons[1] = ast.emptyNode - vpart.sons[2] = indirectAccess(castExpr, field, n.info) - varSection.add vpart + var fvField, fvAsExpr: PNode = nil + if spawnKind == srFlowVar: + var field = newSym(skField, getIdent"fv", owner, n.info) + field.typ = retType + objType.addField(field) + fvField = newDotExpr(scratchObj, field) + fvAsExpr = indirectAccess(castExpr, field, n.info) + # create flowVar: + result.add newFastAsgnStmt(fvField, callProc(spawnExpr[2])) + if barrier == nil: + result.add callCodeGenProc("nimFlowVarCreateCondVar", fvField) - call.add(newSymNode(temp)) + elif spawnKind == srByVar: + var field = newSym(skField, getIdent"fv", owner, n.info) + field.typ = newType(tyPtr, objType.owner) + field.typ.rawAddSon(retType) + objType.addField(field) + fvAsExpr = indirectAccess(castExpr, field, n.info) + result.add newFastAsgnStmt(newDotExpr(scratchObj, field), genAddrOf(dest)) - let wrapper = createWrapperProc(fn, threadParam, argsParam, varSection, call) + let wrapper = createWrapperProc(fn, threadParam, argsParam, varSection, call, + barrierAsExpr, fvAsExpr, spawnKind) result.add callCodeGenProc("nimSpawn", wrapper.newSymNode, genAddrOf(scratchObj.newSymNode)) + + if spawnKind == srFlowVar: result.add fvField diff --git a/compiler/pragmas.nim b/compiler/pragmas.nim index f2d8988ea..a17773aa4 100644 --- a/compiler/pragmas.nim +++ b/compiler/pragmas.nim @@ -644,12 +644,13 @@ proc singlePragma(c: PContext, sym: PSym, n: PNode, i: int, incl(sym.flags, sfNoReturn) of wDynlib: processDynLib(c, it, sym) - of wCompilerproc: + of wCompilerproc: noVal(it) # compilerproc may not get a string! - makeExternExport(sym, "$1", it.info) - incl(sym.flags, sfCompilerProc) - incl(sym.flags, sfUsed) # suppress all those stupid warnings - registerCompilerProc(sym) + if sfFromGeneric notin sym.flags: + makeExternExport(sym, "$1", it.info) + incl(sym.flags, sfCompilerProc) + incl(sym.flags, sfUsed) # suppress all those stupid warnings + registerCompilerProc(sym) of wProcVar: noVal(it) incl(sym.flags, sfProcvar) diff --git a/compiler/sem.nim b/compiler/sem.nim index 0c793eeba..e4ef6473f 100644 --- a/compiler/sem.nim +++ b/compiler/sem.nim @@ -15,7 +15,8 @@ import magicsys, parser, nversion, nimsets, semfold, importer, procfind, lookups, rodread, pragmas, passes, semdata, semtypinst, sigmatch, intsets, transf, vmdef, vm, idgen, aliases, cgmeth, lambdalifting, - evaltempl, patterns, parampatterns, sempass2, pretty, semmacrosanity + evaltempl, patterns, parampatterns, sempass2, pretty, semmacrosanity, + semparallel # implementation diff --git a/compiler/semdata.nim b/compiler/semdata.nim index d2b2f90cd..abecc1b6d 100644 --- a/compiler/semdata.nim +++ b/compiler/semdata.nim @@ -91,6 +91,7 @@ type generics*: seq[TInstantiationPair] # pending list of instantiated generics to compile lastGenericIdx*: int # used for the generics stack hloLoopDetector*: int # used to prevent endless loops in the HLO + inParallelStmt*: int proc makeInstPair*(s: PSym, inst: PInstantiation): TInstantiationPair = result.genericSym = s diff --git a/compiler/semexprs.nim b/compiler/semexprs.nim index 99ffa9b54..078e95fbe 100644 --- a/compiler/semexprs.nim +++ b/compiler/semexprs.nim @@ -1394,11 +1394,6 @@ proc semDefined(c: PContext, n: PNode, onlyCurrentScope: bool): PNode = result.info = n.info result.typ = getSysType(tyBool) -proc setMs(n: PNode, s: PSym): PNode = - result = n - n.sons[0] = newSymNode(s) - n.sons[0].info = n.info - proc expectMacroOrTemplateCall(c: PContext, n: PNode): PSym = ## The argument to the proc should be nkCall(...) or similar ## Returns the macro/template symbol @@ -1590,6 +1585,27 @@ proc semShallowCopy(c: PContext, n: PNode, flags: TExprFlags): PNode = else: result = semDirectOp(c, n, flags) +proc createFlowVar(c: PContext; t: PType; info: TLineInfo): PType = + result = newType(tyGenericInvokation, c.module) + addSonSkipIntLit(result, magicsys.getCompilerProc("FlowVar").typ) + addSonSkipIntLit(result, t) + result = instGenericContainer(c, info, result, allowMetaTypes = false) + +proc instantiateCreateFlowVarCall(c: PContext; t: PType; + info: TLineInfo): PSym = + let sym = magicsys.getCompilerProc("nimCreateFlowVar") + if sym == nil: + localError(info, errSystemNeeds, "nimCreateFlowVar") + var bindings: TIdTable + initIdTable(bindings) + bindings.idTablePut(sym.ast[genericParamsPos].sons[0].typ, t) + result = c.semGenerateInstance(c, sym, bindings, info) + +proc setMs(n: PNode, s: PSym): PNode = + result = n + n.sons[0] = newSymNode(s) + n.sons[0].info = n.info + proc semMagic(c: PContext, n: PNode, s: PSym, flags: TExprFlags): PNode = # this is a hotspot in the compiler! # DON'T forget to update ast.SpecialSemMagics if you add a magic here! @@ -1611,6 +1627,22 @@ proc semMagic(c: PContext, n: PNode, s: PSym, flags: TExprFlags): PNode = checkSonsLen(n, 2) result = newStrNodeT(renderTree(n[1], {renderNoComments}), n) result.typ = getSysType(tyString) + of mParallel: + result = setMs(n, s) + var x = n.lastSon + if x.kind == nkDo: x = x.sons[bodyPos] + inc c.inParallelStmt + result.sons[1] = semStmt(c, x) + dec c.inParallelStmt + of mSpawn: + result = setMs(n, s) + result.sons[1] = semExpr(c, n.sons[1]) + if not result[1].typ.isEmptyType: + if c.inParallelStmt > 0: + result.typ = result[1].typ + else: + result.typ = createFlowVar(c, result[1].typ, n.info) + result.add instantiateCreateFlowVarCall(c, result[1].typ, n.info).newSymNode else: result = semDirectOp(c, n, flags) proc semWhen(c: PContext, n: PNode, semCheck = true): PNode = diff --git a/compiler/semmagic.nim b/compiler/semmagic.nim index 4caf1fb8e..f943e7006 100644 --- a/compiler/semmagic.nim +++ b/compiler/semmagic.nim @@ -1,7 +1,7 @@ # # # The Nimrod Compiler -# (c) Copyright 2013 Andreas Rumpf +# (c) Copyright 2014 Andreas Rumpf # # See the file "copying.txt", included in this # distribution, for details about the copyright. @@ -131,4 +131,3 @@ proc magicsAfterOverloadResolution(c: PContext, n: PNode, of mNBindSym: result = semBindSym(c, n) of mLocals: result = semLocals(c, n) else: result = n - diff --git a/compiler/semparallel.nim b/compiler/semparallel.nim new file mode 100644 index 000000000..c594a4788 --- /dev/null +++ b/compiler/semparallel.nim @@ -0,0 +1,465 @@ +# +# +# The Nimrod Compiler +# (c) Copyright 2014 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## Semantic checking for 'parallel'. + +# - codegen needs to support mSlice (+) +# - lowerings must not perform unnecessary copies (+) +# - slices should become "nocopy" to openArray (+) +# - need to perform bound checks (+) +# +# - parallel needs to insert a barrier (+) +# - passed arguments need to be ensured to be "const" +# - what about 'f(a)'? --> f shouldn't have side effects anyway +# - passed arrays need to be ensured not to alias +# - passed slices need to be ensured to be disjoint (+) +# - output slices need special logic (+) + +import + ast, astalgo, idents, lowerings, magicsys, guards, sempass2, msgs, + renderer +from trees import getMagic +from strutils import `%` + +discard """ + +one major problem: + spawn f(a[i]) + inc i + spawn f(a[i]) +is valid, but + spawn f(a[i]) + spawn f(a[i]) + inc i +is not! However, + spawn f(a[i]) + if guard: inc i + spawn f(a[i]) +is not valid either! --> We need a flow dependent analysis here. + +However: + while foo: + spawn f(a[i]) + inc i + spawn f(a[i]) + +Is not valid either! --> We should really restrict 'inc' to loop endings? + +The heuristic that we implement here (that has no false positives) is: Usage +of 'i' in a slice *after* we determined the stride is invalid! +""" + +type + TDirection = enum + ascending, descending + MonotonicVar = object + v, alias: PSym # to support the ordinary 'countup' iterator + # we need to detect aliases + lower, upper, stride: PNode + dir: TDirection + blacklisted: bool # blacklisted variables that are not monotonic + AnalysisCtx = object + locals: seq[MonotonicVar] + slices: seq[tuple[x,a,b: PNode, spawnId: int, inLoop: bool]] + guards: TModel # nested guards + args: seq[PSym] # args must be deeply immutable + spawns: int # we can check that at last 1 spawn is used in + # the 'parallel' section + currentSpawnId: int + inLoop: int + +let opSlice = createMagic("slice", mSlice) + +proc initAnalysisCtx(): AnalysisCtx = + result.locals = @[] + result.slices = @[] + result.args = @[] + result.guards = @[] + +proc lookupSlot(c: AnalysisCtx; s: PSym): int = + for i in 0.. <c.locals.len: + if c.locals[i].v == s or c.locals[i].alias == s: return i + return -1 + +proc getSlot(c: var AnalysisCtx; v: PSym): ptr MonotonicVar = + let s = lookupSlot(c, v) + if s >= 0: return addr(c.locals[s]) + let L = c.locals.len + c.locals.setLen(L+1) + c.locals[L].v = v + return addr(c.locals[L]) + +proc gatherArgs(c: var AnalysisCtx; n: PNode) = + for i in 0.. <n.safeLen: + let root = getRoot n[i] + if root != nil: + block addRoot: + for r in items(c.args): + if r == root: break addRoot + c.args.add root + gatherArgs(c, n[i]) + +proc isSingleAssignable(n: PNode): bool = + n.kind == nkSym and (let s = n.sym; + s.kind in {skTemp, skForVar, skLet} and + {sfAddrTaken, sfGlobal} * s.flags == {}) + +proc isLocal(n: PNode): bool = + n.kind == nkSym and (let s = n.sym; + s.kind in {skResult, skTemp, skForVar, skVar, skLet} and + {sfAddrTaken, sfGlobal} * s.flags == {}) + +proc checkLocal(c: AnalysisCtx; n: PNode) = + if isLocal(n): + let s = c.lookupSlot(n.sym) + if s >= 0 and c.locals[s].stride != nil: + localError(n.info, "invalid usage of counter after increment") + else: + for i in 0 .. <n.safeLen: checkLocal(c, n.sons[i]) + +template `?`(x): expr = x.renderTree + +proc checkLe(c: AnalysisCtx; a, b: PNode) = + case proveLe(c.guards, a, b) + of impUnknown: + localError(a.info, "cannot prove: " & ?a & " <= " & ?b) + of impYes: discard + of impNo: + localError(a.info, "can prove: " & ?a & " > " & ?b) + +proc checkBounds(c: AnalysisCtx; arr, idx: PNode) = + checkLe(c, arr.lowBound, idx) + checkLe(c, idx, arr.highBound) + +proc addLowerBoundAsFacts(c: var AnalysisCtx) = + for v in c.locals: + if not v.blacklisted: + c.guards.addFactLe(v.lower, newSymNode(v.v)) + +proc addSlice(c: var AnalysisCtx; n: PNode; x, le, ri: PNode) = + checkLocal(c, n) + let le = le.canon + let ri = ri.canon + # perform static bounds checking here; and not later! + let oldState = c.guards.len + addLowerBoundAsFacts(c) + c.checkBounds(x, le) + c.checkBounds(x, ri) + c.guards.setLen(oldState) + c.slices.add((x, le, ri, c.currentSpawnId, c.inLoop > 0)) + +proc overlap(m: TModel; x,y,c,d: PNode) = + # X..Y and C..D overlap iff (X <= D and C <= Y) + case proveLe(m, x, d) + of impUnknown: + localError(x.info, + "cannot prove: $# > $#; required for ($#)..($#) disjoint from ($#)..($#)" % + [?x, ?d, ?x, ?y, ?c, ?d]) + of impYes: + case proveLe(m, c, y) + of impUnknown: + localError(x.info, + "cannot prove: $# > $#; required for ($#)..($#) disjoint from ($#)..($#)" % + [?c, ?y, ?x, ?y, ?c, ?d]) + of impYes: + localError(x.info, "($#)..($#) not disjoint from ($#)..($#)" % [?x, ?y, ?c, ?d]) + of impNo: discard + of impNo: discard + +proc stride(c: AnalysisCtx; n: PNode): BiggestInt = + if isLocal(n): + let s = c.lookupSlot(n.sym) + if s >= 0 and c.locals[s].stride != nil: + result = c.locals[s].stride.intVal + else: + for i in 0 .. <n.safeLen: result += stride(c, n.sons[i]) + +proc subStride(c: AnalysisCtx; n: PNode): PNode = + # substitute with stride: + if isLocal(n): + let s = c.lookupSlot(n.sym) + if s >= 0 and c.locals[s].stride != nil: + result = n +@ c.locals[s].stride.intVal + else: + result = n + elif n.safeLen > 0: + result = shallowCopy(n) + for i in 0 .. <n.len: result.sons[i] = subStride(c, n.sons[i]) + else: + result = n + +proc checkSlicesAreDisjoint(c: var AnalysisCtx) = + # this is the only thing that we need to perform after we have traversed + # the whole tree so that the strides are available. + # First we need to add all the computed lower bounds: + addLowerBoundAsFacts(c) + # Every slice used in a loop needs to be disjoint with itself: + for x,a,b,id,inLoop in items(c.slices): + if inLoop: overlap(c.guards, a,b, c.subStride(a), c.subStride(b)) + # Another tricky example is: + # while true: + # spawn f(a[i]) + # spawn f(a[i+1]) + # inc i # inc i, 2 would be correct here + # + # Or even worse: + # while true: + # spawn f(a[i+1 .. i+3]) + # spawn f(a[i+4 .. i+5]) + # inc i, 4 + # Prove that i*k*stride + 3 != i*k'*stride + 5 + # For the correct example this amounts to + # i*k*2 != i*k'*2 + 1 + # which is true. + # For now, we don't try to prove things like that at all, even though it'd + # be feasible for many useful examples. Instead we attach the slice to + # a spawn and if the attached spawns differ, we bail out: + for i in 0 .. high(c.slices): + for j in i+1 .. high(c.slices): + let x = c.slices[i] + let y = c.slices[j] + if x.spawnId != y.spawnId and guards.sameTree(x.x, y.x): + if not x.inLoop or not y.inLoop: + # XXX strictly speaking, 'or' is not correct here and it needs to + # be 'and'. However this prevents too many obviously correct programs + # like f(a[0..x]); for i in x+1 .. a.high: f(a[i]) + overlap(c.guards, x.a, x.b, y.a, y.b) + elif (let k = simpleSlice(x.a, x.b); let m = simpleSlice(y.a, y.b); + k >= 0 and m >= 0): + # ah I cannot resist the temptation and add another sweet heuristic: + # if both slices have the form (i+k)..(i+k) and (i+m)..(i+m) we + # check they are disjoint and k < stride and m < stride: + overlap(c.guards, x.a, x.b, y.a, y.b) + let stride = min(c.stride(x.a), c.stride(y.a)) + if k < stride and m < stride: + discard + else: + localError(x.x.info, "cannot prove ($#)..($#) disjoint from ($#)..($#)" % + [?x.a, ?x.b, ?y.a, ?y.b]) + else: + localError(x.x.info, "cannot prove ($#)..($#) disjoint from ($#)..($#)" % + [?x.a, ?x.b, ?y.a, ?y.b]) + +proc analyse(c: var AnalysisCtx; n: PNode) + +proc analyseSons(c: var AnalysisCtx; n: PNode) = + for i in 0 .. <safeLen(n): analyse(c, n[i]) + +proc min(a, b: PNode): PNode = + if a.isNil: result = b + elif a.intVal < b.intVal: result = a + else: result = b + +proc fromSystem(op: PSym): bool = sfSystemModule in getModule(op).flags + +proc analyseCall(c: var AnalysisCtx; n: PNode; op: PSym) = + if op.magic == mSpawn: + inc c.spawns + let oldSpawnId = c.currentSpawnId + c.currentSpawnId = c.spawns + gatherArgs(c, n[1]) + analyseSons(c, n) + c.currentSpawnId = oldSpawnId + elif op.magic == mInc or (op.name.s == "+=" and op.fromSystem): + if n[1].isLocal: + let incr = n[2].skipConv + if incr.kind in {nkCharLit..nkUInt32Lit} and incr.intVal > 0: + let slot = c.getSlot(n[1].sym) + slot.stride = min(slot.stride, incr) + analyseSons(c, n) + elif op.name.s == "[]" and op.fromSystem: + c.addSlice(n, n[1], n[2][1], n[2][2]) + analyseSons(c, n) + elif op.name.s == "[]=" and op.fromSystem: + c.addSlice(n, n[1], n[2][1], n[2][2]) + analyseSons(c, n) + else: + analyseSons(c, n) + +proc analyseCase(c: var AnalysisCtx; n: PNode) = + analyse(c, n.sons[0]) + let oldFacts = c.guards.len + for i in 1.. <n.len: + let branch = n.sons[i] + setLen(c.guards, oldFacts) + addCaseBranchFacts(c.guards, n, i) + for i in 0 .. <branch.len: + analyse(c, branch.sons[i]) + setLen(c.guards, oldFacts) + +proc analyseIf(c: var AnalysisCtx; n: PNode) = + analyse(c, n.sons[0].sons[0]) + let oldFacts = c.guards.len + addFact(c.guards, canon(n.sons[0].sons[0])) + + analyse(c, n.sons[0].sons[1]) + for i in 1.. <n.len: + let branch = n.sons[i] + setLen(c.guards, oldFacts) + for j in 0..i-1: + addFactNeg(c.guards, canon(n.sons[j].sons[0])) + if branch.len > 1: + addFact(c.guards, canon(branch.sons[0])) + for i in 0 .. <branch.len: + analyse(c, branch.sons[i]) + setLen(c.guards, oldFacts) + +proc analyse(c: var AnalysisCtx; n: PNode) = + case n.kind + of nkAsgn, nkFastAsgn: + if n[0].isSingleAssignable and n[1].isLocal: + let slot = c.getSlot(n[1].sym) + slot.alias = n[0].sym + elif n[0].isLocal: + # since we already ensure sfAddrTaken is not in s.flags, we only need to + # prevent direct assignments to the monotonic variable: + let slot = c.getSlot(n[0].sym) + slot.blackListed = true + invalidateFacts(c.guards, n[0]) + analyseSons(c, n) + addAsgnFact(c.guards, n[0], n[1]) + of nkCallKinds: + # direct call: + if n[0].kind == nkSym: analyseCall(c, n, n[0].sym) + else: analyseSons(c, n) + of nkBracketExpr: + c.addSlice(n, n[0], n[1], n[1]) + analyseSons(c, n) + of nkReturnStmt, nkRaiseStmt, nkTryStmt: + localError(n.info, "invalid control flow for 'parallel'") + # 'break' that leaves the 'parallel' section is not valid either + # or maybe we should generate a 'try' XXX + of nkVarSection: + for it in n: + let value = it.lastSon + if value.kind != nkEmpty: + for j in 0 .. it.len-3: + if it[j].isLocal: + let slot = c.getSlot(it[j].sym) + if slot.lower.isNil: slot.lower = value + else: internalError(it.info, "slot already has a lower bound") + analyse(c, value) + of nkCaseStmt: analyseCase(c, n) + of nkIfStmt, nkIfExpr: analyseIf(c, n) + of nkWhileStmt: + analyse(c, n.sons[0]) + # 'while true' loop? + inc c.inLoop + if isTrue(n.sons[0]): + analyseSons(c, n.sons[1]) + else: + # loop may never execute: + let oldState = c.locals.len + let oldFacts = c.guards.len + addFact(c.guards, canon(n.sons[0])) + analyse(c, n.sons[1]) + setLen(c.locals, oldState) + setLen(c.guards, oldFacts) + # we know after the loop the negation holds: + if not hasSubnodeWith(n.sons[1], nkBreakStmt): + addFactNeg(c.guards, canon(n.sons[0])) + dec c.inLoop + of nkTypeSection, nkProcDef, nkConverterDef, nkMethodDef, nkIteratorDef, + nkMacroDef, nkTemplateDef, nkConstSection, nkPragma: + discard + else: + analyseSons(c, n) + +proc transformSlices(n: PNode): PNode = + if n.kind in nkCallKinds and n[0].kind == nkSym: + let op = n[0].sym + if op.name.s == "[]" and op.fromSystem: + result = copyNode(n) + result.add opSlice.newSymNode + result.add n[1] + result.add n[2][1] + result.add n[2][2] + return result + if n.safeLen > 0: + result = shallowCopy(n) + for i in 0 .. < n.len: + result.sons[i] = transformSlices(n.sons[i]) + else: + result = n + +proc transformSpawn(owner: PSym; n, barrier: PNode): PNode +proc transformSpawnSons(owner: PSym; n, barrier: PNode): PNode = + result = shallowCopy(n) + for i in 0 .. < n.len: + result.sons[i] = transformSpawn(owner, n.sons[i], barrier) + +proc transformSpawn(owner: PSym; n, barrier: PNode): PNode = + case n.kind + of nkVarSection: + result = nil + for it in n: + let b = it.lastSon + if getMagic(b) == mSpawn: + if it.len != 3: localError(it.info, "invalid context for 'spawn'") + let m = transformSlices(b) + if result.isNil: + result = newNodeI(nkStmtList, n.info) + result.add n + result.add wrapProcForSpawn(owner, m, b.typ, barrier, it[0]) + it.sons[it.len-1] = emptyNode + if result.isNil: result = n + of nkAsgn, nkFastAsgn: + let b = n[1] + if getMagic(b) == mSpawn: + let m = transformSlices(b) + return wrapProcForSpawn(owner, m, b.typ, barrier, n[0]) + result = transformSpawnSons(owner, n, barrier) + of nkCallKinds: + if getMagic(n) == mSpawn: + result = transformSlices(n) + return wrapProcForSpawn(owner, result, n.typ, barrier, nil) + result = transformSpawnSons(owner, n, barrier) + elif n.safeLen > 0: + result = transformSpawnSons(owner, n, barrier) + else: + result = n + +proc checkArgs(a: var AnalysisCtx; n: PNode) = + discard "too implement" + +proc generateAliasChecks(a: AnalysisCtx; result: PNode) = + discard "too implement" + +proc liftParallel*(owner: PSym; n: PNode): PNode = + # this needs to be called after the 'for' loop elimination + + # first pass: + # - detect monotonic local integer variables + # - detect used slices + # - detect used arguments + #echo "PAR ", renderTree(n) + + var a = initAnalysisCtx() + let body = n.lastSon + analyse(a, body) + if a.spawns == 0: + localError(n.info, "'parallel' section without 'spawn'") + checkSlicesAreDisjoint(a) + checkArgs(a, body) + + var varSection = newNodeI(nkVarSection, n.info) + var temp = newSym(skTemp, getIdent"barrier", owner, n.info) + temp.typ = magicsys.getCompilerProc("Barrier").typ + incl(temp.flags, sfFromGeneric) + let tempNode = newSymNode(temp) + varSection.addVar tempNode + + let barrier = genAddrOf(tempNode) + result = newNodeI(nkStmtList, n.info) + generateAliasChecks(a, result) + result.add varSection + result.add callCodeGenProc("openBarrier", barrier) + result.add transformSpawn(owner, body, barrier) + result.add callCodeGenProc("closeBarrier", barrier) + diff --git a/compiler/sempass2.nim b/compiler/sempass2.nim index 6afde5f05..c8ce5e787 100644 --- a/compiler/sempass2.nim +++ b/compiler/sempass2.nim @@ -89,7 +89,7 @@ proc initVarViaNew(a: PEffects, n: PNode) = if n.kind != nkSym: return let s = n.sym if {tfNeedsInit, tfNotNil} * s.typ.flags <= {tfNotNil}: - # 'x' is not nil, but that doesn't mean it's not nil children + # 'x' is not nil, but that doesn't mean its "not nil" children # are initialized: initVar(a, n) @@ -478,7 +478,7 @@ proc trackBlock(tracked: PEffects, n: PNode) = else: track(tracked, n) -proc isTrue(n: PNode): bool = +proc isTrue*(n: PNode): bool = n.kind == nkSym and n.sym.kind == skEnumField and n.sym.position != 0 or n.kind == nkIntLit and n.intVal != 0 diff --git a/compiler/semtypes.nim b/compiler/semtypes.nim index 2dcca8f1e..9d38c4619 100644 --- a/compiler/semtypes.nim +++ b/compiler/semtypes.nim @@ -1085,8 +1085,10 @@ proc semTypeNode(c: PContext, n: PNode, prev: PType): PType = of nkCallKinds: if isRange(n): result = semRangeAux(c, n, prev) - elif n[0].kind == nkIdent: - let op = n.sons[0].ident + elif n[0].kind notin nkIdentKinds: + result = semTypeExpr(c, n) + else: + let op = considerQuotedIdent(n.sons[0]) if op.id in {ord(wAnd), ord(wOr)} or op.s == "|": checkSonsLen(n, 3) var @@ -1121,8 +1123,6 @@ proc semTypeNode(c: PContext, n: PNode, prev: PType): PType = result = semAnyRef(c, n, tyRef, prev) else: result = semTypeExpr(c, n) - else: - result = semTypeExpr(c, n) of nkWhenStmt: var whenResult = semWhen(c, n, false) if whenResult.kind == nkStmtList: whenResult.kind = nkStmtListType diff --git a/compiler/suggest.nim b/compiler/suggest.nim index a46c6a082..db95c480f 100644 --- a/compiler/suggest.nim +++ b/compiler/suggest.nim @@ -322,7 +322,7 @@ proc suggestSym*(n: PNode, s: PSym) {.inline.} = findUsages(n, s) if optDef in gGlobalOptions: findDefinition(n, s) - if isServing: + if isServing and not n.isNil: addToSourceMap(s, n.info) proc markUsed(n: PNode, s: PSym) = diff --git a/compiler/vm.nim b/compiler/vm.nim index 218369fa1..0c2c23987 100644 --- a/compiler/vm.nim +++ b/compiler/vm.nim @@ -131,8 +131,9 @@ proc createStrKeepNode(x: var TFullReg) = nfAllConst in x.node.flags: # XXX this is hacky; tests/txmlgen triggers it: x.node = newNode(nkStrLit) - # debug x.node - #assert x.node.kind in {nkStrLit..nkTripleStrLit} + # It not only hackey, it is also wrong for tgentemplate. The primary + # cause of bugs like these is that the VM does not properly distinguish + # between variable defintions (var foo = e) and variable updates (foo = e). template createStr(x) = x.node = newNode(nkStrLit) diff --git a/config/nimrod.cfg b/config/nimrod.cfg index 2817eac55..df3835ace 100644 --- a/config/nimrod.cfg +++ b/config/nimrod.cfg @@ -16,6 +16,7 @@ arm.linux.gcc.linkerexe = "arm-linux-gcc" path="$lib/core" path="$lib/pure" path="$lib/pure/collections" +path="$lib/pure/concurrency" path="$lib/impure" path="$lib/wrappers" # path="$lib/wrappers/cairo" diff --git a/doc/manual.txt b/doc/manual.txt index 3af41faec..e96e50999 100644 --- a/doc/manual.txt +++ b/doc/manual.txt @@ -2823,7 +2823,7 @@ The following builtin procs cannot be overloaded for reasons of implementation simplicity (they require specialized semantic checking):: defined, definedInScope, compiles, low, high, sizeOf, - is, of, echo, shallowCopy, getAst + is, of, echo, shallowCopy, getAst, spawn Thus they act more like keywords than like ordinary identifiers; unlike a keyword however, a redefinition may `shadow`:idx: the definition in diff --git a/doc/spawn.txt b/doc/spawn.txt new file mode 100644 index 000000000..ed500f3a5 --- /dev/null +++ b/doc/spawn.txt @@ -0,0 +1,98 @@ +========================================================== + Parallel & Spawn +========================================================== + +Nimrod has two flavors of parallelism: +1) `Structured`:idx parallelism via the ``parallel`` statement. +2) `Unstructured`:idx: parallelism via the standalone ``spawn`` statement. + +Both need the `threadpool <threadpool.html>`_ module to work. + +Somewhat confusingly, ``spawn`` is also used in the ``parallel`` statement +with slightly different semantics. ``spawn`` always takes a call expression of +the form ``f(a, ...)``. Let ``T`` be ``f``'s return type. If ``T`` is ``void`` +then ``spawn``'s return type is also ``void``. Within a ``parallel`` section +``spawn``'s return type is ``T``, otherwise it is ``FlowVar[T]``. + +The compiler can ensure the location in ``location = spawn f(...)`` is not +read prematurely within a ``parallel`` section and so there is no need for +the overhead of an indirection via ``FlowVar[T]`` to ensure correctness. + + +Parallel statement +================== + +Example: + +.. code-block:: nimrod + # Compute PI in an inefficient way + import strutils, math, threadpool + + proc term(k: float): float = 4 * math.pow(-1, k) / (2*k + 1) + + proc pi(n: int): float = + var ch = newSeq[float](n+1) + parallel: + for k in 0..ch.high: + ch[k] = spawn term(float(k)) + for k in 0..ch.high: + result += ch[k] + + echo formatFloat(pi(5000)) + + +The parallel statement is the preferred mechanism to introduce parallelism +in a Nimrod program. A subset of the Nimrod language is valid within a +``parallel`` section. This subset is checked to be free of data races at +compile time. A sophisticated `disjoint checker`:idx: ensures that no data +races are possible even though shared memory is extensively supported! + +The subset is in fact the full language with the following +restrictions / changes: + +* ``spawn`` within a ``parallel`` section has special semantics. +* Every location of the form ``a[i]`` and ``a[i..j]`` and ``dest`` where + ``dest`` is part of the pattern ``dest = spawn f(...)`` has to be + provable disjoint. This is called the *disjoint check*. +* Every other complex location ``loc`` that is used in a spawned + proc (``spawn f(loc)``) has to be immutable for the duration of + the ``parallel`` section. This is called the *immutability check*. Currently + it is not specified what exactly "complex location" means. We need to make + this an optimization! +* Every array access has to be provable within bounds. This is called + the *bounds check*. +* Slices are optimized so that no copy is performed. This optimization is not + yet performed for ordinary slices outside of a ``parallel`` section. Slices + are also special in that they currently do not support negative indexes! + + + + +Spawn statement +=============== + +A standalone ``spawn`` statement is a simple construct. It executes +the passed expression on the thread pool and returns a `data flow variable`:idx: +``FlowVar[T]`` that can be read from. The reading with the ``^`` operator is +**blocking**. However, one can use ``awaitAny`` to wait on multiple flow +variables at the same time: + +.. code-block:: nimrod + import threadpool, ... + + # wait until 2 out of 3 servers received the update: + proc main = + var responses = newSeq[RawFlowVar](3) + for i in 0..2: + responses[i] = spawn tellServer(Update, "key", "value") + var index = awaitAny(responses) + assert index >= 0 + responses.del(index) + discard awaitAny(responses) + +Like the ``parallel`` statement data flow variables ensure that no data races +are possible. Due to technical limitations not every type ``T`` is possible in +a data flow variable: ``T`` has to be of the type ``ref``, ``string``, ``seq`` +or of a type that doesn't contain a type that is garbage collected. This +restriction will be removed in the future. + diff --git a/doc/tut1.txt b/doc/tut1.txt index 8c6f140eb..9874f267b 100644 --- a/doc/tut1.txt +++ b/doc/tut1.txt @@ -1385,8 +1385,8 @@ Tuples A tuple type defines various named *fields* and an *order* of the fields. The constructor ``()`` can be used to construct tuples. The order of the fields in the constructor must match the order in the tuple's definition. -Different tuple-types are *equivalent* if they specify the same fields of -the same type in the same order. +Different tuple-types are *equivalent* if they specify fields of +the same type and of the same name in the same order. The assignment operator for tuples copies each component. The notation ``t.field`` is used to access a tuple's field. Another notation is diff --git a/lib/pure/asyncdispatch.nim b/lib/pure/asyncdispatch.nim index 87ee83ad9..6d9e605f1 100644 --- a/lib/pure/asyncdispatch.nim +++ b/lib/pure/asyncdispatch.nim @@ -27,9 +27,11 @@ export TPort # TODO: Discarded void PFutures need to checked for exception. -# TODO: Exceptions are currently uncatchable due to the limitation that -# you cannot have yield in a try stmt. Perhaps I can get the macro to put -# a user's try except around ``future.read``. +# TODO: ``except`` statement (without `try`) does not work. +# TODO: Multiple exception names in a ``except`` don't work. +# TODO: The effect system (raises: []) has trouble with my try transformation. +# TODO: Can't await in a 'except' body + # -- Futures @@ -922,14 +924,17 @@ proc getName(node: PNimrodNode): string {.compileTime.} = return $node[1].ident of nnkIdent: return $node.ident + of nnkEmpty: + return "anonymous" else: - assert false + error("Unknown name.") macro async*(prc: stmt): stmt {.immediate.} = ## Macro which processes async procedures into the appropriate ## iterators and yield statements. - - expectKind(prc, nnkProcDef) + if prc.kind notin {nnkProcDef, nnkLambda}: + error("Cannot transform this node kind into an async proc." & + " Proc definition or lambda node expected.") hint("Processing " & prc[0].getName & " as an async proc.") @@ -941,7 +946,9 @@ macro async*(prc: stmt): stmt {.immediate.} = if $returnType[0] != "PFuture": error("Expected return type of 'PFuture' got '" & $returnType[0] & "'") - let subtypeIsVoid = returnType.kind == nnkEmpty + let subtypeIsVoid = returnType.kind == nnkEmpty or + (returnType.kind == nnkBracketExpr and + returnType[1].kind == nnkIdent and returnType[1].ident == !"void") var outerProcBody = newNimNode(nnkStmtList) @@ -990,17 +997,19 @@ macro async*(prc: stmt): stmt {.immediate.} = # Remove the 'async' pragma. for i in 0 .. <result[4].len: - if result[4][i].ident == !"async": + if result[4][i].kind == nnkIdent and result[4][i].ident == !"async": result[4].del(i) if subtypeIsVoid: # Add discardable pragma. - result[4].add(newIdentNode("discardable")) + if prc.kind == nnkProcDef: # TODO: This is a workaround for #1287 + result[4].add(newIdentNode("discardable")) if returnType.kind == nnkEmpty: # Add PFuture[void] result[3][0] = parseExpr("PFuture[void]") result[6] = outerProcBody + #echo(treeRepr(result)) #echo(toStrLit(result)) proc recvLine*(socket: TAsyncFD): PFuture[string] {.async.} = diff --git a/lib/pure/asynchttpserver.nim b/lib/pure/asynchttpserver.nim index 6c2414d99..005c56ebc 100644 --- a/lib/pure/asynchttpserver.nim +++ b/lib/pure/asynchttpserver.nim @@ -14,12 +14,13 @@ import strtabs, asyncnet, asyncdispatch, parseutils, parseurl, strutils type TRequest* = object - client: PAsyncSocket # TODO: Separate this into a Response object? + client*: PAsyncSocket # TODO: Separate this into a Response object? reqMethod*: string headers*: PStringTable protocol*: tuple[orig: string, major, minor: int] url*: TURL hostname*: string ## The hostname of the client that made the request. + body*: string # TODO PAsyncHttpServer* = ref object socket: PAsyncSocket @@ -169,6 +170,10 @@ proc serve*(server: PAsyncHttpServer, port: TPort, var fut = await server.socket.acceptAddr() processClient(fut.client, fut.address, callback) +proc close*(server: PAsyncHttpServer) = + ## Terminates the async http server instance. + server.socket.close() + when isMainModule: var server = newAsyncHttpServer() proc cb(req: TRequest) {.async.} = diff --git a/lib/pure/concurrency/cpuinfo.nim b/lib/pure/concurrency/cpuinfo.nim new file mode 100644 index 000000000..dfa819f64 --- /dev/null +++ b/lib/pure/concurrency/cpuinfo.nim @@ -0,0 +1,58 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2014 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## This module implements procs to determine the number of CPUs / cores. + +include "system/inclrtl" + +import strutils, os + +when not defined(windows): + import posix + +when defined(linux): + import linux + +when defined(macosx) or defined(bsd): + const + CTL_HW = 6 + HW_AVAILCPU = 25 + HW_NCPU = 3 + proc sysctl(x: ptr array[0..3, cint], y: cint, z: pointer, + a: var csize, b: pointer, c: int): cint {. + importc: "sysctl", header: "<sys/sysctl.h>".} + +proc countProcessors*(): int {.rtl, extern: "ncpi$1".} = + ## returns the numer of the processors/cores the machine has. + ## Returns 0 if it cannot be detected. + when defined(windows): + var x = getEnv("NUMBER_OF_PROCESSORS") + if x.len > 0: result = parseInt(x.string) + elif defined(macosx) or defined(bsd): + var + mib: array[0..3, cint] + numCPU: int + len: csize + mib[0] = CTL_HW + mib[1] = HW_AVAILCPU + len = sizeof(numCPU) + discard sysctl(addr(mib), 2, addr(numCPU), len, nil, 0) + if numCPU < 1: + mib[1] = HW_NCPU + discard sysctl(addr(mib), 2, addr(numCPU), len, nil, 0) + result = numCPU + elif defined(hpux): + result = mpctl(MPC_GETNUMSPUS, nil, nil) + elif defined(irix): + var SC_NPROC_ONLN {.importc: "_SC_NPROC_ONLN", header: "<unistd.h>".}: cint + result = sysconf(SC_NPROC_ONLN) + else: + result = sysconf(SC_NPROCESSORS_ONLN) + if result <= 0: result = 1 + diff --git a/lib/pure/concurrency/cpuload.nim b/lib/pure/concurrency/cpuload.nim new file mode 100644 index 000000000..3cf6a7392 --- /dev/null +++ b/lib/pure/concurrency/cpuload.nim @@ -0,0 +1,96 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2014 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## This module implements a helper for a thread pool to determine whether +## creating a thread is a good idea. + +when defined(windows): + import winlean, os, strutils, math + + proc `-`(a, b: TFILETIME): int64 = a.rdFileTime - b.rdFileTime +elif defined(linux): + from cpuinfo import countProcessors + +type + ThreadPoolAdvice* = enum + doNothing, + doCreateThread, # create additional thread for throughput + doShutdownThread # too many threads are busy, shutdown one + + ThreadPoolState* = object + when defined(windows): + prevSysKernel, prevSysUser, prevProcKernel, prevProcUser: TFILETIME + calls*: int + +proc advice*(s: var ThreadPoolState): ThreadPoolAdvice = + when defined(windows): + var + sysIdle, sysKernel, sysUser, + procCreation, procExit, procKernel, procUser: TFILETIME + if getSystemTimes(sysIdle, sysKernel, sysUser) == 0 or + getProcessTimes(THandle(-1), procCreation, procExit, + procKernel, procUser) == 0: + return doNothing + if s.calls > 0: + let + sysKernelDiff = sysKernel - s.prevSysKernel + sysUserDiff = sysUser - s.prevSysUser + + procKernelDiff = procKernel - s.prevProcKernel + procUserDiff = procUser - s.prevProcUser + + sysTotal = int(sysKernelDiff + sysUserDiff) + procTotal = int(procKernelDiff + procUserDiff) + # total CPU usage < 85% --> create a new worker thread. + # Measurements show that 100% and often even 90% is not reached even + # if all my cores are busy. + if sysTotal == 0 or procTotal / sysTotal < 0.85: + result = doCreateThread + s.prevSysKernel = sysKernel + s.prevSysUser = sysUser + s.prevProcKernel = procKernel + s.prevProcUser = procUser + elif defined(linux): + proc fscanf(c: TFile, frmt: cstring) {.varargs, importc, + header: "<stdio.h>".} + + var f = open("/proc/loadavg") + var b: float + var busy, total: int + fscanf(f,"%lf %lf %lf %ld/%ld", + addr b, addr b, addr b, addr busy, addr total) + f.close() + let cpus = countProcessors() + if busy-1 < cpus: + result = doCreateThread + elif busy-1 >= cpus*2: + result = doShutdownThread + else: + result = doNothing + else: + # XXX implement this for other OSes + result = doNothing + inc s.calls + +when isMainModule: + proc busyLoop() = + while true: + discard random(80) + os.sleep(100) + + spawn busyLoop() + spawn busyLoop() + spawn busyLoop() + spawn busyLoop() + + var s: ThreadPoolState + + for i in 1 .. 70: + echo advice(s) + os.sleep(1000) diff --git a/lib/pure/concurrency/threadpool.nim b/lib/pure/concurrency/threadpool.nim new file mode 100644 index 000000000..fd1041918 --- /dev/null +++ b/lib/pure/concurrency/threadpool.nim @@ -0,0 +1,378 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2014 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## Implements Nimrod's 'spawn'. + +import cpuinfo, cpuload, locks + +{.push stackTrace:off.} + +type + CondVar = object + c: TCond + L: TLock + counter: int + +proc createCondVar(): CondVar = + initCond(result.c) + initLock(result.L) + +proc destroyCondVar(cv: var CondVar) {.inline.} = + deinitCond(cv.c) + deinitLock(cv.L) + +proc await(cv: var CondVar) = + acquire(cv.L) + while cv.counter <= 0: + wait(cv.c, cv.L) + dec cv.counter + release(cv.L) + +proc signal(cv: var CondVar) = + acquire(cv.L) + inc cv.counter + release(cv.L) + signal(cv.c) + +const CacheLineSize = 32 # true for most archs + +type + Barrier {.compilerProc.} = object + entered: int + cv: CondVar # condvar takes 3 words at least + when sizeof(int) < 8: + cacheAlign: array[CacheLineSize-4*sizeof(int), byte] + left: int + cacheAlign2: array[CacheLineSize-sizeof(int), byte] + interest: bool ## wether the master is interested in the "all done" event + +proc barrierEnter(b: ptr Barrier) {.compilerProc, inline.} = + # due to the signaling between threads, it is ensured we are the only + # one with access to 'entered' so we don't need 'atomicInc' here: + inc b.entered + # also we need no 'fence' instructions here as soon 'nimArgsPassingDone' + # will be called which already will perform a fence for us. + +proc barrierLeave(b: ptr Barrier) {.compilerProc, inline.} = + atomicInc b.left + when not defined(x86): fence() + if b.interest and b.left == b.entered: signal(b.cv) + +proc openBarrier(b: ptr Barrier) {.compilerProc, inline.} = + b.entered = 0 + b.left = 0 + b.interest = false + +proc closeBarrier(b: ptr Barrier) {.compilerProc.} = + fence() + if b.left != b.entered: + b.cv = createCondVar() + fence() + b.interest = true + fence() + while b.left != b.entered: await(b.cv) + destroyCondVar(b.cv) + +{.pop.} + +# ---------------------------------------------------------------------------- + +type + foreign* = object ## a region that indicates the pointer comes from a + ## foreign thread heap. + AwaitInfo = object + cv: CondVar + idx: int + + FlowVarBase* = ref FlowVarBaseObj ## untyped base class for 'FlowVar[T]' + FlowVarBaseObj = object of TObject + ready, usesCondVar: bool + cv: CondVar #\ + # for 'awaitAny' support + ai: ptr AwaitInfo + idx: int + data: pointer # we incRef and unref it to keep it alive + owner: pointer # ptr Worker + + FlowVarObj[T] = object of FlowVarBaseObj + blob: T + + FlowVar*{.compilerProc.}[T] = ref FlowVarObj[T] ## a data flow variable + + ToFreeQueue = object + len: int + lock: TLock + empty: TCond + data: array[512, pointer] + + WorkerProc = proc (thread, args: pointer) {.nimcall, gcsafe.} + Worker = object + taskArrived: CondVar + taskStarted: CondVar #\ + # task data: + f: WorkerProc + data: pointer + ready: bool # put it here for correct alignment! + initialized: bool # whether it has even been initialized + shutdown: bool # the pool requests to shut down this worker thread + q: ToFreeQueue + +proc await*(fv: FlowVarBase) = + ## waits until the value for the flowVar arrives. Usually it is not necessary + ## to call this explicitly. + if fv.usesCondVar: + fv.usesCondVar = false + await(fv.cv) + destroyCondVar(fv.cv) + +proc finished(fv: FlowVarBase) = + doAssert fv.ai.isNil, "flowVar is still attached to an 'awaitAny'" + # we have to protect against the rare cases where the owner of the flowVar + # simply disregards the flowVar and yet the "flowVarr" has not yet written + # anything to it: + await(fv) + if fv.data.isNil: return + let owner = cast[ptr Worker](fv.owner) + let q = addr(owner.q) + var waited = false + while true: + acquire(q.lock) + if q.len < q.data.len: + q.data[q.len] = fv.data + inc q.len + release(q.lock) + break + else: + # the queue is exhausted! We block until it has been cleaned: + release(q.lock) + wait(q.empty, q.lock) + waited = true + fv.data = nil + # wakeup other potentially waiting threads: + if waited: signal(q.empty) + +proc cleanFlowVars(w: ptr Worker) = + let q = addr(w.q) + acquire(q.lock) + for i in 0 .. <q.len: + GC_unref(cast[PObject](q.data[i])) + q.len = 0 + release(q.lock) + signal(q.empty) + +proc fvFinalizer[T](fv: FlowVar[T]) = finished(fv) + +proc nimCreateFlowVar[T](): FlowVar[T] {.compilerProc.} = + new(result, fvFinalizer) + +proc nimFlowVarCreateCondVar(fv: FlowVarBase) {.compilerProc.} = + fv.cv = createCondVar() + fv.usesCondVar = true + +proc nimFlowVarSignal(fv: FlowVarBase) {.compilerProc.} = + if fv.ai != nil: + acquire(fv.ai.cv.L) + fv.ai.idx = fv.idx + inc fv.ai.cv.counter + release(fv.ai.cv.L) + signal(fv.ai.cv.c) + if fv.usesCondVar: signal(fv.cv) + +proc awaitAndThen*[T](fv: FlowVar[T]; action: proc (x: T) {.closure.}) = + ## blocks until the ``fv`` is available and then passes its value + ## to ``action``. Note that due to Nimrod's parameter passing semantics this + ## means that ``T`` doesn't need to be copied and so ``awaitAndThen`` can + ## sometimes be more efficient than ``^``. + await(fv) + when T is string or T is seq: + action(cast[T](fv.data)) + elif T is ref: + {.error: "'awaitAndThen' not available for FlowVar[ref]".} + else: + action(fv.blob) + finished(fv) + +proc `^`*[T](fv: FlowVar[ref T]): foreign ptr T = + ## blocks until the value is available and then returns this value. + await(fv) + result = cast[foreign ptr T](fv.data) + +proc `^`*[T](fv: FlowVar[T]): T = + ## blocks until the value is available and then returns this value. + await(fv) + when T is string or T is seq: + result = cast[T](fv.data) + else: + result = fv.blob + +proc awaitAny*(flowVars: openArray[FlowVarBase]): int = + ## awaits any of the given flowVars. Returns the index of one flowVar for + ## which a value arrived. A flowVar only supports one call to 'awaitAny' at + ## the same time. That means if you await([a,b]) and await([b,c]) the second + ## call will only await 'c'. If there is no flowVar left to be able to wait + ## on, -1 is returned. + ## **Note**: This results in non-deterministic behaviour and so should be + ## avoided. + var ai: AwaitInfo + ai.cv = createCondVar() + var conflicts = 0 + for i in 0 .. flowVars.high: + if cas(addr flowVars[i].ai, nil, addr ai): + flowVars[i].idx = i + else: + inc conflicts + if conflicts < flowVars.len: + await(ai.cv) + result = ai.idx + for i in 0 .. flowVars.high: + discard cas(addr flowVars[i].ai, addr ai, nil) + else: + result = -1 + destroyCondVar(ai.cv) + +proc nimArgsPassingDone(p: pointer) {.compilerProc.} = + let w = cast[ptr Worker](p) + signal(w.taskStarted) + +const + MaxThreadPoolSize* = 256 ## maximal size of the thread pool. 256 threads + ## should be good enough for anybody ;-) + +var + currentPoolSize: int + maxPoolSize = MaxThreadPoolSize + minPoolSize = 4 + gSomeReady = createCondVar() + readyWorker: ptr Worker + +proc slave(w: ptr Worker) {.thread.} = + while true: + w.ready = true + readyWorker = w + signal(gSomeReady) + await(w.taskArrived) + assert(not w.ready) + w.f(w, w.data) + if w.q.len != 0: w.cleanFlowVars + if w.shutdown: + w.shutdown = false + atomicDec currentPoolSize + +proc setMinPoolSize*(size: range[1..MaxThreadPoolSize]) = + ## sets the minimal thread pool size. The default value of this is 4. + minPoolSize = size + +proc setMaxPoolSize*(size: range[1..MaxThreadPoolSize]) = + ## sets the minimal thread pool size. The default value of this + ## is ``MaxThreadPoolSize``. + maxPoolSize = size + +var + workers: array[MaxThreadPoolSize, TThread[ptr Worker]] + workersData: array[MaxThreadPoolSize, Worker] + +proc activateThread(i: int) {.noinline.} = + workersData[i].taskArrived = createCondVar() + workersData[i].taskStarted = createCondVar() + workersData[i].initialized = true + initCond(workersData[i].q.empty) + initLock(workersData[i].q.lock) + createThread(workers[i], slave, addr(workersData[i])) + +proc setup() = + currentPoolSize = min(countProcessors(), MaxThreadPoolSize) + readyWorker = addr(workersData[0]) + for i in 0.. <currentPoolSize: activateThread(i) + +proc preferSpawn*(): bool = + ## Use this proc to determine quickly if a 'spawn' or a direct call is + ## preferable. If it returns 'true' a 'spawn' may make sense. In general + ## it is not necessary to call this directly; use 'spawnX' instead. + result = gSomeReady.counter > 0 + +proc spawn*(call: expr): expr {.magic: "Spawn".} + ## always spawns a new task, so that the 'call' is never executed on + ## the calling thread. 'call' has to be proc call 'p(...)' where 'p' + ## is gcsafe and has a return type that is either 'void' or compatible + ## with ``FlowVar[T]``. + +template spawnX*(call: expr): expr = + ## spawns a new task if a CPU core is ready, otherwise executes the + ## call in the calling thread. Usually it is advised to + ## use 'spawn' in order to not block the producer for an unknown + ## amount of time. 'call' has to be proc call 'p(...)' where 'p' + ## is gcsafe and has a return type that is either 'void' or compatible + ## with ``FlowVar[T]``. + (if preferSpawn(): spawn call else: call) + +proc parallel*(body: stmt) {.magic: "Parallel".} + ## a parallel section can be used to execute a block in parallel. ``body`` + ## has to be in a DSL that is a particular subset of the language. Please + ## refer to the manual for further information. + +var + state: ThreadPoolState + stateLock: TLock + +initLock stateLock + +proc selectWorker(w: ptr Worker; fn: WorkerProc; data: pointer): bool = + if cas(addr w.ready, true, false): + w.data = data + w.f = fn + signal(w.taskArrived) + await(w.taskStarted) + result = true + +proc nimSpawn(fn: WorkerProc; data: pointer) {.compilerProc.} = + # implementation of 'spawn' that is used by the code generator. + while true: + if selectWorker(readyWorker, fn, data): return + for i in 0.. <currentPoolSize: + if selectWorker(addr(workersData[i]), fn, data): return + # determine what to do, but keep in mind this is expensive too: + # state.calls < maxPoolSize: warmup phase + # (state.calls and 127) == 0: periodic check + if state.calls < maxPoolSize or (state.calls and 127) == 0: + # ensure the call to 'advice' is atomic: + if tryAcquire(stateLock): + case advice(state) + of doNothing: discard + of doCreateThread: + if currentPoolSize < maxPoolSize: + if not workersData[currentPoolSize].initialized: + activateThread(currentPoolSize) + let w = addr(workersData[currentPoolSize]) + atomicInc currentPoolSize + if selectWorker(w, fn, data): + release(stateLock) + return + # else we didn't succeed but some other thread, so do nothing. + of doShutdownThread: + if currentPoolSize > minPoolSize: + let w = addr(workersData[currentPoolSize-1]) + w.shutdown = true + # we don't free anything here. Too dangerous. + release(stateLock) + # else the acquire failed, but this means some + # other thread succeeded, so we don't need to do anything here. + await(gSomeReady) + +proc sync*() = + ## a simple barrier to wait for all spawn'ed tasks. If you need more elaborate + ## waiting, you have to use an explicit barrier. + while true: + var allReady = true + for i in 0 .. <currentPoolSize: + if not allReady: break + allReady = allReady and workersData[i].ready + if allReady: break + await(gSomeReady) + +setup() diff --git a/lib/pure/json.nim b/lib/pure/json.nim index bd5259f95..4e369b854 100644 --- a/lib/pure/json.nim +++ b/lib/pure/json.nim @@ -679,8 +679,8 @@ proc `[]`*(node: PJsonNode, name: string): PJsonNode = proc `[]`*(node: PJsonNode, index: int): PJsonNode = ## Gets the node at `index` in an Array. Result is undefined if `index` ## is out of bounds + assert(not isNil(node)) assert(node.kind == JArray) - assert(node != nil) return node.elems[index] proc hasKey*(node: PJsonNode, key: string): bool = diff --git a/lib/pure/net.nim b/lib/pure/net.nim index 2f1a6fa46..e34c88327 100644 --- a/lib/pure/net.nim +++ b/lib/pure/net.nim @@ -11,7 +11,7 @@ {.deadCodeElim: on.} import rawsockets, os, strutils, unsigned, parseutils, times -export TPort +export TPort, `$` const useWinVersion = defined(Windows) or defined(nimdoc) diff --git a/lib/pure/osproc.nim b/lib/pure/osproc.nim index cb9792f2b..04a0c2403 100644 --- a/lib/pure/osproc.nim +++ b/lib/pure/osproc.nim @@ -1,7 +1,7 @@ # # # Nimrod's Runtime Library -# (c) Copyright 2013 Andreas Rumpf +# (c) Copyright 2014 Andreas Rumpf # # See the file "copying.txt", included in this # distribution, for details about the copyright. @@ -13,7 +13,7 @@ include "system/inclrtl" import - strutils, os, strtabs, streams + strutils, os, strtabs, streams, cpuinfo when defined(windows): import winlean @@ -225,42 +225,10 @@ proc errorHandle*(p: PProcess): TFileHandle {.rtl, extern: "nosp$1", ## it is closed when closing the PProcess ``p``. result = p.errHandle -when defined(macosx) or defined(bsd): - const - CTL_HW = 6 - HW_AVAILCPU = 25 - HW_NCPU = 3 - proc sysctl(x: ptr array[0..3, cint], y: cint, z: pointer, - a: var csize, b: pointer, c: int): cint {. - importc: "sysctl", header: "<sys/sysctl.h>".} - proc countProcessors*(): int {.rtl, extern: "nosp$1".} = ## returns the numer of the processors/cores the machine has. ## Returns 0 if it cannot be detected. - when defined(windows): - var x = getEnv("NUMBER_OF_PROCESSORS") - if x.len > 0: result = parseInt(x.string) - elif defined(macosx) or defined(bsd): - var - mib: array[0..3, cint] - numCPU: int - len: csize - mib[0] = CTL_HW - mib[1] = HW_AVAILCPU - len = sizeof(numCPU) - discard sysctl(addr(mib), 2, addr(numCPU), len, nil, 0) - if numCPU < 1: - mib[1] = HW_NCPU - discard sysctl(addr(mib), 2, addr(numCPU), len, nil, 0) - result = numCPU - elif defined(hpux): - result = mpctl(MPC_GETNUMSPUS, nil, nil) - elif defined(irix): - var SC_NPROC_ONLN {.importc: "_SC_NPROC_ONLN", header: "<unistd.h>".}: cint - result = sysconf(SC_NPROC_ONLN) - else: - result = sysconf(SC_NPROCESSORS_ONLN) - if result <= 0: result = 1 + result = cpuinfo.countProcessors() proc execProcesses*(cmds: openArray[string], options = {poStdErrToStdOut, poParentStreams}, diff --git a/lib/pure/rawsockets.nim b/lib/pure/rawsockets.nim index 07b647b68..94189fd89 100644 --- a/lib/pure/rawsockets.nim +++ b/lib/pure/rawsockets.nim @@ -39,7 +39,6 @@ export MSG_PEEK type - TPort* = distinct uint16 ## port type TDomain* = enum ## domain, which specifies the protocol family of the diff --git a/lib/system.nim b/lib/system.nim index 2f24f68b1..f45707849 100644 --- a/lib/system.nim +++ b/lib/system.nim @@ -42,7 +42,6 @@ type cstring* {.magic: Cstring.} ## built-in cstring (*compatible string*) type pointer* {.magic: Pointer.} ## built-in pointer type, use the ``addr`` ## operator to get a pointer to a variable - const on* = true ## alias for ``true`` off* = false ## alias for ``false`` @@ -51,6 +50,9 @@ const type Ordinal* {.magic: Ordinal.}[T] + `ptr`* {.magic: Pointer.}[T] ## built-in generic untraced pointer type + `ref`* {.magic: Pointer.}[T] ## built-in generic traced pointer type + `nil` {.magic: "Nil".} expr* {.magic: Expr.} ## meta type to denote an expression (for templates) stmt* {.magic: Stmt.} ## meta type to denote a statement (for templates) @@ -2983,6 +2985,10 @@ proc locals*(): TObject {.magic: "Locals", noSideEffect.} = ## # -> B is 1 discard +proc deepCopy*[T](x: T): T {.magic: "DeepCopy", noSideEffect.} = discard + ## performs a deep copy of `x`. This is also used by the code generator + ## for the implementation of ``spawn``. + when not defined(booting): type semistatic*[T] = static[T] | T @@ -2991,6 +2997,3 @@ when not defined(booting): template isStatic*(x): expr = compiles(static(x)) # checks whether `x` is a value known at compile-time - -when hasThreadSupport: - when hostOS != "standalone": include "system/sysspawn" diff --git a/lib/system/assign.nim b/lib/system/assign.nim index 75c749633..2ae945fb1 100644 --- a/lib/system/assign.nim +++ b/lib/system/assign.nim @@ -179,7 +179,8 @@ when not defined(nimmixin): # internal proc used for destroying sequences and arrays for i in countup(0, r.len - 1): destroy(r[i]) else: - # XXX Why is this exported and no compilerproc? + # XXX Why is this exported and no compilerproc? -> compilerprocs cannot be + # generic for now proc nimDestroyRange*[T](r: T) = # internal proc used for destroying sequences and arrays mixin destroy diff --git a/lib/system/atomics.nim b/lib/system/atomics.nim index b1a96b209..43b3f0438 100644 --- a/lib/system/atomics.nim +++ b/lib/system/atomics.nim @@ -1,15 +1,18 @@ # # # Nimrod's Runtime Library -# (c) Copyright 2012 Andreas Rumpf +# (c) Copyright 2014 Andreas Rumpf # # See the file "copying.txt", included in this # distribution, for details about the copyright. # ## Atomic operations for Nimrod. +{.push stackTrace:off.} -when (defined(gcc) or defined(llvm_gcc)) and hasThreadSupport: +const someGcc = defined(gcc) or defined(llvm_gcc) or defined(clang) + +when someGcc and hasThreadSupport: type AtomMemModel* = enum ATOMIC_RELAXED, ## No barriers or synchronization. @@ -152,41 +155,16 @@ when (defined(gcc) or defined(llvm_gcc)) and hasThreadSupport: ## A value of 0 indicates typical alignment should be used. The compiler may also ## ignore this parameter. + template fence*() = atomicThreadFence(ATOMIC_SEQ_CST) elif defined(vcc) and hasThreadSupport: proc addAndFetch*(p: ptr int, val: int): int {. importc: "NimXadd", nodecl.} + else: proc addAndFetch*(p: ptr int, val: int): int {.inline.} = inc(p[], val) result = p[] -# atomic compare and swap (CAS) funcitons to implement lock-free algorithms - -#if defined(windows) and not defined(gcc) and hasThreadSupport: -# proc InterlockedCompareExchangePointer(mem: ptr pointer, -# newValue: pointer, comparand: pointer) : pointer {.nodecl, -# importc: "InterlockedCompareExchangePointer", header:"windows.h".} - -# proc compareAndSwap*[T](mem: ptr T, -# expected: T, newValue: T): bool {.inline.}= -# ## Returns true if successfully set value at mem to newValue when value -# ## at mem == expected -# return InterlockedCompareExchangePointer(addr(mem), -# addr(newValue), addr(expected))[] == expected - -#elif not hasThreadSupport: -# proc compareAndSwap*[T](mem: ptr T, -# expected: T, newValue: T): bool {.inline.} = -# ## Returns true if successfully set value at mem to newValue when value -# ## at mem == expected -# var oldval = mem[] -# if oldval == expected: -# mem[] = newValue -# return true -# return false - - -# Some convenient functions proc atomicInc*(memLoc: var int, x: int = 1): int = when defined(gcc) and hasThreadSupport: result = atomic_add_fetch(memLoc.addr, x, ATOMIC_RELAXED) @@ -203,3 +181,37 @@ proc atomicDec*(memLoc: var int, x: int = 1): int = else: dec(memLoc, x) result = memLoc + +when defined(windows) and not someGcc: + proc interlockedCompareExchange(p: pointer; exchange, comparand: int32): int32 + {.importc: "InterlockedCompareExchange", header: "<windows.h>", cdecl.} + + proc cas*[T: bool|int|ptr](p: ptr T; oldValue, newValue: T): bool = + interlockedCompareExchange(p, newValue.int32, oldValue.int32) != 0 + # XXX fix for 64 bit build +else: + # this is valid for GCC and Intel C++ + proc cas*[T: bool|int|ptr](p: ptr T; oldValue, newValue: T): bool + {.importc: "__sync_bool_compare_and_swap", nodecl.} + # XXX is this valid for 'int'? + + +when (defined(x86) or defined(amd64)) and (defined(gcc) or defined(llvm_gcc)): + proc cpuRelax {.inline.} = + {.emit: """asm volatile("pause" ::: "memory");""".} +elif (defined(x86) or defined(amd64)) and defined(vcc): + proc cpuRelax {.importc: "YieldProcessor", header: "<windows.h>".} +elif defined(intelc): + proc cpuRelax {.importc: "_mm_pause", header: "xmmintrin.h".} +elif false: + from os import sleep + + proc cpuRelax {.inline.} = os.sleep(1) + +when not defined(fence) and hasThreadSupport: + # XXX fixme + proc fence*() {.inline.} = + var dummy: bool + discard cas(addr dummy, false, true) + +{.pop.} diff --git a/lib/system/sysspawn.nim b/lib/system/sysspawn.nim index dabf35a3e..95cdba65d 100644 --- a/lib/system/sysspawn.nim +++ b/lib/system/sysspawn.nim @@ -14,30 +14,6 @@ when not defined(NimString): {.push stackTrace:off.} -when (defined(x86) or defined(amd64)) and defined(gcc): - proc cpuRelax {.inline.} = - {.emit: """asm volatile("pause" ::: "memory");""".} -elif (defined(x86) or defined(amd64)) and defined(vcc): - proc cpuRelax {.importc: "YieldProcessor", header: "<windows.h>".} -elif defined(intelc): - proc cpuRelax {.importc: "_mm_pause", header: "xmmintrin.h".} -elif false: - from os import sleep - - proc cpuRelax {.inline.} = os.sleep(1) - -when defined(windows) and not defined(gcc): - proc interlockedCompareExchange(p: pointer; exchange, comparand: int32): int32 - {.importc: "InterlockedCompareExchange", header: "<windows.h>", cdecl.} - - proc cas(p: ptr bool; oldValue, newValue: bool): bool = - interlockedCompareExchange(p, newValue.int32, oldValue.int32) != 0 - -else: - # this is valid for GCC and Intel C++ - proc cas(p: ptr bool; oldValue, newValue: bool): bool - {.importc: "__sync_bool_compare_and_swap", nodecl.} - # We declare our own condition variables here to get rid of the dummy lock # on Windows: @@ -54,6 +30,9 @@ proc createCondVar(): CondVar = initSysLock(result.stupidLock) #acquireSys(result.stupidLock) +proc destroyCondVar(c: var CondVar) {.inline.} = + deinitSysCond(c.c) + proc await(cv: var CondVar) = when defined(posix): acquireSys(cv.stupidLock) @@ -100,6 +79,26 @@ proc signal(cv: var FastCondVar) = #if cas(addr cv.slowPath, true, false): signal(cv.slow) +type + Barrier* {.compilerProc.} = object + counter: int + cv: CondVar + +proc barrierEnter*(b: ptr Barrier) {.compilerProc.} = + atomicInc b.counter + +proc barrierLeave*(b: ptr Barrier) {.compilerProc.} = + atomicDec b.counter + if b.counter <= 0: signal(b.cv) + +proc openBarrier*(b: ptr Barrier) {.compilerProc.} = + b.counter = 0 + b.cv = createCondVar() + +proc closeBarrier*(b: ptr Barrier) {.compilerProc.} = + await(b.cv) + destroyCondVar(b.cv) + {.pop.} # ---------------------------------------------------------------------------- diff --git a/tests/parallel/nimrod.cfg b/tests/parallel/nimrod.cfg new file mode 100644 index 000000000..b81c89721 --- /dev/null +++ b/tests/parallel/nimrod.cfg @@ -0,0 +1 @@ +threads:on diff --git a/tests/parallel/tdisjoint_slice1.nim b/tests/parallel/tdisjoint_slice1.nim new file mode 100644 index 000000000..c1d0e52f8 --- /dev/null +++ b/tests/parallel/tdisjoint_slice1.nim @@ -0,0 +1,21 @@ +discard """ + outputsub: "EVEN 28" +""" + +import threadpool + +proc odd(a: int) = echo "ODD ", a +proc even(a: int) = echo "EVEN ", a + +proc main() = + var a: array[0..30, int] + for i in low(a)..high(a): a[i] = i + parallel: + var i = 0 + while i <= 29: + spawn even(a[i]) + spawn odd(a[i+1]) + inc i, 2 + # is correct here + +main() diff --git a/tests/parallel/tdisjoint_slice2.nim b/tests/parallel/tdisjoint_slice2.nim new file mode 100644 index 000000000..1e86ea644 --- /dev/null +++ b/tests/parallel/tdisjoint_slice2.nim @@ -0,0 +1,33 @@ +discard """ + output: '''0 +1 +2 +3 +4 +5 +6 +7 +8''' + sortoutput: true +""" + +import threadpool + +proc f(a: openArray[int]) = + for x in a: echo x + +proc f(a: int) = echo a + +proc main() = + var a: array[0..9, int] = [0,1,2,3,4,5,6,7,8,9] + parallel: + spawn f(a[0..2]) + #spawn f(a[16..30]) + var i = 3 + while i <= 8: + spawn f(a[i]) + spawn f(a[i+1]) + inc i, 2 + # is correct here + +main() diff --git a/tests/parallel/tflowvar.nim b/tests/parallel/tflowvar.nim new file mode 100644 index 000000000..77fab14b5 --- /dev/null +++ b/tests/parallel/tflowvar.nim @@ -0,0 +1,17 @@ +discard """ + output: '''foobarfoobarbazbearbazbear''' + cmd: "nimrod $target --threads:on $options $file" +""" + +import threadpool + +proc computeSomething(a, b: string): string = a & b & a & b + +proc main = + let fvA = spawn computeSomething("foo", "bar") + let fvB = spawn computeSomething("baz", "bear") + + echo(^fvA, ^fvB) + +main() +sync() diff --git a/tests/parallel/tforstmt.nim b/tests/parallel/tforstmt.nim new file mode 100644 index 000000000..58de833f3 --- /dev/null +++ b/tests/parallel/tforstmt.nim @@ -0,0 +1,25 @@ +discard """ + output: '''3 +4 +5 +6 +7''' + sortoutput: true +""" + +import threadpool, os + +proc p(x: int) = + os.sleep(100 - x*10) + echo x + +proc testFor(a, b: int; foo: var openArray[int]) = + parallel: + for i in max(a, 0) .. min(b, foo.high): + spawn p(foo[i]) + +var arr = [0, 1, 2, 3, 4, 5, 6, 7] + +testFor(3, 10, arr) + + diff --git a/tests/parallel/tinvalid_array_bounds.nim b/tests/parallel/tinvalid_array_bounds.nim new file mode 100644 index 000000000..4c6065fd6 --- /dev/null +++ b/tests/parallel/tinvalid_array_bounds.nim @@ -0,0 +1,25 @@ +discard """ + errormsg: "can prove: i + 1 > 30" + line: 21 +""" + +import threadpool + +proc f(a: openArray[int]) = + for x in a: echo x + +proc f(a: int) = echo a + +proc main() = + var a: array[0..30, int] + parallel: + spawn f(a[0..15]) + spawn f(a[16..30]) + var i = 0 + while i <= 30: + spawn f(a[i]) + spawn f(a[i+1]) + inc i + #inc i # inc i, 2 would be correct here + +main() diff --git a/tests/parallel/tinvalid_counter_usage.nim b/tests/parallel/tinvalid_counter_usage.nim new file mode 100644 index 000000000..c6303c651 --- /dev/null +++ b/tests/parallel/tinvalid_counter_usage.nim @@ -0,0 +1,26 @@ +discard """ + errormsg: "invalid usage of counter after increment" + line: 21 +""" + +import threadpool + +proc f(a: openArray[int]) = + for x in a: echo x + +proc f(a: int) = echo a + +proc main() = + var a: array[0..30, int] + parallel: + spawn f(a[0..15]) + spawn f(a[16..30]) + var i = 0 + while i <= 30: + inc i + spawn f(a[i]) + inc i + #spawn f(a[i+1]) + #inc i # inc i, 2 would be correct here + +main() diff --git a/tests/parallel/tnon_disjoint_slice1.nim b/tests/parallel/tnon_disjoint_slice1.nim new file mode 100644 index 000000000..72d008bbd --- /dev/null +++ b/tests/parallel/tnon_disjoint_slice1.nim @@ -0,0 +1,25 @@ +discard """ + errormsg: "cannot prove (i)..(i) disjoint from (i + 1)..(i + 1)" + line: 20 +""" + +import threadpool + +proc f(a: openArray[int]) = + for x in a: echo x + +proc f(a: int) = echo a + +proc main() = + var a: array[0..30, int] + parallel: + #spawn f(a[0..15]) + #spawn f(a[16..30]) + var i = 0 + while i <= 29: + spawn f(a[i]) + spawn f(a[i+1]) + inc i + #inc i # inc i, 2 would be correct here + +main() diff --git a/tests/parallel/tpi.nim b/tests/parallel/tpi.nim new file mode 100644 index 000000000..dcb9b8fc5 --- /dev/null +++ b/tests/parallel/tpi.nim @@ -0,0 +1,26 @@ +discard """ + output: '''3.141792613595791 +3.141792613595791''' +""" + +import strutils, math, threadpool + +proc term(k: float): float = 4 * math.pow(-1, k) / (2*k + 1) + +proc piU(n: int): float = + var ch = newSeq[FlowVar[float]](n+1) + for k in 0..n: + ch[k] = spawn term(float(k)) + for k in 0..n: + result += ^ch[k] + +proc piS(n: int): float = + var ch = newSeq[float](n+1) + parallel: + for k in 0..ch.high: + ch[k] = spawn term(float(k)) + for k in 0..ch.high: + result += ch[k] + +echo formatFloat(piU(5000)) +echo formatFloat(piS(5000)) diff --git a/tests/system/tsysspawn.nim b/tests/parallel/tsysspawn.nim index 0388918aa..fc7921b0e 100644 --- a/tests/system/tsysspawn.nim +++ b/tests/parallel/tsysspawn.nim @@ -4,20 +4,22 @@ discard """ cmd: "nimrod $target --threads:on $options $file" """ +import threadpool + var x, y = 0 proc p1 = - for i in 0 .. 1_000_000: + for i in 0 .. 10_000: discard - inc x + atomicInc x proc p2 = - for i in 0 .. 1_000_000: + for i in 0 .. 10_000: discard - inc y, 2 + atomicInc y, 2 for i in 0.. 3: spawn(p1()) diff --git a/tests/parallel/tsysspawnbadarg.nim b/tests/parallel/tsysspawnbadarg.nim new file mode 100644 index 000000000..ad798a7d3 --- /dev/null +++ b/tests/parallel/tsysspawnbadarg.nim @@ -0,0 +1,9 @@ +discard """ + line: 9 + errormsg: "'spawn' takes a call expression" + cmd: "nimrod $target --threads:on $options $file" +""" + +import threadpool + +let foo = spawn(1) diff --git a/tests/system/tsysspawnbadarg.nim b/tests/system/tsysspawnbadarg.nim deleted file mode 100644 index ace074602..000000000 --- a/tests/system/tsysspawnbadarg.nim +++ /dev/null @@ -1,7 +0,0 @@ -discard """ - line: 7 - errormsg: "'spawn' takes a call expression of type void" - cmd: "nimrod $target --threads:on $options $file" -""" - -spawn(1) diff --git a/tests/testament/caasdriver.nim b/tests/testament/caasdriver.nim index 8804f3ed7..ddfe88273 100644 --- a/tests/testament/caasdriver.nim +++ b/tests/testament/caasdriver.nim @@ -106,7 +106,7 @@ proc close(session: var TNimrodSession) {.destructor.} = if session.mode == CaasRun: session.nim.close -proc doScenario(script: string, output: PStream, mode: TRunMode): bool = +proc doScenario(script: string, output: PStream, mode: TRunMode, verbose: bool): bool = result = true var f = open(script) @@ -138,7 +138,7 @@ proc doScenario(script: string, output: PStream, mode: TRunMode): bool = continue elif line.startsWith(">"): s.doCommand(line.substr(1).strip) - output.writeln line, "\n", s.lastOutput + output.writeln line, "\n", if verbose: s.lastOutput else: "" else: var expectMatch = true var pattern = s.replaceVars(line) @@ -155,13 +155,14 @@ proc doScenario(script: string, output: PStream, mode: TRunMode): bool = output.writeln "FAILURE ", line result = false -iterator caasTestsRunner*(filter = ""): tuple[test, output: string, - status: bool, mode: TRunMode] = +iterator caasTestsRunner*(filter = "", verbose = false): tuple[test, + output: string, status: bool, + mode: TRunMode] = for scenario in os.walkFiles(TesterDir / "caas/*.txt"): if filter.len > 0 and find(scenario, filter) == -1: continue for mode in modes: var outStream = newStringStream() - let r = doScenario(scenario, outStream, mode) + let r = doScenario(scenario, outStream, mode, verbose) yield (scenario, outStream.data, r, mode) when isMainModule: @@ -179,9 +180,12 @@ when isMainModule: if verbose and len(filter) > 0: echo "Running only test cases matching filter '$1'" % [filter] - for test, output, result, mode in caasTestsRunner(filter): + for test, output, result, mode in caasTestsRunner(filter, verbose): if not result or verbose: - echo test, "\n", output, "-> ", $mode, ":", $result, "\n-----" + echo "Mode ", $mode, " (", if result: "succeeded)" else: "failed)" + echo test + echo output + echo "---------\n" if not result: failures += 1 diff --git a/tests/testament/specs.nim b/tests/testament/specs.nim index 225ea1891..6e72f4b5e 100644 --- a/tests/testament/specs.nim +++ b/tests/testament/specs.nim @@ -46,7 +46,7 @@ type msg*: string ccodeCheck*: string err*: TResultEnum - substr*: bool + substr*, sortoutput*: bool targets*: set[TTarget] const @@ -113,6 +113,8 @@ proc parseSpec*(filename: string): TSpec = result.action = actionRun result.outp = e.value result.substr = true + of "sortoutput": + result.sortoutput = parseCfgBool(e.value) of "exitcode": discard parseInt(e.value, result.exitCode) of "msg": diff --git a/tests/testament/tester.nim b/tests/testament/tester.nim index 50d0e6eac..fc6b4ff95 100644 --- a/tests/testament/tester.nim +++ b/tests/testament/tester.nim @@ -11,7 +11,8 @@ import parseutils, strutils, pegs, os, osproc, streams, parsecfg, json, - marshal, backend, parseopt, specs, htmlgen, browsers, terminal + marshal, backend, parseopt, specs, htmlgen, browsers, terminal, + algorithm const resultsFile = "testresults.html" @@ -150,6 +151,11 @@ proc codegenCheck(test: TTest, check: string, given: var TSpec) = except EIO: given.err = reCodeNotFound +proc makeDeterministic(s: string): string = + var x = splitLines(s) + sort(x, system.cmp) + result = join(x, "\n") + proc testSpec(r: var TResults, test: TTest) = # major entry point for a single test let tname = test.name.addFileExt(".nim") @@ -191,8 +197,10 @@ proc testSpec(r: var TResults, test: TTest) = r.addResult(test, "exitcode: " & $expected.exitCode, "exitcode: " & $exitCode, reExitCodesDiffer) else: - if strip(buf.string) != strip(expected.outp): - if not (expected.substr and expected.outp in buf.string): + var bufB = strip(buf.string) + if expected.sortoutput: bufB = makeDeterministic(bufB) + if bufB != strip(expected.outp): + if not (expected.substr and expected.outp in bufB): given.err = reOutputsDiffer if given.err == reSuccess: codeGenCheck(test, expected.ccodeCheck, given) diff --git a/todo.txt b/todo.txt index 539089281..df18ff7e9 100644 --- a/todo.txt +++ b/todo.txt @@ -1,6 +1,23 @@ version 0.9.6 ============= +Concurrency +----------- + +- the disjoint checker needs to deal with 'a = spawn f(); g = spawn f()' +- implement 'deepCopy' builtin +- implement 'foo[1..4] = spawn(f[4..7])' +- support for exception propagation +- Minor: The copying of the 'ref Promise' into the thead local storage only + happens to work due to the write barrier's implementation +- 'gcsafe' inferrence needs to be fixed +- implement lock levels --> first without the more complex race avoidance +- document the new 'spawn' and 'parallel' statements + + +Misc +---- + - fix the bug that keeps 'defer' template from working - make '--implicitStatic:on' the default - fix the tuple unpacking in lambda bug diff --git a/web/news.txt b/web/news.txt index 0bbae7b7b..b7403a3c7 100644 --- a/web/news.txt +++ b/web/news.txt @@ -2,6 +2,23 @@ News ==== +.. + 2014-06-29 Version 0.9.6 released + ================================= + + Changes affecting backwards compatibility + ----------------------------------------- + + - ``spawn`` now uses an elaborate self-adapting thread pool and as such + has been moved into its own module. So to use it, you now have to import + ``threadpool``. + + + Library Additions + ----------------- + + - Added module ``cpuinfo``. + - Added module ``threadpool``. 2014-04-21 Version 0.9.4 released |