diff options
38 files changed, 1923 insertions, 159 deletions
diff --git a/compiler/ast.nim b/compiler/ast.nim index 80b9e9bb2..c3cb63df4 100644 --- a/compiler/ast.nim +++ b/compiler/ast.nim @@ -605,9 +605,9 @@ const # thus cannot be overloaded (also documented in the spec!): SpecialSemMagics* = { mDefined, mDefinedInScope, mCompiles, mLow, mHigh, mSizeOf, mIs, mOf, - mEcho, mShallowCopy, mExpandToAst} + mEcho, mShallowCopy, mExpandToAst, mParallel, mSpawn} -type +type PNode* = ref TNode TNodeSeq* = seq[PNode] PType* = ref TType @@ -885,6 +885,8 @@ const nkCallKinds* = {nkCall, nkInfix, nkPrefix, nkPostfix, nkCommand, nkCallStrLit, nkHiddenCallConv} + nkIdentKinds* = {nkIdent, nkSym, nkAccQuoted, nkOpenSymChoice, + nkClosedSymChoice} nkLiterals* = {nkCharLit..nkTripleStrLit} nkLambdaKinds* = {nkLambda, nkDo} diff --git a/compiler/ccgcalls.nim b/compiler/ccgcalls.nim index a7840305d..71e23aa1d 100644 --- a/compiler/ccgcalls.nim +++ b/compiler/ccgcalls.nim @@ -86,7 +86,7 @@ proc openArrayLoc(p: BProc, n: PNode): PRope = initLocExpr(p, q[2], b) initLocExpr(p, q[3], c) let fmt = - case skipTypes(a.t, abstractVar).kind + case skipTypes(a.t, abstractVar+{tyPtr}).kind of tyOpenArray, tyVarargs, tyArray, tyArrayConstr: "($1)+($2), ($3)-($2)+1" of tyString, tySequence: diff --git a/compiler/ccgexprs.nim b/compiler/ccgexprs.nim index 39333a80d..fb672f121 100644 --- a/compiler/ccgexprs.nim +++ b/compiler/ccgexprs.nim @@ -1636,7 +1636,10 @@ proc genMagicExpr(p: BProc, e: PNode, d: var TLoc, op: TMagic) = of mSlurp..mQuoteAst: localError(e.info, errXMustBeCompileTime, e.sons[0].sym.name.s) of mSpawn: - let n = lowerings.wrapProcForSpawn(p.module.module, e.sons[1]) + let n = lowerings.wrapProcForSpawn(p.module.module, e[1], e.typ, nil, nil) + expr(p, n, d) + of mParallel: + let n = semparallel.liftParallel(p.module.module, e) expr(p, n, d) else: internalError(e.info, "genMagicExpr: " & $op) diff --git a/compiler/cgen.nim b/compiler/cgen.nim index 198b1187d..314af7784 100644 --- a/compiler/cgen.nim +++ b/compiler/cgen.nim @@ -14,7 +14,8 @@ import options, intsets, nversion, nimsets, msgs, crc, bitsets, idents, lists, types, ccgutils, os, times, ropes, math, passes, rodread, wordrecg, treetab, cgmeth, - rodutils, renderer, idgen, cgendata, ccgmerge, semfold, aliases, lowerings + rodutils, renderer, idgen, cgendata, ccgmerge, semfold, aliases, lowerings, + semparallel when options.hasTinyCBackend: import tccgen diff --git a/compiler/guards.nim b/compiler/guards.nim index f475f5068..813a30014 100644 --- a/compiler/guards.nim +++ b/compiler/guards.nim @@ -1,7 +1,7 @@ # # # The Nimrod Compiler -# (c) Copyright 2013 Andreas Rumpf +# (c) Copyright 2014 Andreas Rumpf # # See the file "copying.txt", included in this # distribution, for details about the copyright. @@ -9,7 +9,8 @@ ## This module implements the 'implies' relation for guards. -import ast, astalgo, msgs, magicsys, nimsets, trees, types, renderer, idents +import ast, astalgo, msgs, magicsys, nimsets, trees, types, renderer, idents, + saturate const someEq = {mEqI, mEqI64, mEqF64, mEqEnum, mEqCh, mEqB, mEqRef, mEqProc, @@ -25,6 +26,17 @@ const someIn = {mInRange, mInSet} + someHigh = {mHigh} + # we don't list unsigned here because wrap around semantics suck for + # proving anything: + someAdd = {mAddI, mAddI64, mAddF64, mSucc} + someSub = {mSubI, mSubI64, mSubF64, mPred} + someMul = {mMulI, mMulI64, mMulF64} + someDiv = {mDivI, mDivI64, mDivF64} + someMod = {mModI, mModI64} + someMax = {mMaxI, mMaxI64, mMaxF64} + someMin = {mMinI, mMinI64, mMinF64} + proc isValue(n: PNode): bool = n.kind in {nkCharLit..nkNilLit} proc isLocation(n: PNode): bool = not n.isValue @@ -69,19 +81,24 @@ proc isLetLocation(m: PNode, isApprox: bool): bool = proc interestingCaseExpr*(m: PNode): bool = isLetLocation(m, true) -proc getMagicOp(name: string, m: TMagic): PSym = +proc createMagic*(name: string, m: TMagic): PSym = result = newSym(skProc, getIdent(name), nil, unknownLineInfo()) result.magic = m let - opLe = getMagicOp("<=", mLeI) - opLt = getMagicOp("<", mLtI) - opAnd = getMagicOp("and", mAnd) - opOr = getMagicOp("or", mOr) - opNot = getMagicOp("not", mNot) - opIsNil = getMagicOp("isnil", mIsNil) - opContains = getMagicOp("contains", mInSet) - opEq = getMagicOp("==", mEqI) + opLe = createMagic("<=", mLeI) + opLt = createMagic("<", mLtI) + opAnd = createMagic("and", mAnd) + opOr = createMagic("or", mOr) + opNot = createMagic("not", mNot) + opIsNil = createMagic("isnil", mIsNil) + opContains = createMagic("contains", mInSet) + opEq = createMagic("==", mEqI) + opAdd = createMagic("+", mAddI) + opSub = createMagic("-", mSubI) + opMul = createMagic("*", mMulI) + opDiv = createMagic("div", mDivI) + opLen = createMagic("len", mLengthSeq) proc swapArgs(fact: PNode, newOp: PSym): PNode = result = newNodeI(nkCall, fact.info, 3) @@ -137,17 +154,141 @@ proc neg(n: PNode): PNode = result.sons[0] = newSymNode(opNot) result.sons[1] = n -proc buildIsNil(arg: PNode): PNode = - result = newNodeI(nkCall, arg.info, 2) - result.sons[0] = newSymNode(opIsNil) - result.sons[1] = arg +proc buildCall(op: PSym; a: PNode): PNode = + result = newNodeI(nkCall, a.info, 2) + result.sons[0] = newSymNode(op) + result.sons[1] = a + +proc buildCall(op: PSym; a, b: PNode): PNode = + result = newNodeI(nkInfix, a.info, 3) + result.sons[0] = newSymNode(op) + result.sons[1] = a + result.sons[2] = b + +proc `|+|`(a, b: PNode): PNode = + result = copyNode(a) + if a.kind in {nkCharLit..nkUInt64Lit}: result.intVal = a.intVal |+| b.intVal + else: result.floatVal = a.floatVal + b.floatVal + +proc `|*|`(a, b: PNode): PNode = + result = copyNode(a) + if a.kind in {nkCharLit..nkUInt64Lit}: result.intVal = a.intVal |*| b.intVal + else: result.floatVal = a.floatVal * b.floatVal + +proc negate(a, b, res: PNode): PNode = + if b.kind in {nkCharLit..nkUInt64Lit} and b.intVal != low(BiggestInt): + var b = copyNode(b) + b.intVal = -b.intVal + if a.kind in {nkCharLit..nkUInt64Lit}: + b.intVal = b.intVal |+| a.intVal + result = b + else: + result = buildCall(opAdd, a, b) + elif b.kind in {nkFloatLit..nkFloat64Lit}: + var b = copyNode(b) + b.floatVal = -b.floatVal + result = buildCall(opAdd, a, b) + else: + result = res + +proc zero(): PNode = nkIntLit.newIntNode(0) +proc one(): PNode = nkIntLit.newIntNode(1) +proc minusOne(): PNode = nkIntLit.newIntNode(-1) + +proc lowBound*(x: PNode): PNode = + result = nkIntLit.newIntNode(firstOrd(x.typ)) + result.info = x.info + +proc highBound*(x: PNode): PNode = + result = if x.typ.skipTypes(abstractInst).kind == tyArray: + nkIntLit.newIntNode(lastOrd(x.typ)) + else: + opAdd.buildCall(opLen.buildCall(x), minusOne()) + result.info = x.info + +proc reassociation(n: PNode): PNode = + result = n + # (foo+5)+5 --> foo+10; same for '*' + case result.getMagic + of someAdd: + if result[2].isValue and + result[1].getMagic in someAdd and result[1][2].isValue: + result = opAdd.buildCall(result[1][1], result[1][2] |+| result[2]) + of someMul: + if result[2].isValue and + result[1].getMagic in someMul and result[1][2].isValue: + result = opAdd.buildCall(result[1][1], result[1][2] |*| result[2]) + else: discard + +proc canon*(n: PNode): PNode = + # XXX for now only the new code in 'semparallel' uses this + if n.safeLen >= 1: + result = shallowCopy(n) + for i in 0 .. < n.len: + result.sons[i] = canon(n.sons[i]) + else: + result = n + case result.getMagic + of someEq, someAdd, someMul, someMin, someMax: + # these are symmetric; put value as last: + if result.sons[1].isValue and not result.sons[2].isValue: + result = swapArgs(result, result.sons[0].sym) + # (4 + foo) + 2 --> (foo + 4) + 2 + of someHigh: + # high == len+(-1) + result = opAdd.buildCall(opLen.buildCall(result[1]), minusOne()) + of mUnaryMinusI, mUnaryMinusI64: + result = buildCall(opAdd, result[1], newIntNode(nkIntLit, -1)) + of someSub: + # x - 4 --> x + (-4) + result = negate(result[1], result[2], result) + of someLen: + result.sons[0] = opLen.newSymNode + else: discard + + result = skipConv(result) + result = reassociation(result) + # most important rule: (x-4) < a.len --> x < a.len+4 + case result.getMagic + of someLe, someLt: + let x = result[1] + let y = result[2] + if x.kind in nkCallKinds and x.len == 3 and x[2].isValue and + isLetLocation(x[1], true): + case x.getMagic + of someSub: + result = buildCall(result[0].sym, x[1], + reassociation(opAdd.buildCall(y, x[2]))) + of someAdd: + # Rule A: + let plus = negate(y, x[2], nil).reassociation + if plus != nil: result = buildCall(result[0].sym, x[1], plus) + else: discard + elif y.kind in nkCallKinds and y.len == 3 and y[2].isValue and + isLetLocation(y[1], true): + # a.len < x-3 + case y.getMagic + of someSub: + result = buildCall(result[0].sym, y[1], + reassociation(opAdd.buildCall(x, y[2]))) + of someAdd: + let plus = negate(x, y[2], nil).reassociation + # ensure that Rule A will not trigger afterwards with the + # additional 'not isLetLocation' constraint: + if plus != nil and not isLetLocation(x, true): + result = buildCall(result[0].sym, plus, y[1]) + else: discard + else: discard + +proc `+@`*(a: PNode; b: BiggestInt): PNode = + canon(if b != 0: opAdd.buildCall(a, nkIntLit.newIntNode(b)) else: a) proc usefulFact(n: PNode): PNode = case n.getMagic of someEq: if skipConv(n.sons[2]).kind == nkNilLit and ( isLetLocation(n.sons[1], false) or isVar(n.sons[1])): - result = buildIsNil(n.sons[1]) + result = opIsNil.buildCall(n.sons[1]) else: if isLetLocation(n.sons[1], true) or isLetLocation(n.sons[2], true): # XXX algebraic simplifications! 'i-1 < a.len' --> 'i < a.len+1' @@ -217,7 +358,7 @@ proc addFactNeg*(m: var TModel, n: PNode) = let n = n.neg if n != nil: addFact(m, n) -proc sameTree(a, b: PNode): bool = +proc sameTree*(a, b: PNode): bool = result = false if a == b: result = true @@ -519,7 +660,144 @@ proc doesImply*(facts: TModel, prop: PNode): TImplication = if result != impUnknown: return proc impliesNotNil*(facts: TModel, arg: PNode): TImplication = - result = doesImply(facts, buildIsNil(arg).neg) + result = doesImply(facts, opIsNil.buildCall(arg).neg) + +proc simpleSlice*(a, b: PNode): BiggestInt = + # returns 'c' if a..b matches (i+c)..(i+c), -1 otherwise. (i)..(i) is matched + # as if it is (i+0)..(i+0). + if guards.sameTree(a, b): + if a.getMagic in someAdd and a[2].kind in {nkCharLit..nkUInt64Lit}: + result = a[2].intVal + else: + result = 0 + else: + result = -1 + +proc pleViaModel(model: TModel; aa, bb: PNode): TImplication + +proc ple(m: TModel; a, b: PNode): TImplication = + template `<=?`(a,b): expr = ple(m,a,b) == impYes + # 0 <= 3 + if a.isValue and b.isValue: + return if leValue(a, b): impYes else: impNo + + # use type information too: x <= 4 iff high(x) <= 4 + if b.isValue and a.typ != nil and a.typ.isOrdinalType: + if lastOrd(a.typ) <= b.intVal: return impYes + # 3 <= x iff low(x) <= 3 + if a.isValue and b.typ != nil and b.typ.isOrdinalType: + if firstOrd(b.typ) <= a.intVal: return impYes + + # x <= x + if sameTree(a, b): return impYes + + # 0 <= x.len + if b.getMagic in someLen and a.isValue: + if a.intVal <= 0: return impYes + + # x <= y+c if 0 <= c and x <= y + if b.getMagic in someAdd and zero() <=? b[2] and a <=? b[1]: return impYes + + # x+c <= y if c <= 0 and x <= y + if a.getMagic in someAdd and a[2] <=? zero() and a[1] <=? b: return impYes + + # x <= y*c if 1 <= c and x <= y and 0 <= y + if b.getMagic in someMul: + if a <=? b[1] and one() <=? b[2] and zero() <=? b[1]: return impYes + + # x div c <= y if 1 <= c and 0 <= y and x <= y: + if a.getMagic in someDiv: + if one() <=? a[2] and zero() <=? b and a[1] <=? b: return impYes + + # slightly subtle: + # x <= max(y, z) iff x <= y or x <= z + # note that 'x <= max(x, z)' is a special case of the above rule + if b.getMagic in someMax: + if a <=? b[1] or a <=? b[2]: return impYes + + # min(x, y) <= z iff x <= z or y <= z + if a.getMagic in someMin: + if a[1] <=? b or a[2] <=? b: return impYes + + # use the knowledge base: + return pleViaModel(m, a, b) + #return doesImply(m, opLe.buildCall(a, b)) + +type TReplacements = seq[tuple[a,b: PNode]] + +proc replaceSubTree(n, x, by: PNode): PNode = + if sameTree(n, x): + result = by + elif hasSubTree(n, x): + result = shallowCopy(n) + for i in 0 .. safeLen(n)-1: + result.sons[i] = replaceSubTree(n.sons[i], x, by) + else: + result = n + +proc applyReplacements(n: PNode; rep: TReplacements): PNode = + result = n + for x in rep: result = result.replaceSubTree(x.a, x.b) + +proc pleViaModelRec(m: var TModel; a, b: PNode): TImplication = + # now check for inferrable facts: a <= b and b <= c implies a <= c + for i in 0..m.high: + let fact = m[i] + if fact != nil and fact.getMagic in someLe: + # x <= y implies a <= b if a <= x and y <= b + let x = fact[1] + let y = fact[2] + # mark as used: + m[i] = nil + if ple(m, a, x) == impYes: + if ple(m, y, b) == impYes: return impYes + #if pleViaModelRec(m, y, b): return impYes + # fact: 16 <= i + # x y + # question: i <= 15? no! + result = impliesLe(fact, a, b) + if result != impUnknown: return result + if sameTree(y, a): + result = ple(m, x, b) + if result != impUnknown: return result + +proc pleViaModel(model: TModel; aa, bb: PNode): TImplication = + # compute replacements: + var replacements: TReplacements = @[] + for fact in model: + if fact != nil and fact.getMagic in someEq: + let a = fact[1] + let b = fact[2] + if a.kind == nkSym: replacements.add((a,b)) + else: replacements.add((b,a)) + var m: TModel + var a = aa + var b = bb + if replacements.len > 0: + m = @[] + # make the other facts consistent: + for fact in model: + if fact != nil and fact.getMagic notin someEq: + # XXX 'canon' should not be necessary here, but it is + m.add applyReplacements(fact, replacements).canon + a = applyReplacements(aa, replacements) + b = applyReplacements(bb, replacements) + else: + # we have to make a copy here, because the model will be modified: + m = model + result = pleViaModelRec(m, a, b) + +proc proveLe*(m: TModel; a, b: PNode): TImplication = + let x = canon(opLe.buildCall(a, b)) + #echo "ROOT ", renderTree(x[1]), " <=? ", renderTree(x[2]) + result = ple(m, x[1], x[2]) + if result == impUnknown: + # try an alternative: a <= b iff not (b < a) iff not (b+1 <= a): + let y = canon(opLe.buildCall(opAdd.buildCall(b, one()), a)) + result = ~ple(m, y[1], y[2]) + +proc addFactLe*(m: var TModel; a, b: PNode) = + m.add canon(opLe.buildCall(a, b)) proc settype(n: PNode): PType = result = newType(tySet, n.typ.owner) diff --git a/compiler/lowerings.nim b/compiler/lowerings.nim index 1b9e5fe0f..df2816a0e 100644 --- a/compiler/lowerings.nim +++ b/compiler/lowerings.nim @@ -13,6 +13,8 @@ const genPrefix* = ":tmp" # prefix for generated names import ast, astalgo, types, idents, magicsys, msgs, options +from guards import createMagic +from trees import getMagic proc newTupleAccess*(tup: PNode, i: int): PNode = result = newNodeIT(nkBracketExpr, tup.info, tup.typ.skipTypes( @@ -68,6 +70,7 @@ proc addField*(obj: PType; s: PSym) = var field = newSym(skField, getIdent(s.name.s & $s.id), s.owner, s.info) let t = skipIntLit(s.typ) field.typ = t + assert t.kind != tyStmt field.position = sonsLen(obj.n) addSon(obj.n, newSymNode(field)) @@ -79,19 +82,29 @@ proc newDotExpr(obj, b: PSym): PNode = addSon(result, newSymNode(field)) result.typ = field.typ -proc indirectAccess*(a: PNode, b: PSym, info: TLineInfo): PNode = +proc indirectAccess*(a: PNode, b: string, info: TLineInfo): PNode = # returns a[].b as a node var deref = newNodeI(nkHiddenDeref, info) - deref.typ = a.typ.sons[0] - assert deref.typ.kind == tyObject - let field = getSymFromList(deref.typ.n, getIdent(b.name.s & $b.id)) - assert field != nil, b.name.s + deref.typ = a.typ.skipTypes(abstractInst).sons[0] + var t = deref.typ + var field: PSym + while true: + assert t.kind == tyObject + field = getSymFromList(t.n, getIdent(b)) + if field != nil: break + t = t.sons[0] + if t == nil: break + assert field != nil, b addSon(deref, a) result = newNodeI(nkDotExpr, info) addSon(result, deref) addSon(result, newSymNode(field)) result.typ = field.typ +proc indirectAccess*(a: PNode, b: PSym, info: TLineInfo): PNode = + # returns a[].b as a node + result = indirectAccess(a, b.name.s & $b.id, info) + proc indirectAccess*(a, b: PSym, info: TLineInfo): PNode = result = indirectAccess(newSymNode(a), b, info) @@ -101,6 +114,11 @@ proc genAddrOf*(n: PNode): PNode = result.typ = newType(tyPtr, n.typ.owner) result.typ.rawAddSon(n.typ) +proc genDeref*(n: PNode): PNode = + result = newNodeIT(nkHiddenDeref, n.info, + n.typ.skipTypes(abstractInst).sons[0]) + result.add n + proc callCodegenProc*(name: string, arg1: PNode; arg2, arg3: PNode = nil): PNode = result = newNodeI(nkCall, arg1.info) @@ -112,13 +130,114 @@ proc callCodegenProc*(name: string, arg1: PNode; result.add arg1 if arg2 != nil: result.add arg2 if arg3 != nil: result.add arg3 + result.typ = sym.typ.sons[0] + +# we have 4 cases to consider: +# - a void proc --> nothing to do +# - a proc returning GC'ed memory --> requires a promise +# - a proc returning non GC'ed memory --> pass as hidden 'var' parameter +# - not in a parallel environment --> requires a promise for memory safety +type + TSpawnResult = enum + srVoid, srPromise, srByVar + TPromiseKind = enum + promInvalid # invalid type T for 'Promise[T]' + promGC # Promise of a GC'ed type + promBlob # Promise of a blob type + +proc spawnResult(t: PType; inParallel: bool): TSpawnResult = + if t.isEmptyType: srVoid + elif inParallel and not containsGarbageCollectedRef(t): srByVar + else: srPromise + +proc promiseKind(t: PType): TPromiseKind = + if t.skipTypes(abstractInst).kind in {tyRef, tyString, tySequence}: promGC + elif containsGarbageCollectedRef(t): promInvalid + else: promBlob + +proc addLocalVar(varSection: PNode; owner: PSym; typ: PType; v: PNode): PSym = + result = newSym(skTemp, getIdent(genPrefix), owner, varSection.info) + result.typ = typ + incl(result.flags, sfFromGeneric) + + var vpart = newNodeI(nkIdentDefs, varSection.info, 3) + vpart.sons[0] = newSymNode(result) + vpart.sons[1] = ast.emptyNode + vpart.sons[2] = v + varSection.add vpart + +discard """ +We generate roughly this: + +proc f_wrapper(args) = + barrierEnter(args.barrier) # for parallel statement + var a = args.a # thread transfer; deepCopy or shallowCopy or no copy + # depending on whether we're in a 'parallel' statement + var b = args.b + + args.prom = nimCreatePromise(thread, sizeof(T)) # optional + nimPromiseCreateCondVar(args.prom) # optional + nimArgsPassingDone() # signal parent that the work is done + # + args.prom.blob = f(a, b, ...) + nimPromiseSignal(args.prom) + + # - or - + f(a, b, ...) + barrierLeave(args.barrier) # for parallel statement + +stmtList: + var scratchObj + scratchObj.a = a + scratchObj.b = b + + nimSpawn(f_wrapper, addr scratchObj) + scratchObj.prom # optional + +""" + +proc createNimCreatePromiseCall(prom, threadParam: PNode): PNode = + let size = newNodeIT(nkCall, prom.info, getSysType(tyInt)) + size.add newSymNode(createMagic("sizeof", mSizeOf)) + assert prom.typ.kind == tyGenericInst + size.add newNodeIT(nkType, prom.info, prom.typ.sons[1]) + + let castExpr = newNodeIT(nkCast, prom.info, prom.typ) + castExpr.add emptyNode + castExpr.add callCodeGenProc("nimCreatePromise", threadParam, size) + result = newFastAsgnStmt(prom, castExpr) proc createWrapperProc(f: PNode; threadParam, argsParam: PSym; - varSection, call: PNode): PSym = + varSection, call, barrier, prom: PNode; + spawnKind: TSpawnResult): PSym = var body = newNodeI(nkStmtList, f.info) + if barrier != nil: + body.add callCodeGenProc("barrierEnter", barrier) + var threadLocalProm: PSym + if spawnKind == srByVar: + threadLocalProm = addLocalVar(varSection, argsParam.owner, prom.typ, prom) body.add varSection - body.add callCodeGenProc("nimArgsPassingDone", newSymNode(threadParam)) - body.add call + if prom != nil and spawnKind != srByVar: + body.add createNimCreatePromiseCall(prom, threadParam.newSymNode) + if barrier == nil: + body.add callCodeGenProc("nimPromiseCreateCondVar", prom) + + body.add callCodeGenProc("nimArgsPassingDone", threadParam.newSymNode) + if spawnKind == srByVar: + body.add newAsgnStmt(genDeref(threadLocalProm.newSymNode), call) + elif prom != nil: + let fk = prom.typ.sons[1].promiseKind + if fk == promInvalid: + localError(f.info, "cannot create a promise of type: " & + typeToString(prom.typ.sons[1])) + body.add newAsgnStmt(indirectAccess(prom, + if fk == promGC: "data" else: "blob", prom.info), call) + if barrier == nil: + body.add callCodeGenProc("nimPromiseSignal", prom) + else: + body.add call + if barrier != nil: + body.add callCodeGenProc("barrierLeave", barrier) var params = newNodeI(nkFormalParams, f.info) params.add emptyNode @@ -146,10 +265,151 @@ proc createCastExpr(argsParam: PSym; objType: PType): PNode = result.typ = newType(tyPtr, objType.owner) result.typ.rawAddSon(objType) -proc wrapProcForSpawn*(owner: PSym; n: PNode): PNode = - result = newNodeI(nkStmtList, n.info) - if n.kind notin nkCallKinds or not n.typ.isEmptyType: - localError(n.info, "'spawn' takes a call expression of type void") +proc setupArgsForConcurrency(n: PNode; objType: PType; scratchObj: PSym, + castExpr, call, varSection, result: PNode) = + let formals = n[0].typ.n + let tmpName = getIdent(genPrefix) + for i in 1 .. <n.len: + # we pick n's type here, which hopefully is 'tyArray' and not + # 'tyOpenArray': + var argType = n[i].typ.skipTypes(abstractInst) + if i < formals.len and formals[i].typ.kind == tyVar: + localError(n[i].info, "'spawn'ed function cannot have a 'var' parameter") + elif containsTyRef(argType): + localError(n[i].info, "'spawn'ed function cannot refer to 'ref'/closure") + + let fieldname = if i < formals.len: formals[i].sym.name else: tmpName + var field = newSym(skField, fieldname, objType.owner, n.info) + field.typ = argType + objType.addField(field) + result.add newFastAsgnStmt(newDotExpr(scratchObj, field), n[i]) + + let temp = addLocalVar(varSection, objType.owner, argType, + indirectAccess(castExpr, field, n.info)) + call.add(newSymNode(temp)) + +proc getRoot*(n: PNode): PSym = + ## ``getRoot`` takes a *path* ``n``. A path is an lvalue expression + ## like ``obj.x[i].y``. The *root* of a path is the symbol that can be + ## determined as the owner; ``obj`` in the example. + case n.kind + of nkSym: + if n.sym.kind in {skVar, skResult, skTemp, skLet, skForVar}: + result = n.sym + of nkDotExpr, nkBracketExpr, nkHiddenDeref, nkDerefExpr, + nkObjUpConv, nkObjDownConv, nkCheckedFieldExpr: + result = getRoot(n.sons[0]) + of nkHiddenStdConv, nkHiddenSubConv, nkConv: + result = getRoot(n.sons[1]) + of nkCallKinds: + if getMagic(n) == mSlice: result = getRoot(n.sons[1]) + else: discard + +proc newIntLit(value: BiggestInt): PNode = + result = nkIntLit.newIntNode(value) + result.typ = getSysType(tyInt) + +proc genHigh(n: PNode): PNode = + if skipTypes(n.typ, abstractVar).kind in {tyArrayConstr, tyArray}: + result = newIntLit(lastOrd(skipTypes(n.typ, abstractVar))) + else: + result = newNodeI(nkCall, n.info, 2) + result.typ = getSysType(tyInt) + result.sons[0] = newSymNode(createMagic("high", mHigh)) + result.sons[1] = n + +proc setupArgsForParallelism(n: PNode; objType: PType; scratchObj: PSym; + castExpr, call, varSection, result: PNode) = + let formals = n[0].typ.n + let tmpName = getIdent(genPrefix) + # we need to copy the foreign scratch object fields into local variables + # for correctness: These are called 'threadLocal' here. + for i in 1 .. <n.len: + let n = n[i] + let argType = skipTypes(if i < formals.len: formals[i].typ else: n.typ, + abstractInst) + if containsTyRef(argType): + localError(n.info, "'spawn'ed function cannot refer to 'ref'/closure") + + let fieldname = if i < formals.len: formals[i].sym.name else: tmpName + var field = newSym(skField, fieldname, objType.owner, n.info) + + if argType.kind in {tyVarargs, tyOpenArray}: + # important special case: we always create a zero-copy slice: + let slice = newNodeI(nkCall, n.info, 4) + slice.typ = n.typ + slice.sons[0] = newSymNode(createMagic("slice", mSlice)) + var fieldB = newSym(skField, tmpName, objType.owner, n.info) + fieldB.typ = getSysType(tyInt) + objType.addField(fieldB) + + if getMagic(n) == mSlice: + let a = genAddrOf(n[1]) + field.typ = a.typ + objType.addField(field) + result.add newFastAsgnStmt(newDotExpr(scratchObj, field), a) + + var fieldA = newSym(skField, tmpName, objType.owner, n.info) + fieldA.typ = getSysType(tyInt) + objType.addField(fieldA) + result.add newFastAsgnStmt(newDotExpr(scratchObj, fieldA), n[2]) + result.add newFastAsgnStmt(newDotExpr(scratchObj, fieldB), n[3]) + + let threadLocal = addLocalVar(varSection, objType.owner, fieldA.typ, + indirectAccess(castExpr, fieldA, n.info)) + slice.sons[2] = threadLocal.newSymNode + else: + let a = genAddrOf(n) + field.typ = a.typ + objType.addField(field) + result.add newFastAsgnStmt(newDotExpr(scratchObj, field), a) + result.add newFastAsgnStmt(newDotExpr(scratchObj, fieldB), genHigh(n)) + + slice.sons[2] = newIntLit(0) + # the array itself does not need to go through a thread local variable: + slice.sons[1] = genDeref(indirectAccess(castExpr, field, n.info)) + + let threadLocal = addLocalVar(varSection, objType.owner, fieldB.typ, + indirectAccess(castExpr, fieldB, n.info)) + slice.sons[3] = threadLocal.newSymNode + call.add slice + elif (let size = computeSize(argType); size < 0 or size > 16) and + n.getRoot != nil: + # it is more efficient to pass a pointer instead: + let a = genAddrOf(n) + field.typ = a.typ + objType.addField(field) + result.add newFastAsgnStmt(newDotExpr(scratchObj, field), a) + let threadLocal = addLocalVar(varSection, objType.owner, field.typ, + indirectAccess(castExpr, field, n.info)) + call.add(genDeref(threadLocal.newSymNode)) + else: + # boring case + field.typ = argType + objType.addField(field) + result.add newFastAsgnStmt(newDotExpr(scratchObj, field), n) + let threadLocal = addLocalVar(varSection, objType.owner, field.typ, + indirectAccess(castExpr, field, n.info)) + call.add(threadLocal.newSymNode) + +proc wrapProcForSpawn*(owner: PSym; n: PNode; retType: PType; + barrier, dest: PNode = nil): PNode = + # if 'barrier' != nil, then it is in a 'parallel' section and we + # generate quite different code + let spawnKind = spawnResult(retType, barrier!=nil) + case spawnKind + of srVoid: + internalAssert dest == nil + result = newNodeI(nkStmtList, n.info) + of srPromise: + internalAssert dest == nil + result = newNodeIT(nkStmtListExpr, n.info, retType) + of srByVar: + if dest == nil: localError(n.info, "'spawn' must not be discarded") + result = newNodeI(nkStmtList, n.info) + + if n.kind notin nkCallKinds: + localError(n.info, "'spawn' takes a call expression") return if optThreadAnalysis in gGlobalOptions: if {tfThread, tfNoSideEffect} * n[0].typ.flags == {}: @@ -162,6 +422,7 @@ proc wrapProcForSpawn*(owner: PSym; n: PNode): PNode = threadParam.typ = ptrType argsParam.typ = ptrType argsParam.position = 1 + var objType = createObj(owner, n.info) incl(objType.flags, tfFinal) let castExpr = createCastExpr(argsParam, objType) @@ -174,7 +435,7 @@ proc wrapProcForSpawn*(owner: PSym; n: PNode): PNode = varSectionB.addVar(scratchObj.newSymNode) result.add varSectionB - var call = newNodeI(nkCall, n.info) + var call = newNodeIT(nkCall, n.info, n.typ) var fn = n.sons[0] # templates and macros are in fact valid here due to the nature of # the transformation: @@ -194,35 +455,39 @@ proc wrapProcForSpawn*(owner: PSym; n: PNode): PNode = call.add(fn) var varSection = newNodeI(nkVarSection, n.info) - let formals = n[0].typ.n - let tmpName = getIdent(genPrefix) - for i in 1 .. <n.len: - # we pick n's type here, which hopefully is 'tyArray' and not - # 'tyOpenArray': - var argType = n[i].typ.skipTypes(abstractInst) - if i < formals.len and formals[i].typ.kind == tyVar: - localError(n[i].info, "'spawn'ed function cannot have a 'var' parameter") - elif containsTyRef(argType): - localError(n[i].info, "'spawn'ed function cannot refer to 'ref'/closure") + if barrier.isNil: + setupArgsForConcurrency(n, objType, scratchObj, castExpr, call, varSection, result) + else: + setupArgsForParallelism(n, objType, scratchObj, castExpr, call, varSection, result) - let fieldname = if i < formals.len: formals[i].sym.name else: tmpName - var field = newSym(skField, fieldname, owner, n.info) - field.typ = argType + var barrierAsExpr: PNode = nil + if barrier != nil: + let typ = newType(tyPtr, owner) + typ.rawAddSon(magicsys.getCompilerProc("Barrier").typ) + var field = newSym(skField, getIdent"barrier", owner, n.info) + field.typ = typ objType.addField(field) - result.add newFastAsgnStmt(newDotExpr(scratchObj, field), n[i]) - - var temp = newSym(skTemp, tmpName, owner, n.info) - temp.typ = argType - incl(temp.flags, sfFromGeneric) - - var vpart = newNodeI(nkIdentDefs, n.info, 3) - vpart.sons[0] = newSymNode(temp) - vpart.sons[1] = ast.emptyNode - vpart.sons[2] = indirectAccess(castExpr, field, n.info) - varSection.add vpart + result.add newFastAsgnStmt(newDotExpr(scratchObj, field), barrier) + barrierAsExpr = indirectAccess(castExpr, field, n.info) - call.add(newSymNode(temp)) + var promField, promAsExpr: PNode = nil + if spawnKind == srPromise: + var field = newSym(skField, getIdent"prom", owner, n.info) + field.typ = retType + objType.addField(field) + promField = newDotExpr(scratchObj, field) + promAsExpr = indirectAccess(castExpr, field, n.info) + elif spawnKind == srByVar: + var field = newSym(skField, getIdent"prom", owner, n.info) + field.typ = newType(tyPtr, objType.owner) + field.typ.rawAddSon(retType) + objType.addField(field) + promAsExpr = indirectAccess(castExpr, field, n.info) + result.add newFastAsgnStmt(newDotExpr(scratchObj, field), genAddrOf(dest)) - let wrapper = createWrapperProc(fn, threadParam, argsParam, varSection, call) + let wrapper = createWrapperProc(fn, threadParam, argsParam, varSection, call, + barrierAsExpr, promAsExpr, spawnKind) result.add callCodeGenProc("nimSpawn", wrapper.newSymNode, genAddrOf(scratchObj.newSymNode)) + + if spawnKind == srPromise: result.add promField diff --git a/compiler/sem.nim b/compiler/sem.nim index bdc2a0b3c..2e3b10a40 100644 --- a/compiler/sem.nim +++ b/compiler/sem.nim @@ -15,7 +15,8 @@ import magicsys, parser, nversion, nimsets, semfold, importer, procfind, lookups, rodread, pragmas, passes, semdata, semtypinst, sigmatch, intsets, transf, vmdef, vm, idgen, aliases, cgmeth, lambdalifting, - evaltempl, patterns, parampatterns, sempass2, pretty, semmacrosanity + evaltempl, patterns, parampatterns, sempass2, pretty, semmacrosanity, + semparallel # implementation diff --git a/compiler/semdata.nim b/compiler/semdata.nim index 987a70a41..19181d98e 100644 --- a/compiler/semdata.nim +++ b/compiler/semdata.nim @@ -91,6 +91,7 @@ type generics*: seq[TInstantiationPair] # pending list of instantiated generics to compile lastGenericIdx*: int # used for the generics stack hloLoopDetector*: int # used to prevent endless loops in the HLO + inParallelStmt*: int proc makeInstPair*(s: PSym, inst: PInstantiation): TInstantiationPair = result.genericSym = s diff --git a/compiler/semexprs.nim b/compiler/semexprs.nim index 99ffa9b54..cccb02502 100644 --- a/compiler/semexprs.nim +++ b/compiler/semexprs.nim @@ -1394,11 +1394,6 @@ proc semDefined(c: PContext, n: PNode, onlyCurrentScope: bool): PNode = result.info = n.info result.typ = getSysType(tyBool) -proc setMs(n: PNode, s: PSym): PNode = - result = n - n.sons[0] = newSymNode(s) - n.sons[0].info = n.info - proc expectMacroOrTemplateCall(c: PContext, n: PNode): PSym = ## The argument to the proc should be nkCall(...) or similar ## Returns the macro/template symbol @@ -1590,6 +1585,17 @@ proc semShallowCopy(c: PContext, n: PNode, flags: TExprFlags): PNode = else: result = semDirectOp(c, n, flags) +proc createPromise(c: PContext; t: PType; info: TLineInfo): PType = + result = newType(tyGenericInvokation, c.module) + addSonSkipIntLit(result, magicsys.getCompilerProc("Promise").typ) + addSonSkipIntLit(result, t) + result = instGenericContainer(c, info, result, allowMetaTypes = false) + +proc setMs(n: PNode, s: PSym): PNode = + result = n + n.sons[0] = newSymNode(s) + n.sons[0].info = n.info + proc semMagic(c: PContext, n: PNode, s: PSym, flags: TExprFlags): PNode = # this is a hotspot in the compiler! # DON'T forget to update ast.SpecialSemMagics if you add a magic here! @@ -1611,6 +1617,21 @@ proc semMagic(c: PContext, n: PNode, s: PSym, flags: TExprFlags): PNode = checkSonsLen(n, 2) result = newStrNodeT(renderTree(n[1], {renderNoComments}), n) result.typ = getSysType(tyString) + of mParallel: + result = setMs(n, s) + var x = n.lastSon + if x.kind == nkDo: x = x.sons[bodyPos] + inc c.inParallelStmt + result.sons[1] = semStmt(c, x) + dec c.inParallelStmt + of mSpawn: + result = setMs(n, s) + result.sons[1] = semExpr(c, n.sons[1]) + if not result[1].typ.isEmptyType: + if c.inParallelStmt > 0: + result.typ = result[1].typ + else: + result.typ = createPromise(c, result[1].typ, n.info) else: result = semDirectOp(c, n, flags) proc semWhen(c: PContext, n: PNode, semCheck = true): PNode = diff --git a/compiler/semmagic.nim b/compiler/semmagic.nim index 4caf1fb8e..f943e7006 100644 --- a/compiler/semmagic.nim +++ b/compiler/semmagic.nim @@ -1,7 +1,7 @@ # # # The Nimrod Compiler -# (c) Copyright 2013 Andreas Rumpf +# (c) Copyright 2014 Andreas Rumpf # # See the file "copying.txt", included in this # distribution, for details about the copyright. @@ -131,4 +131,3 @@ proc magicsAfterOverloadResolution(c: PContext, n: PNode, of mNBindSym: result = semBindSym(c, n) of mLocals: result = semLocals(c, n) else: result = n - diff --git a/compiler/semparallel.nim b/compiler/semparallel.nim new file mode 100644 index 000000000..72def1137 --- /dev/null +++ b/compiler/semparallel.nim @@ -0,0 +1,465 @@ +# +# +# The Nimrod Compiler +# (c) Copyright 2014 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## Semantic checking for 'parallel'. + +# - codegen needs to support mSlice (+) +# - lowerings must not perform unnecessary copies (+) +# - slices should become "nocopy" to openArray (+) +# - need to perform bound checks (+) +# +# - parallel needs to insert a barrier (+) +# - passed arguments need to be ensured to be "const" +# - what about 'f(a)'? --> f shouldn't have side effects anyway +# - passed arrays need to be ensured not to alias +# - passed slices need to be ensured to be disjoint (+) +# - output slices need special logic (+) + +import + ast, astalgo, idents, lowerings, magicsys, guards, sempass2, msgs, + renderer +from trees import getMagic +from strutils import `%` + +discard """ + +one major problem: + spawn f(a[i]) + inc i + spawn f(a[i]) +is valid, but + spawn f(a[i]) + spawn f(a[i]) + inc i +is not! However, + spawn f(a[i]) + if guard: inc i + spawn f(a[i]) +is not valid either! --> We need a flow dependent analysis here. + +However: + while foo: + spawn f(a[i]) + inc i + spawn f(a[i]) + +Is not valid either! --> We should really restrict 'inc' to loop endings? + +The heuristic that we implement here (that has no false positives) is: Usage +of 'i' in a slice *after* we determined the stride is invalid! +""" + +type + TDirection = enum + ascending, descending + MonotonicVar = object + v, alias: PSym # to support the ordinary 'countup' iterator + # we need to detect aliases + lower, upper, stride: PNode + dir: TDirection + blacklisted: bool # blacklisted variables that are not monotonic + AnalysisCtx = object + locals: seq[MonotonicVar] + slices: seq[tuple[x,a,b: PNode, spawnId: int, inLoop: bool]] + guards: TModel # nested guards + args: seq[PSym] # args must be deeply immutable + spawns: int # we can check that at last 1 spawn is used in + # the 'parallel' section + currentSpawnId: int + inLoop: int + +let opSlice = createMagic("slice", mSlice) + +proc initAnalysisCtx(): AnalysisCtx = + result.locals = @[] + result.slices = @[] + result.args = @[] + result.guards = @[] + +proc lookupSlot(c: AnalysisCtx; s: PSym): int = + for i in 0.. <c.locals.len: + if c.locals[i].v == s or c.locals[i].alias == s: return i + return -1 + +proc getSlot(c: var AnalysisCtx; v: PSym): ptr MonotonicVar = + let s = lookupSlot(c, v) + if s >= 0: return addr(c.locals[s]) + let L = c.locals.len + c.locals.setLen(L+1) + c.locals[L].v = v + return addr(c.locals[L]) + +proc gatherArgs(c: var AnalysisCtx; n: PNode) = + for i in 0.. <n.safeLen: + let root = getRoot n[i] + if root != nil: + block addRoot: + for r in items(c.args): + if r == root: break addRoot + c.args.add root + gatherArgs(c, n[i]) + +proc isSingleAssignable(n: PNode): bool = + n.kind == nkSym and (let s = n.sym; + s.kind in {skTemp, skForVar, skLet} and + {sfAddrTaken, sfGlobal} * s.flags == {}) + +proc isLocal(n: PNode): bool = + n.kind == nkSym and (let s = n.sym; + s.kind in {skResult, skTemp, skForVar, skVar, skLet} and + {sfAddrTaken, sfGlobal} * s.flags == {}) + +proc checkLocal(c: AnalysisCtx; n: PNode) = + if isLocal(n): + let s = c.lookupSlot(n.sym) + if s >= 0 and c.locals[s].stride != nil: + localError(n.info, "invalid usage of counter after increment") + else: + for i in 0 .. <n.safeLen: checkLocal(c, n.sons[i]) + +template `?`(x): expr = x.renderTree + +proc checkLe(c: AnalysisCtx; a, b: PNode) = + case proveLe(c.guards, a, b) + of impUnknown: + localError(a.info, "cannot prove: " & ?a & " <= " & ?b) + of impYes: discard + of impNo: + localError(a.info, "can prove: " & ?a & " > " & ?b) + +proc checkBounds(c: AnalysisCtx; arr, idx: PNode) = + checkLe(c, arr.lowBound, idx) + checkLe(c, idx, arr.highBound) + +proc addLowerBoundAsFacts(c: var AnalysisCtx) = + for v in c.locals: + if not v.blacklisted: + c.guards.addFactLe(v.lower, newSymNode(v.v)) + +proc addSlice(c: var AnalysisCtx; n: PNode; x, le, ri: PNode) = + checkLocal(c, n) + let le = le.canon + let ri = ri.canon + # perform static bounds checking here; and not later! + let oldState = c.guards.len + addLowerBoundAsFacts(c) + c.checkBounds(x, le) + c.checkBounds(x, ri) + c.guards.setLen(oldState) + c.slices.add((x, le, ri, c.currentSpawnId, c.inLoop > 0)) + +proc overlap(m: TModel; x,y,c,d: PNode) = + # X..Y and C..D overlap iff (X <= D and C <= Y) + case proveLe(m, x, d) + of impUnknown: + localError(x.info, + "cannot prove: $# > $#; required for ($#)..($#) disjoint from ($#)..($#)" % + [?x, ?d, ?x, ?y, ?c, ?d]) + of impYes: + case proveLe(m, c, y) + of impUnknown: + localError(x.info, + "cannot prove: $# > $#; required for ($#)..($#) disjoint from ($#)..($#)" % + [?c, ?y, ?x, ?y, ?c, ?d]) + of impYes: + localError(x.info, "($#)..($#) not disjoint from ($#)..($#)" % [?x, ?y, ?c, ?d]) + of impNo: discard + of impNo: discard + +proc stride(c: AnalysisCtx; n: PNode): BiggestInt = + if isLocal(n): + let s = c.lookupSlot(n.sym) + if s >= 0 and c.locals[s].stride != nil: + result = c.locals[s].stride.intVal + else: + for i in 0 .. <n.safeLen: result += stride(c, n.sons[i]) + +proc subStride(c: AnalysisCtx; n: PNode): PNode = + # substitute with stride: + if isLocal(n): + let s = c.lookupSlot(n.sym) + if s >= 0 and c.locals[s].stride != nil: + result = n +@ c.locals[s].stride.intVal + else: + result = n + elif n.safeLen > 0: + result = shallowCopy(n) + for i in 0 .. <n.len: result.sons[i] = subStride(c, n.sons[i]) + else: + result = n + +proc checkSlicesAreDisjoint(c: var AnalysisCtx) = + # this is the only thing that we need to perform after we have traversed + # the whole tree so that the strides are available. + # First we need to add all the computed lower bounds: + addLowerBoundAsFacts(c) + # Every slice used in a loop needs to be disjoint with itself: + for x,a,b,id,inLoop in items(c.slices): + if inLoop: overlap(c.guards, a,b, c.subStride(a), c.subStride(b)) + # Another tricky example is: + # while true: + # spawn f(a[i]) + # spawn f(a[i+1]) + # inc i # inc i, 2 would be correct here + # + # Or even worse: + # while true: + # spawn f(a[i+1 .. i+3]) + # spawn f(a[i+4 .. i+5]) + # inc i, 4 + # Prove that i*k*stride + 3 != i*k'*stride + 5 + # For the correct example this amounts to + # i*k*2 != i*k'*2 + 1 + # which is true. + # For now, we don't try to prove things like that at all, even though it'd + # be feasible for many useful examples. Instead we attach the slice to + # a spawn and if the attached spawns differ, we bail out: + for i in 0 .. high(c.slices): + for j in i+1 .. high(c.slices): + let x = c.slices[i] + let y = c.slices[j] + if x.spawnId != y.spawnId and guards.sameTree(x.x, y.x): + if not x.inLoop or not y.inLoop: + # XXX strictly speaking, 'or' is not correct here and it needs to + # be 'and'. However this prevents too many obviously correct programs + # like f(a[0..x]); for i in x+1 .. a.high: f(a[i]) + overlap(c.guards, x.a, x.b, y.a, y.b) + elif (let k = simpleSlice(x.a, x.b); let m = simpleSlice(y.a, y.b); + k >= 0 and m >= 0): + # ah I cannot resist the temptation and add another sweet heuristic: + # if both slices have the form (i+k)..(i+k) and (i+m)..(i+m) we + # check they are disjoint and k < stride and m < stride: + overlap(c.guards, x.a, x.b, y.a, y.b) + let stride = min(c.stride(x.a), c.stride(y.a)) + if k < stride and m < stride: + discard + else: + localError(x.x.info, "cannot prove ($#)..($#) disjoint from ($#)..($#)" % + [?x.a, ?x.b, ?y.a, ?y.b]) + else: + localError(x.x.info, "cannot prove ($#)..($#) disjoint from ($#)..($#)" % + [?x.a, ?x.b, ?y.a, ?y.b]) + +proc analyse(c: var AnalysisCtx; n: PNode) + +proc analyseSons(c: var AnalysisCtx; n: PNode) = + for i in 0 .. <safeLen(n): analyse(c, n[i]) + +proc min(a, b: PNode): PNode = + if a.isNil: result = b + elif a.intVal < b.intVal: result = a + else: result = b + +proc fromSystem(op: PSym): bool = sfSystemModule in getModule(op).flags + +proc analyseCall(c: var AnalysisCtx; n: PNode; op: PSym) = + if op.magic == mSpawn: + inc c.spawns + let oldSpawnId = c.currentSpawnId + c.currentSpawnId = c.spawns + gatherArgs(c, n[1]) + analyseSons(c, n) + c.currentSpawnId = oldSpawnId + elif op.magic == mInc or (op.name.s == "+=" and op.fromSystem): + if n[1].isLocal: + let incr = n[2].skipConv + if incr.kind in {nkCharLit..nkUInt32Lit} and incr.intVal > 0: + let slot = c.getSlot(n[1].sym) + slot.stride = min(slot.stride, incr) + analyseSons(c, n) + elif op.name.s == "[]" and op.fromSystem: + c.addSlice(n, n[1], n[2][1], n[2][2]) + analyseSons(c, n) + elif op.name.s == "[]=" and op.fromSystem: + c.addSlice(n, n[1], n[2][1], n[2][2]) + analyseSons(c, n) + else: + analyseSons(c, n) + +proc analyseCase(c: var AnalysisCtx; n: PNode) = + analyse(c, n.sons[0]) + let oldFacts = c.guards.len + for i in 1.. <n.len: + let branch = n.sons[i] + setLen(c.guards, oldFacts) + addCaseBranchFacts(c.guards, n, i) + for i in 0 .. <branch.len: + analyse(c, branch.sons[i]) + setLen(c.guards, oldFacts) + +proc analyseIf(c: var AnalysisCtx; n: PNode) = + analyse(c, n.sons[0].sons[0]) + let oldFacts = c.guards.len + addFact(c.guards, canon(n.sons[0].sons[0])) + + analyse(c, n.sons[0].sons[1]) + for i in 1.. <n.len: + let branch = n.sons[i] + setLen(c.guards, oldFacts) + for j in 0..i-1: + addFactNeg(c.guards, canon(n.sons[j].sons[0])) + if branch.len > 1: + addFact(c.guards, canon(branch.sons[0])) + for i in 0 .. <branch.len: + analyse(c, branch.sons[i]) + setLen(c.guards, oldFacts) + +proc analyse(c: var AnalysisCtx; n: PNode) = + case n.kind + of nkAsgn, nkFastAsgn: + if n[0].isSingleAssignable and n[1].isLocal: + let slot = c.getSlot(n[1].sym) + slot.alias = n[0].sym + elif n[0].isLocal: + # since we already ensure sfAddrTaken is not in s.flags, we only need to + # prevent direct assignments to the monotonic variable: + let slot = c.getSlot(n[0].sym) + slot.blackListed = true + invalidateFacts(c.guards, n[0]) + analyseSons(c, n) + addAsgnFact(c.guards, n[0], n[1]) + of nkCallKinds: + # direct call: + if n[0].kind == nkSym: analyseCall(c, n, n[0].sym) + else: analyseSons(c, n) + of nkBracketExpr: + c.addSlice(n, n[0], n[1], n[1]) + analyseSons(c, n) + of nkReturnStmt, nkRaiseStmt, nkTryStmt: + localError(n.info, "invalid control flow for 'parallel'") + # 'break' that leaves the 'parallel' section is not valid either + # or maybe we should generate a 'try' XXX + of nkVarSection: + for it in n: + let value = it.lastSon + if value.kind != nkEmpty: + for j in 0 .. it.len-3: + if it[j].isLocal: + let slot = c.getSlot(it[j].sym) + if slot.lower.isNil: slot.lower = value + else: internalError(it.info, "slot already has a lower bound") + analyse(c, value) + of nkCaseStmt: analyseCase(c, n) + of nkIfStmt, nkIfExpr: analyseIf(c, n) + of nkWhileStmt: + analyse(c, n.sons[0]) + # 'while true' loop? + inc c.inLoop + if isTrue(n.sons[0]): + analyseSons(c, n.sons[1]) + else: + # loop may never execute: + let oldState = c.locals.len + let oldFacts = c.guards.len + addFact(c.guards, canon(n.sons[0])) + analyse(c, n.sons[1]) + setLen(c.locals, oldState) + setLen(c.guards, oldFacts) + # we know after the loop the negation holds: + if not hasSubnodeWith(n.sons[1], nkBreakStmt): + addFactNeg(c.guards, canon(n.sons[0])) + dec c.inLoop + of nkTypeSection, nkProcDef, nkConverterDef, nkMethodDef, nkIteratorDef, + nkMacroDef, nkTemplateDef, nkConstSection, nkPragma: + discard + else: + analyseSons(c, n) + +proc transformSlices(n: PNode): PNode = + if n.kind in nkCallKinds and n[0].kind == nkSym: + let op = n[0].sym + if op.name.s == "[]" and op.fromSystem: + result = copyNode(n) + result.add opSlice.newSymNode + result.add n[1] + result.add n[2][1] + result.add n[2][2] + return result + if n.safeLen > 0: + result = shallowCopy(n) + for i in 0 .. < n.len: + result.sons[i] = transformSlices(n.sons[i]) + else: + result = n + +proc transformSpawn(owner: PSym; n, barrier: PNode): PNode +proc transformSpawnSons(owner: PSym; n, barrier: PNode): PNode = + result = shallowCopy(n) + for i in 0 .. < n.len: + result.sons[i] = transformSpawn(owner, n.sons[i], barrier) + +proc transformSpawn(owner: PSym; n, barrier: PNode): PNode = + case n.kind + of nkVarSection: + result = nil + for it in n: + let b = it.lastSon + if getMagic(b) == mSpawn: + if it.len != 3: localError(it.info, "invalid context for 'spawn'") + let m = transformSlices(b) + if result.isNil: + result = newNodeI(nkStmtList, n.info) + result.add n + result.add wrapProcForSpawn(owner, m[1], b.typ, barrier, it[0]) + it.sons[it.len-1] = emptyNode + if result.isNil: result = n + of nkAsgn, nkFastAsgn: + let b = n[1] + if getMagic(b) == mSpawn: + let m = transformSlices(b) + return wrapProcForSpawn(owner, m[1], b.typ, barrier, n[0]) + result = transformSpawnSons(owner, n, barrier) + of nkCallKinds: + if getMagic(n) == mSpawn: + result = transformSlices(n) + return wrapProcForSpawn(owner, result[1], n.typ, barrier, nil) + result = transformSpawnSons(owner, n, barrier) + elif n.safeLen > 0: + result = transformSpawnSons(owner, n, barrier) + else: + result = n + +proc checkArgs(a: var AnalysisCtx; n: PNode) = + discard "too implement" + +proc generateAliasChecks(a: AnalysisCtx; result: PNode) = + discard "too implement" + +proc liftParallel*(owner: PSym; n: PNode): PNode = + # this needs to be called after the 'for' loop elimination + + # first pass: + # - detect monotonic local integer variables + # - detect used slices + # - detect used arguments + #echo "PAR ", renderTree(n) + + var a = initAnalysisCtx() + let body = n.lastSon + analyse(a, body) + if a.spawns == 0: + localError(n.info, "'parallel' section without 'spawn'") + checkSlicesAreDisjoint(a) + checkArgs(a, body) + + var varSection = newNodeI(nkVarSection, n.info) + var temp = newSym(skTemp, getIdent"barrier", owner, n.info) + temp.typ = magicsys.getCompilerProc("Barrier").typ + incl(temp.flags, sfFromGeneric) + let tempNode = newSymNode(temp) + varSection.addVar tempNode + + let barrier = genAddrOf(tempNode) + result = newNodeI(nkStmtList, n.info) + generateAliasChecks(a, result) + result.add varSection + result.add callCodeGenProc("openBarrier", barrier) + result.add transformSpawn(owner, body, barrier) + result.add callCodeGenProc("closeBarrier", barrier) + diff --git a/compiler/sempass2.nim b/compiler/sempass2.nim index 6afde5f05..c8ce5e787 100644 --- a/compiler/sempass2.nim +++ b/compiler/sempass2.nim @@ -89,7 +89,7 @@ proc initVarViaNew(a: PEffects, n: PNode) = if n.kind != nkSym: return let s = n.sym if {tfNeedsInit, tfNotNil} * s.typ.flags <= {tfNotNil}: - # 'x' is not nil, but that doesn't mean it's not nil children + # 'x' is not nil, but that doesn't mean its "not nil" children # are initialized: initVar(a, n) @@ -478,7 +478,7 @@ proc trackBlock(tracked: PEffects, n: PNode) = else: track(tracked, n) -proc isTrue(n: PNode): bool = +proc isTrue*(n: PNode): bool = n.kind == nkSym and n.sym.kind == skEnumField and n.sym.position != 0 or n.kind == nkIntLit and n.intVal != 0 diff --git a/compiler/semtypes.nim b/compiler/semtypes.nim index d88a95603..c328f133b 100644 --- a/compiler/semtypes.nim +++ b/compiler/semtypes.nim @@ -1084,8 +1084,10 @@ proc semTypeNode(c: PContext, n: PNode, prev: PType): PType = of nkCallKinds: if isRange(n): result = semRangeAux(c, n, prev) - elif n[0].kind == nkIdent: - let op = n.sons[0].ident + elif n[0].kind notin nkIdentKinds: + result = semTypeExpr(c, n) + else: + let op = considerAcc(n.sons[0]) if op.id in {ord(wAnd), ord(wOr)} or op.s == "|": checkSonsLen(n, 3) var @@ -1120,8 +1122,6 @@ proc semTypeNode(c: PContext, n: PNode, prev: PType): PType = result = semAnyRef(c, n, tyRef, prev) else: result = semTypeExpr(c, n) - else: - result = semTypeExpr(c, n) of nkWhenStmt: var whenResult = semWhen(c, n, false) if whenResult.kind == nkStmtList: whenResult.kind = nkStmtListType diff --git a/compiler/vm.nim b/compiler/vm.nim index 218369fa1..0c2c23987 100644 --- a/compiler/vm.nim +++ b/compiler/vm.nim @@ -131,8 +131,9 @@ proc createStrKeepNode(x: var TFullReg) = nfAllConst in x.node.flags: # XXX this is hacky; tests/txmlgen triggers it: x.node = newNode(nkStrLit) - # debug x.node - #assert x.node.kind in {nkStrLit..nkTripleStrLit} + # It not only hackey, it is also wrong for tgentemplate. The primary + # cause of bugs like these is that the VM does not properly distinguish + # between variable defintions (var foo = e) and variable updates (foo = e). template createStr(x) = x.node = newNode(nkStrLit) diff --git a/config/nimrod.cfg b/config/nimrod.cfg index 2817eac55..df3835ace 100644 --- a/config/nimrod.cfg +++ b/config/nimrod.cfg @@ -16,6 +16,7 @@ arm.linux.gcc.linkerexe = "arm-linux-gcc" path="$lib/core" path="$lib/pure" path="$lib/pure/collections" +path="$lib/pure/concurrency" path="$lib/impure" path="$lib/wrappers" # path="$lib/wrappers/cairo" diff --git a/doc/manual.txt b/doc/manual.txt index 5afe385bb..c6f50298c 100644 --- a/doc/manual.txt +++ b/doc/manual.txt @@ -2823,7 +2823,7 @@ The following builtin procs cannot be overloaded for reasons of implementation simplicity (they require specialized semantic checking):: defined, definedInScope, compiles, low, high, sizeOf, - is, of, echo, shallowCopy, getAst + is, of, echo, shallowCopy, getAst, spawn Thus they act more like keywords than like ordinary identifiers; unlike a keyword however, a redefinition may `shadow`:idx: the definition in diff --git a/lib/pure/concurrency/cpuinfo.nim b/lib/pure/concurrency/cpuinfo.nim new file mode 100644 index 000000000..dfa819f64 --- /dev/null +++ b/lib/pure/concurrency/cpuinfo.nim @@ -0,0 +1,58 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2014 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## This module implements procs to determine the number of CPUs / cores. + +include "system/inclrtl" + +import strutils, os + +when not defined(windows): + import posix + +when defined(linux): + import linux + +when defined(macosx) or defined(bsd): + const + CTL_HW = 6 + HW_AVAILCPU = 25 + HW_NCPU = 3 + proc sysctl(x: ptr array[0..3, cint], y: cint, z: pointer, + a: var csize, b: pointer, c: int): cint {. + importc: "sysctl", header: "<sys/sysctl.h>".} + +proc countProcessors*(): int {.rtl, extern: "ncpi$1".} = + ## returns the numer of the processors/cores the machine has. + ## Returns 0 if it cannot be detected. + when defined(windows): + var x = getEnv("NUMBER_OF_PROCESSORS") + if x.len > 0: result = parseInt(x.string) + elif defined(macosx) or defined(bsd): + var + mib: array[0..3, cint] + numCPU: int + len: csize + mib[0] = CTL_HW + mib[1] = HW_AVAILCPU + len = sizeof(numCPU) + discard sysctl(addr(mib), 2, addr(numCPU), len, nil, 0) + if numCPU < 1: + mib[1] = HW_NCPU + discard sysctl(addr(mib), 2, addr(numCPU), len, nil, 0) + result = numCPU + elif defined(hpux): + result = mpctl(MPC_GETNUMSPUS, nil, nil) + elif defined(irix): + var SC_NPROC_ONLN {.importc: "_SC_NPROC_ONLN", header: "<unistd.h>".}: cint + result = sysconf(SC_NPROC_ONLN) + else: + result = sysconf(SC_NPROCESSORS_ONLN) + if result <= 0: result = 1 + diff --git a/lib/pure/concurrency/cpuload.nim b/lib/pure/concurrency/cpuload.nim new file mode 100644 index 000000000..3cf6a7392 --- /dev/null +++ b/lib/pure/concurrency/cpuload.nim @@ -0,0 +1,96 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2014 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## This module implements a helper for a thread pool to determine whether +## creating a thread is a good idea. + +when defined(windows): + import winlean, os, strutils, math + + proc `-`(a, b: TFILETIME): int64 = a.rdFileTime - b.rdFileTime +elif defined(linux): + from cpuinfo import countProcessors + +type + ThreadPoolAdvice* = enum + doNothing, + doCreateThread, # create additional thread for throughput + doShutdownThread # too many threads are busy, shutdown one + + ThreadPoolState* = object + when defined(windows): + prevSysKernel, prevSysUser, prevProcKernel, prevProcUser: TFILETIME + calls*: int + +proc advice*(s: var ThreadPoolState): ThreadPoolAdvice = + when defined(windows): + var + sysIdle, sysKernel, sysUser, + procCreation, procExit, procKernel, procUser: TFILETIME + if getSystemTimes(sysIdle, sysKernel, sysUser) == 0 or + getProcessTimes(THandle(-1), procCreation, procExit, + procKernel, procUser) == 0: + return doNothing + if s.calls > 0: + let + sysKernelDiff = sysKernel - s.prevSysKernel + sysUserDiff = sysUser - s.prevSysUser + + procKernelDiff = procKernel - s.prevProcKernel + procUserDiff = procUser - s.prevProcUser + + sysTotal = int(sysKernelDiff + sysUserDiff) + procTotal = int(procKernelDiff + procUserDiff) + # total CPU usage < 85% --> create a new worker thread. + # Measurements show that 100% and often even 90% is not reached even + # if all my cores are busy. + if sysTotal == 0 or procTotal / sysTotal < 0.85: + result = doCreateThread + s.prevSysKernel = sysKernel + s.prevSysUser = sysUser + s.prevProcKernel = procKernel + s.prevProcUser = procUser + elif defined(linux): + proc fscanf(c: TFile, frmt: cstring) {.varargs, importc, + header: "<stdio.h>".} + + var f = open("/proc/loadavg") + var b: float + var busy, total: int + fscanf(f,"%lf %lf %lf %ld/%ld", + addr b, addr b, addr b, addr busy, addr total) + f.close() + let cpus = countProcessors() + if busy-1 < cpus: + result = doCreateThread + elif busy-1 >= cpus*2: + result = doShutdownThread + else: + result = doNothing + else: + # XXX implement this for other OSes + result = doNothing + inc s.calls + +when isMainModule: + proc busyLoop() = + while true: + discard random(80) + os.sleep(100) + + spawn busyLoop() + spawn busyLoop() + spawn busyLoop() + spawn busyLoop() + + var s: ThreadPoolState + + for i in 1 .. 70: + echo advice(s) + os.sleep(1000) diff --git a/lib/pure/concurrency/threadpool.nim b/lib/pure/concurrency/threadpool.nim new file mode 100644 index 000000000..24cb9ccdd --- /dev/null +++ b/lib/pure/concurrency/threadpool.nim @@ -0,0 +1,347 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2014 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## Implements Nimrod's 'spawn'. + +import cpuinfo, cpuload, locks + +{.push stackTrace:off.} + +type + CondVar = object + c: TCond + L: TLock + counter: int + +proc createCondVar(): CondVar = + initCond(result.c) + initLock(result.L) + +proc destroyCondVar(cv: var CondVar) {.inline.} = + deinitCond(cv.c) + deinitLock(cv.L) + +proc await(cv: var CondVar) = + acquire(cv.L) + while cv.counter <= 0: + wait(cv.c, cv.L) + dec cv.counter + release(cv.L) + +proc signal(cv: var CondVar) = + acquire(cv.L) + inc cv.counter + release(cv.L) + signal(cv.c) + +type + Barrier* {.compilerProc.} = object + counter: int + cv: CondVar + +proc barrierEnter*(b: ptr Barrier) {.compilerProc.} = + atomicInc b.counter + +proc barrierLeave*(b: ptr Barrier) {.compilerProc.} = + atomicDec b.counter + if b.counter <= 0: signal(b.cv) + +proc openBarrier*(b: ptr Barrier) {.compilerProc.} = + b.counter = 0 + b.cv = createCondVar() + +proc closeBarrier*(b: ptr Barrier) {.compilerProc.} = + while b.counter > 0: await(b.cv) + destroyCondVar(b.cv) + +{.pop.} + +# ---------------------------------------------------------------------------- + +type + foreign* = object ## a region that indicates the pointer comes from a + ## foreign thread heap. + AwaitInfo = object + cv: CondVar + idx: int + + RawPromise* = ptr RawPromiseObj ## untyped base class for 'Promise[T]' + RawPromiseObj {.inheritable.} = object # \ + # we allocate this with the thread local allocator; this + # is possible since we already need to do the GC_unref + # on the owning thread + ready, usesCondVar: bool + cv: CondVar #\ + # for 'awaitAny' support + ai: ptr AwaitInfo + idx: int + data: PObject # we incRef and unref it to keep it alive + owner: ptr Worker + next: RawPromise + align: float64 # a float for proper alignment + + Promise* {.compilerProc.} [T] = ptr object of RawPromiseObj + blob: T ## the underlying value, if available. Note that usually + ## you should not access this field directly! However it can + ## sometimes be more efficient than getting the value via ``^``. + + WorkerProc = proc (thread, args: pointer) {.nimcall, gcsafe.} + Worker = object + taskArrived: CondVar + taskStarted: CondVar #\ + # task data: + f: WorkerProc + data: pointer + ready: bool # put it here for correct alignment! + initialized: bool # whether it has even been initialized + shutdown: bool # the pool requests to shut down this worker thread + promiseLock: TLock + head: RawPromise + +proc finished*(prom: RawPromise) = + ## This MUST be called for every created promise to free its associated + ## resources. Note that the default reading operation ``^`` is destructive + ## and calls ``finished``. + doAssert prom.ai.isNil, "promise is still attached to an 'awaitAny'" + assert prom.next == nil + let w = prom.owner + acquire(w.promiseLock) + prom.next = w.head + w.head = prom + release(w.promiseLock) + +proc cleanPromises(w: ptr Worker) = + var it = w.head + acquire(w.promiseLock) + while it != nil: + let nxt = it.next + if it.usesCondVar: destroyCondVar(it.cv) + if it.data != nil: GC_unref(it.data) + dealloc(it) + it = nxt + w.head = nil + release(w.promiseLock) + +proc nimCreatePromise(owner: pointer; blobSize: int): RawPromise {. + compilerProc.} = + result = cast[RawPromise](alloc0(RawPromiseObj.sizeof + blobSize)) + result.owner = cast[ptr Worker](owner) + +proc nimPromiseCreateCondVar(prom: RawPromise) {.compilerProc.} = + prom.cv = createCondVar() + prom.usesCondVar = true + +proc nimPromiseSignal(prom: RawPromise) {.compilerProc.} = + if prom.ai != nil: + acquire(prom.ai.cv.L) + prom.ai.idx = prom.idx + inc prom.ai.cv.counter + release(prom.ai.cv.L) + signal(prom.ai.cv.c) + if prom.usesCondVar: signal(prom.cv) + +proc await*[T](prom: Promise[T]) = + ## waits until the value for the promise arrives. + if prom.usesCondVar: await(prom.cv) + +proc awaitAndThen*[T](prom: Promise[T]; action: proc (x: T) {.closure.}) = + ## blocks until the value is available and then passes this value + ## to ``action``. Note that due to Nimrod's parameter passing semantics this + ## means that ``T`` doesn't need to be copied and so ``awaitAndThen`` can + ## sometimes be more efficient than ``^``. + if prom.usesCondVar: await(prom) + when T is string or T is seq: + action(cast[T](prom.data)) + elif T is ref: + {.error: "'awaitAndThen' not available for Promise[ref]".} + else: + action(prom.blob) + finished(prom) + +proc `^`*[T](prom: Promise[ref T]): foreign ptr T = + ## blocks until the value is available and then returns this value. Note + ## this reading is destructive for reasons of efficiency and convenience. + ## This calls ``finished(prom)``. + if prom.usesCondVar: await(prom) + result = cast[foreign ptr T](prom.data) + finished(prom) + +proc `^`*[T](prom: Promise[T]): T = + ## blocks until the value is available and then returns this value. Note + ## this reading is destructive for reasons of efficiency and convenience. + ## This calls ``finished(prom)``. + if prom.usesCondVar: await(prom) + when T is string or T is seq: + result = cast[T](prom.data) + else: + result = prom.blob + finished(prom) + +proc awaitAny*(promises: openArray[RawPromise]): int = + # awaits any of the given promises. Returns the index of one promise for which + ## a value arrived. A promise only supports one call to 'awaitAny' at the + ## same time. That means if you await([a,b]) and await([b,c]) the second + ## call will only await 'c'. If there is no promise left to be able to wait + ## on, -1 is returned. + ## **Note**: This results in non-deterministic behaviour and so should be + ## avoided. + var ai: AwaitInfo + ai.cv = createCondVar() + var conflicts = 0 + for i in 0 .. promises.high: + if cas(addr promises[i].ai, nil, addr ai): + promises[i].idx = i + else: + inc conflicts + if conflicts < promises.len: + await(ai.cv) + result = ai.idx + for i in 0 .. promises.high: + discard cas(addr promises[i].ai, addr ai, nil) + else: + result = -1 + destroyCondVar(ai.cv) + +proc nimArgsPassingDone(p: pointer) {.compilerProc.} = + let w = cast[ptr Worker](p) + signal(w.taskStarted) + +const + MaxThreadPoolSize* = 256 ## maximal size of the thread pool. 256 threads + ## should be good enough for anybody ;-) + +var + currentPoolSize: int + maxPoolSize = MaxThreadPoolSize + minPoolSize = 4 + gSomeReady = createCondVar() + readyWorker: ptr Worker + +proc slave(w: ptr Worker) {.thread.} = + while true: + w.ready = true + readyWorker = w + signal(gSomeReady) + await(w.taskArrived) + assert(not w.ready) + w.f(w, w.data) + if w.head != nil: w.cleanPromises + if w.shutdown: + w.shutdown = false + atomicDec currentPoolSize + +proc setMinPoolSize*(size: range[1..MaxThreadPoolSize]) = + ## sets the minimal thread pool size. The default value of this is 4. + minPoolSize = size + +proc setMaxPoolSize*(size: range[1..MaxThreadPoolSize]) = + ## sets the minimal thread pool size. The default value of this + ## is ``MaxThreadPoolSize``. + maxPoolSize = size + +var + workers: array[MaxThreadPoolSize, TThread[ptr Worker]] + workersData: array[MaxThreadPoolSize, Worker] + +proc activateThread(i: int) {.noinline.} = + workersData[i].taskArrived = createCondVar() + workersData[i].taskStarted = createCondVar() + initLock workersData[i].promiseLock + workersData[i].initialized = true + createThread(workers[i], slave, addr(workersData[i])) + +proc setup() = + currentPoolSize = min(countProcessors(), MaxThreadPoolSize) + readyWorker = addr(workersData[0]) + for i in 0.. <currentPoolSize: activateThread(i) + +proc preferSpawn*(): bool = + ## Use this proc to determine quickly if a 'spawn' or a direct call is + ## preferable. If it returns 'true' a 'spawn' may make sense. In general + ## it is not necessary to call this directly; use 'spawnX' instead. + result = gSomeReady.counter > 0 + +proc spawn*(call: expr): expr {.magic: "Spawn".} + ## always spawns a new task, so that the 'call' is never executed on + ## the calling thread. 'call' has to be proc call 'p(...)' where 'p' + ## is gcsafe and has 'void' as the return type. + +template spawnX*(call: expr): expr = + ## spawns a new task if a CPU core is ready, otherwise executes the + ## call in the calling thread. Usually it is advised to + ## use 'spawn' in order to not block the producer for an unknown + ## amount of time. 'call' has to be proc call 'p(...)' where 'p' + ## is gcsafe and has 'void' as the return type. + (if preferSpawn(): spawn call else: call) + +proc parallel*(body: stmt) {.magic: "Parallel".} + ## a parallel section can be used to execute a block in parallel. ``body`` + ## has to be in a DSL that is a particular subset of the language. Please + ## refer to the manual for further information. + +var + state: ThreadPoolState + stateLock: TLock + +initLock stateLock + +proc selectWorker(w: ptr Worker; fn: WorkerProc; data: pointer): bool = + if cas(addr w.ready, true, false): + w.data = data + w.f = fn + signal(w.taskArrived) + await(w.taskStarted) + result = true + +proc nimSpawn(fn: WorkerProc; data: pointer) {.compilerProc.} = + # implementation of 'spawn' that is used by the code generator. + while true: + if selectWorker(readyWorker, fn, data): return + for i in 0.. <currentPoolSize: + if selectWorker(addr(workersData[i]), fn, data): return + # determine what to do, but keep in mind this is expensive too: + # state.calls < maxPoolSize: warmup phase + # (state.calls and 127) == 0: periodic check + if state.calls < maxPoolSize or (state.calls and 127) == 0: + # ensure the call to 'advice' is atomic: + if tryAcquire(stateLock): + case advice(state) + of doNothing: discard + of doCreateThread: + if currentPoolSize < maxPoolSize: + if not workersData[currentPoolSize].initialized: + activateThread(currentPoolSize) + let w = addr(workersData[currentPoolSize]) + atomicInc currentPoolSize + if selectWorker(w, fn, data): + release(stateLock) + return + # else we didn't succeed but some other thread, so do nothing. + of doShutdownThread: + if currentPoolSize > minPoolSize: + let w = addr(workersData[currentPoolSize-1]) + w.shutdown = true + # we don't free anything here. Too dangerous. + release(stateLock) + # else the acquire failed, but this means some + # other thread succeeded, so we don't need to do anything here. + await(gSomeReady) + +proc sync*() = + ## a simple barrier to wait for all spawn'ed tasks. If you need more elaborate + ## waiting, you have to use an explicit barrier. + while true: + var allReady = true + for i in 0 .. <currentPoolSize: + if not allReady: break + allReady = allReady and workersData[i].ready + if allReady: break + await(gSomeReady) + +setup() diff --git a/lib/pure/osproc.nim b/lib/pure/osproc.nim index cb9792f2b..04a0c2403 100644 --- a/lib/pure/osproc.nim +++ b/lib/pure/osproc.nim @@ -1,7 +1,7 @@ # # # Nimrod's Runtime Library -# (c) Copyright 2013 Andreas Rumpf +# (c) Copyright 2014 Andreas Rumpf # # See the file "copying.txt", included in this # distribution, for details about the copyright. @@ -13,7 +13,7 @@ include "system/inclrtl" import - strutils, os, strtabs, streams + strutils, os, strtabs, streams, cpuinfo when defined(windows): import winlean @@ -225,42 +225,10 @@ proc errorHandle*(p: PProcess): TFileHandle {.rtl, extern: "nosp$1", ## it is closed when closing the PProcess ``p``. result = p.errHandle -when defined(macosx) or defined(bsd): - const - CTL_HW = 6 - HW_AVAILCPU = 25 - HW_NCPU = 3 - proc sysctl(x: ptr array[0..3, cint], y: cint, z: pointer, - a: var csize, b: pointer, c: int): cint {. - importc: "sysctl", header: "<sys/sysctl.h>".} - proc countProcessors*(): int {.rtl, extern: "nosp$1".} = ## returns the numer of the processors/cores the machine has. ## Returns 0 if it cannot be detected. - when defined(windows): - var x = getEnv("NUMBER_OF_PROCESSORS") - if x.len > 0: result = parseInt(x.string) - elif defined(macosx) or defined(bsd): - var - mib: array[0..3, cint] - numCPU: int - len: csize - mib[0] = CTL_HW - mib[1] = HW_AVAILCPU - len = sizeof(numCPU) - discard sysctl(addr(mib), 2, addr(numCPU), len, nil, 0) - if numCPU < 1: - mib[1] = HW_NCPU - discard sysctl(addr(mib), 2, addr(numCPU), len, nil, 0) - result = numCPU - elif defined(hpux): - result = mpctl(MPC_GETNUMSPUS, nil, nil) - elif defined(irix): - var SC_NPROC_ONLN {.importc: "_SC_NPROC_ONLN", header: "<unistd.h>".}: cint - result = sysconf(SC_NPROC_ONLN) - else: - result = sysconf(SC_NPROCESSORS_ONLN) - if result <= 0: result = 1 + result = cpuinfo.countProcessors() proc execProcesses*(cmds: openArray[string], options = {poStdErrToStdOut, poParentStreams}, diff --git a/lib/system.nim b/lib/system.nim index c69a335e4..dcfe42f2c 100644 --- a/lib/system.nim +++ b/lib/system.nim @@ -42,7 +42,6 @@ type cstring* {.magic: Cstring.} ## built-in cstring (*compatible string*) type pointer* {.magic: Pointer.} ## built-in pointer type, use the ``addr`` ## operator to get a pointer to a variable - const on* = true ## alias for ``true`` off* = false ## alias for ``false`` @@ -51,6 +50,9 @@ const type Ordinal* {.magic: Ordinal.}[T] + `ptr`* {.magic: Pointer.}[T] ## built-in generic untraced pointer type + `ref`* {.magic: Pointer.}[T] ## built-in generic traced pointer type + `nil` {.magic: "Nil".} expr* {.magic: Expr.} ## meta type to denote an expression (for templates) stmt* {.magic: Stmt.} ## meta type to denote a statement (for templates) @@ -2948,6 +2950,3 @@ when not defined(booting): template isStatic*(x): expr = compiles(static(x)) # checks whether `x` is a value known at compile-time - -when hasThreadSupport: - when hostOS != "standalone": include "system/sysspawn" diff --git a/lib/system/assign.nim b/lib/system/assign.nim index 75c749633..2ae945fb1 100644 --- a/lib/system/assign.nim +++ b/lib/system/assign.nim @@ -179,7 +179,8 @@ when not defined(nimmixin): # internal proc used for destroying sequences and arrays for i in countup(0, r.len - 1): destroy(r[i]) else: - # XXX Why is this exported and no compilerproc? + # XXX Why is this exported and no compilerproc? -> compilerprocs cannot be + # generic for now proc nimDestroyRange*[T](r: T) = # internal proc used for destroying sequences and arrays mixin destroy diff --git a/lib/system/atomics.nim b/lib/system/atomics.nim index b1a96b209..96246ba01 100644 --- a/lib/system/atomics.nim +++ b/lib/system/atomics.nim @@ -1,13 +1,14 @@ # # # Nimrod's Runtime Library -# (c) Copyright 2012 Andreas Rumpf +# (c) Copyright 2014 Andreas Rumpf # # See the file "copying.txt", included in this # distribution, for details about the copyright. # ## Atomic operations for Nimrod. +{.push stackTrace:off.} when (defined(gcc) or defined(llvm_gcc)) and hasThreadSupport: type @@ -203,3 +204,31 @@ proc atomicDec*(memLoc: var int, x: int = 1): int = else: dec(memLoc, x) result = memLoc + +when defined(windows) and not defined(gcc): + proc interlockedCompareExchange(p: pointer; exchange, comparand: int32): int32 + {.importc: "InterlockedCompareExchange", header: "<windows.h>", cdecl.} + + proc cas*[T: bool|int|ptr](p: ptr T; oldValue, newValue: T): bool = + interlockedCompareExchange(p, newValue.int32, oldValue.int32) != 0 + # XXX fix for 64 bit build +else: + # this is valid for GCC and Intel C++ + proc cas*[T: bool|int|ptr](p: ptr T; oldValue, newValue: T): bool + {.importc: "__sync_bool_compare_and_swap", nodecl.} + # XXX is this valid for 'int'? + + +when (defined(x86) or defined(amd64)) and defined(gcc): + proc cpuRelax {.inline.} = + {.emit: """asm volatile("pause" ::: "memory");""".} +elif (defined(x86) or defined(amd64)) and defined(vcc): + proc cpuRelax {.importc: "YieldProcessor", header: "<windows.h>".} +elif defined(intelc): + proc cpuRelax {.importc: "_mm_pause", header: "xmmintrin.h".} +elif false: + from os import sleep + + proc cpuRelax {.inline.} = os.sleep(1) + +{.pop.} diff --git a/lib/system/sysspawn.nim b/lib/system/sysspawn.nim index dabf35a3e..95cdba65d 100644 --- a/lib/system/sysspawn.nim +++ b/lib/system/sysspawn.nim @@ -14,30 +14,6 @@ when not defined(NimString): {.push stackTrace:off.} -when (defined(x86) or defined(amd64)) and defined(gcc): - proc cpuRelax {.inline.} = - {.emit: """asm volatile("pause" ::: "memory");""".} -elif (defined(x86) or defined(amd64)) and defined(vcc): - proc cpuRelax {.importc: "YieldProcessor", header: "<windows.h>".} -elif defined(intelc): - proc cpuRelax {.importc: "_mm_pause", header: "xmmintrin.h".} -elif false: - from os import sleep - - proc cpuRelax {.inline.} = os.sleep(1) - -when defined(windows) and not defined(gcc): - proc interlockedCompareExchange(p: pointer; exchange, comparand: int32): int32 - {.importc: "InterlockedCompareExchange", header: "<windows.h>", cdecl.} - - proc cas(p: ptr bool; oldValue, newValue: bool): bool = - interlockedCompareExchange(p, newValue.int32, oldValue.int32) != 0 - -else: - # this is valid for GCC and Intel C++ - proc cas(p: ptr bool; oldValue, newValue: bool): bool - {.importc: "__sync_bool_compare_and_swap", nodecl.} - # We declare our own condition variables here to get rid of the dummy lock # on Windows: @@ -54,6 +30,9 @@ proc createCondVar(): CondVar = initSysLock(result.stupidLock) #acquireSys(result.stupidLock) +proc destroyCondVar(c: var CondVar) {.inline.} = + deinitSysCond(c.c) + proc await(cv: var CondVar) = when defined(posix): acquireSys(cv.stupidLock) @@ -100,6 +79,26 @@ proc signal(cv: var FastCondVar) = #if cas(addr cv.slowPath, true, false): signal(cv.slow) +type + Barrier* {.compilerProc.} = object + counter: int + cv: CondVar + +proc barrierEnter*(b: ptr Barrier) {.compilerProc.} = + atomicInc b.counter + +proc barrierLeave*(b: ptr Barrier) {.compilerProc.} = + atomicDec b.counter + if b.counter <= 0: signal(b.cv) + +proc openBarrier*(b: ptr Barrier) {.compilerProc.} = + b.counter = 0 + b.cv = createCondVar() + +proc closeBarrier*(b: ptr Barrier) {.compilerProc.} = + await(b.cv) + destroyCondVar(b.cv) + {.pop.} # ---------------------------------------------------------------------------- diff --git a/tests/parallel/tdisjoint_slice1.nim b/tests/parallel/tdisjoint_slice1.nim new file mode 100644 index 000000000..c1d0e52f8 --- /dev/null +++ b/tests/parallel/tdisjoint_slice1.nim @@ -0,0 +1,21 @@ +discard """ + outputsub: "EVEN 28" +""" + +import threadpool + +proc odd(a: int) = echo "ODD ", a +proc even(a: int) = echo "EVEN ", a + +proc main() = + var a: array[0..30, int] + for i in low(a)..high(a): a[i] = i + parallel: + var i = 0 + while i <= 29: + spawn even(a[i]) + spawn odd(a[i+1]) + inc i, 2 + # is correct here + +main() diff --git a/tests/parallel/tdisjoint_slice2.nim b/tests/parallel/tdisjoint_slice2.nim new file mode 100644 index 000000000..1e86ea644 --- /dev/null +++ b/tests/parallel/tdisjoint_slice2.nim @@ -0,0 +1,33 @@ +discard """ + output: '''0 +1 +2 +3 +4 +5 +6 +7 +8''' + sortoutput: true +""" + +import threadpool + +proc f(a: openArray[int]) = + for x in a: echo x + +proc f(a: int) = echo a + +proc main() = + var a: array[0..9, int] = [0,1,2,3,4,5,6,7,8,9] + parallel: + spawn f(a[0..2]) + #spawn f(a[16..30]) + var i = 3 + while i <= 8: + spawn f(a[i]) + spawn f(a[i+1]) + inc i, 2 + # is correct here + +main() diff --git a/tests/parallel/tflowvar.nim b/tests/parallel/tflowvar.nim new file mode 100644 index 000000000..77fab14b5 --- /dev/null +++ b/tests/parallel/tflowvar.nim @@ -0,0 +1,17 @@ +discard """ + output: '''foobarfoobarbazbearbazbear''' + cmd: "nimrod $target --threads:on $options $file" +""" + +import threadpool + +proc computeSomething(a, b: string): string = a & b & a & b + +proc main = + let fvA = spawn computeSomething("foo", "bar") + let fvB = spawn computeSomething("baz", "bear") + + echo(^fvA, ^fvB) + +main() +sync() diff --git a/tests/parallel/tforstmt.nim b/tests/parallel/tforstmt.nim new file mode 100644 index 000000000..58de833f3 --- /dev/null +++ b/tests/parallel/tforstmt.nim @@ -0,0 +1,25 @@ +discard """ + output: '''3 +4 +5 +6 +7''' + sortoutput: true +""" + +import threadpool, os + +proc p(x: int) = + os.sleep(100 - x*10) + echo x + +proc testFor(a, b: int; foo: var openArray[int]) = + parallel: + for i in max(a, 0) .. min(b, foo.high): + spawn p(foo[i]) + +var arr = [0, 1, 2, 3, 4, 5, 6, 7] + +testFor(3, 10, arr) + + diff --git a/tests/parallel/tinvalid_array_bounds.nim b/tests/parallel/tinvalid_array_bounds.nim new file mode 100644 index 000000000..4c6065fd6 --- /dev/null +++ b/tests/parallel/tinvalid_array_bounds.nim @@ -0,0 +1,25 @@ +discard """ + errormsg: "can prove: i + 1 > 30" + line: 21 +""" + +import threadpool + +proc f(a: openArray[int]) = + for x in a: echo x + +proc f(a: int) = echo a + +proc main() = + var a: array[0..30, int] + parallel: + spawn f(a[0..15]) + spawn f(a[16..30]) + var i = 0 + while i <= 30: + spawn f(a[i]) + spawn f(a[i+1]) + inc i + #inc i # inc i, 2 would be correct here + +main() diff --git a/tests/parallel/tinvalid_counter_usage.nim b/tests/parallel/tinvalid_counter_usage.nim new file mode 100644 index 000000000..c6303c651 --- /dev/null +++ b/tests/parallel/tinvalid_counter_usage.nim @@ -0,0 +1,26 @@ +discard """ + errormsg: "invalid usage of counter after increment" + line: 21 +""" + +import threadpool + +proc f(a: openArray[int]) = + for x in a: echo x + +proc f(a: int) = echo a + +proc main() = + var a: array[0..30, int] + parallel: + spawn f(a[0..15]) + spawn f(a[16..30]) + var i = 0 + while i <= 30: + inc i + spawn f(a[i]) + inc i + #spawn f(a[i+1]) + #inc i # inc i, 2 would be correct here + +main() diff --git a/tests/parallel/tnon_disjoint_slice1.nim b/tests/parallel/tnon_disjoint_slice1.nim new file mode 100644 index 000000000..72d008bbd --- /dev/null +++ b/tests/parallel/tnon_disjoint_slice1.nim @@ -0,0 +1,25 @@ +discard """ + errormsg: "cannot prove (i)..(i) disjoint from (i + 1)..(i + 1)" + line: 20 +""" + +import threadpool + +proc f(a: openArray[int]) = + for x in a: echo x + +proc f(a: int) = echo a + +proc main() = + var a: array[0..30, int] + parallel: + #spawn f(a[0..15]) + #spawn f(a[16..30]) + var i = 0 + while i <= 29: + spawn f(a[i]) + spawn f(a[i+1]) + inc i + #inc i # inc i, 2 would be correct here + +main() diff --git a/tests/parallel/tpi.nim b/tests/parallel/tpi.nim new file mode 100644 index 000000000..1ef5c6aea --- /dev/null +++ b/tests/parallel/tpi.nim @@ -0,0 +1,26 @@ +discard """ + output: '''3.141792613595791 +3.141792613595791''' +""" + +import strutils, math, threadpool + +proc term(k: float): float = 4 * math.pow(-1, k) / (2*k + 1) + +proc piU(n: int): float = + var ch = newSeq[Promise[float]](n+1) + for k in 0..n: + ch[k] = spawn term(float(k)) + for k in 0..n: + result += ^ch[k] + +proc piS(n: int): float = + var ch = newSeq[float](n+1) + parallel: + for k in 0..ch.high: + ch[k] = spawn term(float(k)) + for k in 0..ch.high: + result += ch[k] + +echo formatFloat(piU(5000)) +echo formatFloat(piS(5000)) diff --git a/tests/system/tsysspawn.nim b/tests/parallel/tsysspawn.nim index 0388918aa..fc7921b0e 100644 --- a/tests/system/tsysspawn.nim +++ b/tests/parallel/tsysspawn.nim @@ -4,20 +4,22 @@ discard """ cmd: "nimrod $target --threads:on $options $file" """ +import threadpool + var x, y = 0 proc p1 = - for i in 0 .. 1_000_000: + for i in 0 .. 10_000: discard - inc x + atomicInc x proc p2 = - for i in 0 .. 1_000_000: + for i in 0 .. 10_000: discard - inc y, 2 + atomicInc y, 2 for i in 0.. 3: spawn(p1()) diff --git a/tests/parallel/tsysspawnbadarg.nim b/tests/parallel/tsysspawnbadarg.nim new file mode 100644 index 000000000..ad798a7d3 --- /dev/null +++ b/tests/parallel/tsysspawnbadarg.nim @@ -0,0 +1,9 @@ +discard """ + line: 9 + errormsg: "'spawn' takes a call expression" + cmd: "nimrod $target --threads:on $options $file" +""" + +import threadpool + +let foo = spawn(1) diff --git a/tests/system/tsysspawnbadarg.nim b/tests/system/tsysspawnbadarg.nim deleted file mode 100644 index ace074602..000000000 --- a/tests/system/tsysspawnbadarg.nim +++ /dev/null @@ -1,7 +0,0 @@ -discard """ - line: 7 - errormsg: "'spawn' takes a call expression of type void" - cmd: "nimrod $target --threads:on $options $file" -""" - -spawn(1) diff --git a/tests/testament/specs.nim b/tests/testament/specs.nim index 225ea1891..6e72f4b5e 100644 --- a/tests/testament/specs.nim +++ b/tests/testament/specs.nim @@ -46,7 +46,7 @@ type msg*: string ccodeCheck*: string err*: TResultEnum - substr*: bool + substr*, sortoutput*: bool targets*: set[TTarget] const @@ -113,6 +113,8 @@ proc parseSpec*(filename: string): TSpec = result.action = actionRun result.outp = e.value result.substr = true + of "sortoutput": + result.sortoutput = parseCfgBool(e.value) of "exitcode": discard parseInt(e.value, result.exitCode) of "msg": diff --git a/tests/testament/tester.nim b/tests/testament/tester.nim index 50d0e6eac..fc6b4ff95 100644 --- a/tests/testament/tester.nim +++ b/tests/testament/tester.nim @@ -11,7 +11,8 @@ import parseutils, strutils, pegs, os, osproc, streams, parsecfg, json, - marshal, backend, parseopt, specs, htmlgen, browsers, terminal + marshal, backend, parseopt, specs, htmlgen, browsers, terminal, + algorithm const resultsFile = "testresults.html" @@ -150,6 +151,11 @@ proc codegenCheck(test: TTest, check: string, given: var TSpec) = except EIO: given.err = reCodeNotFound +proc makeDeterministic(s: string): string = + var x = splitLines(s) + sort(x, system.cmp) + result = join(x, "\n") + proc testSpec(r: var TResults, test: TTest) = # major entry point for a single test let tname = test.name.addFileExt(".nim") @@ -191,8 +197,10 @@ proc testSpec(r: var TResults, test: TTest) = r.addResult(test, "exitcode: " & $expected.exitCode, "exitcode: " & $exitCode, reExitCodesDiffer) else: - if strip(buf.string) != strip(expected.outp): - if not (expected.substr and expected.outp in buf.string): + var bufB = strip(buf.string) + if expected.sortoutput: bufB = makeDeterministic(bufB) + if bufB != strip(expected.outp): + if not (expected.substr and expected.outp in bufB): given.err = reOutputsDiffer if given.err == reSuccess: codeGenCheck(test, expected.ccodeCheck, given) diff --git a/web/news.txt b/web/news.txt index 0bbae7b7b..b7403a3c7 100644 --- a/web/news.txt +++ b/web/news.txt @@ -2,6 +2,23 @@ News ==== +.. + 2014-06-29 Version 0.9.6 released + ================================= + + Changes affecting backwards compatibility + ----------------------------------------- + + - ``spawn`` now uses an elaborate self-adapting thread pool and as such + has been moved into its own module. So to use it, you now have to import + ``threadpool``. + + + Library Additions + ----------------- + + - Added module ``cpuinfo``. + - Added module ``threadpool``. 2014-04-21 Version 0.9.4 released |