From 03ffc344e129e38638a698fa0e83bb6c7b634913 Mon Sep 17 00:00:00 2001 From: Grzegorz Adam Hankiewicz Date: Sun, 20 Apr 2014 12:23:54 +0200 Subject: Version switch displays options used during `koch boot` --- lib/system/excpt.nim | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/system') diff --git a/lib/system/excpt.nim b/lib/system/excpt.nim index 612a9e729..e11a30e9b 100644 --- a/lib/system/excpt.nim +++ b/lib/system/excpt.nim @@ -71,7 +71,7 @@ proc popCurrentException {.compilerRtl, inl.} = # some platforms have native support for stack traces: const - nativeStackTraceSupported = (defined(macosx) or defined(linux)) and + nativeStackTraceSupported* = (defined(macosx) or defined(linux)) and not nimrodStackTrace hasSomeStackTrace = nimrodStackTrace or defined(nativeStackTrace) and nativeStackTraceSupported -- cgit 1.4.1-2-gfad0 From a146d6b2b9f4206ddb4acd00a0b5c3ed403fa136 Mon Sep 17 00:00:00 2001 From: Araq Date: Tue, 22 Apr 2014 21:40:12 +0200 Subject: building of nimrtl.dll should work again --- lib/system.nim | 9 +++++---- lib/system/inclrtl.nim | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'lib/system') diff --git a/lib/system.nim b/lib/system.nim index 4a5d46a7f..ecee7dad7 100644 --- a/lib/system.nim +++ b/lib/system.nim @@ -234,6 +234,11 @@ template `>` * (x, y: expr): expr {.immediate.} = ## "is greater" operator. This is the same as ``y < x``. y < x +const + appType* {.magic: "AppType"}: string = "" + ## a string that describes the application type. Possible values: + ## "console", "gui", "lib". + include "system/inclrtl" const NoFakeVars* = defined(NimrodVM) ## true if the backend doesn't support \ @@ -940,10 +945,6 @@ const ## a string that describes the host CPU. Possible values: ## "i386", "alpha", "powerpc", "sparc", "amd64", "mips", "arm". - appType* {.magic: "AppType"}: string = "" - ## a string that describes the application type. Possible values: - ## "console", "gui", "lib". - seqShallowFlag = low(int) proc compileOption*(option: string): bool {. diff --git a/lib/system/inclrtl.nim b/lib/system/inclrtl.nim index 12eb90162..5c82db4da 100644 --- a/lib/system/inclrtl.nim +++ b/lib/system/inclrtl.nim @@ -1,7 +1,7 @@ # # # Nimrod's Runtime Library -# (c) Copyright 2013 Andreas Rumpf +# (c) Copyright 2014 Andreas Rumpf # # See the file "copying.txt", included in this # distribution, for details about the copyright. -- cgit 1.4.1-2-gfad0 From fab8cee13d305cc157a7332d00a49e1d48577949 Mon Sep 17 00:00:00 2001 From: Araq Date: Fri, 25 Apr 2014 19:54:42 +0200 Subject: minor tweaks; updated todo.txt --- compiler/semfold.nim | 1 + lib/pure/httpserver.nim | 2 +- lib/system/threads.nim | 2 +- todo.txt | 26 ++++++++++++-------------- tools/trimcc.nim | 2 +- 5 files changed, 16 insertions(+), 17 deletions(-) (limited to 'lib/system') diff --git a/compiler/semfold.nim b/compiler/semfold.nim index 79abfaf4d..c0c8a28c8 100644 --- a/compiler/semfold.nim +++ b/compiler/semfold.nim @@ -380,6 +380,7 @@ proc evalOp(m: TMagic, n, a, b, c: PNode): PNode = of mInSet: result = newIntNodeT(ord(inSet(a, b)), n) of mRepr: # BUGFIX: we cannot eval mRepr here for reasons that I forgot. + discard of mIntToStr, mInt64ToStr: result = newStrNodeT($(getOrdValue(a)), n) of mBoolToStr: if getOrdValue(a) == 0: result = newStrNodeT("false", n) diff --git a/lib/pure/httpserver.nim b/lib/pure/httpserver.nim index 8de708c5d..885742b64 100644 --- a/lib/pure/httpserver.nim +++ b/lib/pure/httpserver.nim @@ -192,7 +192,7 @@ when false: if path[path.len-1] == '/' or existsDir(path): path = path / "index.html" - if not ExistsFile(path): + if not existsFile(path): discardHeaders(client) notFound(client) else: diff --git a/lib/system/threads.nim b/lib/system/threads.nim index 0d52e4d09..d3b3aa457 100644 --- a/lib/system/threads.nim +++ b/lib/system/threads.nim @@ -243,7 +243,7 @@ when not defined(useNimRtl): # on UNIX, the GC uses ``SIGFREEZE`` to tell every thread to stop so that # the GC can examine the stacks? - proc stopTheWord() = nil + proc stopTheWord() = discard # We jump through some hops here to ensure that Nimrod thread procs can have # the Nimrod calling convention. This is needed because thread procs are diff --git a/todo.txt b/todo.txt index bc7c7979a..c2335dd32 100644 --- a/todo.txt +++ b/todo.txt @@ -1,6 +1,17 @@ -version 0.9.4 +version 0.9.6 ============= +- fix the bug that keeps 'defer' template from working +- make '--implicitStatic:on' the default +- fix the tuple unpacking in lambda bug +- make tuple unpacking work in a non-var/let context +- special rule for ``[]=`` +- ``=`` should be overloadable; requires specialization for ``=``; general + lift mechanism in the compiler is already implemented for 'fields' +- built-in 'getImpl' +- type API for macros; make 'spawn' a macro +- markAndSweepGC should expose an API for fibers + Bugs ==== @@ -16,7 +27,6 @@ Bugs version 0.9.x ============= -- fix the bug that keeps 'defer' template from working - pragmas need 'bindSym' support - pragmas need re-work: 'push' is dangerous, 'hasPragma' does not work reliably with user-defined pragmas @@ -28,18 +38,10 @@ version 0.9.x - ensure (ref T)(a, b) works as a type conversion and type constructor - optimize 'genericReset'; 'newException' leads to code bloat - stack-less GC -- make '--implicitStatic:on' the default - implicit deref for parameter matching -- special rule for ``[]=`` -- ``=`` should be overloadable; requires specialization for ``=``; general - lift mechanism in the compiler is already implemented for 'fields' -- built-in 'getImpl' - VM: optimize opcAsgnStr -- change comment handling in the AST; that's lots of work as c2nim and pas2nim - make use of the fact every node can have a comment! - version 0.9.X ============= @@ -111,10 +113,6 @@ GC - precise stack marking; embrace C++ code generation for that - marker procs for Boehm GC - hybrid GC -- GC: implement simple generational GC - * first version: mark black in write barrier - * second version: introduce fake roots instead of marking black - * third version: find some clever heuristic which is preferable - acyclic vs prunable; introduce GC hints - use big blocks in the allocator - object pooling support for *hard* realtime systems diff --git a/tools/trimcc.nim b/tools/trimcc.nim index fd15726a5..6271d2b9a 100644 --- a/tools/trimcc.nim +++ b/tools/trimcc.nim @@ -21,7 +21,7 @@ proc walker(dir: string) = moveFile(dest=path, sourc=newName(path)) of pcDir: walker(path) - else: nil + else: discard if paramCount() == 1: walker(paramStr(1)) -- cgit 1.4.1-2-gfad0 From ed1927e0013a72b1e33086b674d8546de4641a9b Mon Sep 17 00:00:00 2001 From: Araq Date: Wed, 30 Apr 2014 00:22:45 +0200 Subject: bugfix: MS GC acknowledges GC_ref/unref properly --- lib/system/gc_ms.nim | 51 +++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 45 insertions(+), 6 deletions(-) (limited to 'lib/system') diff --git a/lib/system/gc_ms.nim b/lib/system/gc_ms.nim index 3c99a57e1..db437b8d0 100644 --- a/lib/system/gc_ms.nim +++ b/lib/system/gc_ms.nim @@ -48,12 +48,15 @@ type # non-zero count table stackBottom: pointer cycleThreshold: int + when useCellIds: + idGenerator: int when withBitvectors: allocated, marked: TCellSet tempStack: TCellSeq # temporary stack for recursion elimination recGcLock: int # prevent recursion via finalizers; no thread lock region: TMemRegion # garbage collected region stat: TGcStat + additionalRoots: TCellSeq # dummy roots for GC_ref/unref var gch {.rtlThreadVar.}: TGcHeap @@ -131,13 +134,26 @@ proc prepareDealloc(cell: PCell) = (cast[TFinalizer](cell.typ.finalizer))(cellToUsr(cell)) dec(gch.recGcLock) -proc nimGCref(p: pointer) {.compilerProc, inline.} = +proc nimGCref(p: pointer) {.compilerProc.} = # we keep it from being collected by pretending it's not even allocated: - when withBitvectors: excl(gch.allocated, usrToCell(p)) - else: usrToCell(p).refcount = rcBlack -proc nimGCunref(p: pointer) {.compilerProc, inline.} = - when withBitvectors: incl(gch.allocated, usrToCell(p)) - else: usrToCell(p).refcount = rcWhite + when false: + when withBitvectors: excl(gch.allocated, usrToCell(p)) + else: usrToCell(p).refcount = rcBlack + add(gch.additionalRoots, usrToCell(p)) + +proc nimGCunref(p: pointer) {.compilerProc.} = + let cell = usrToCell(p) + var L = gch.additionalRoots.len + var i = L + let d = gch.additionalRoots.d + while i >= 0: + if d[i] == cell: + d[i] = d[L] + dec gch.additionalRoots.len + break + when false: + when withBitvectors: incl(gch.allocated, usrToCell(p)) + else: usrToCell(p).refcount = rcWhite proc initGC() = when not defined(useNimRtl): @@ -146,6 +162,7 @@ proc initGC() = gch.stat.maxThreshold = 0 gch.stat.maxStackSize = 0 init(gch.tempStack) + init(gch.additionalRoots) when withBitvectors: Init(gch.allocated) init(gch.marked) @@ -212,8 +229,16 @@ proc rawNewObj(typ: PNimType, size: int, gch: var TGcHeap): pointer = res.refcount = 0 release(gch) when withBitvectors: incl(gch.allocated, res) + when useCellIds: + inc gch.idGenerator + res.id = gch.idGenerator result = cellToUsr(res) +when useCellIds: + proc getCellId*[T](x: ref T): int = + let p = usrToCell(cast[pointer](x)) + result = p.id + {.pop.} proc newObj(typ: PNimType, size: int): pointer {.compilerRtl.} = @@ -262,6 +287,9 @@ proc growObj(old: pointer, newsize: int, gch: var TGcHeap): pointer = else: zeroMem(ol, sizeof(TCell)) when withBitvectors: incl(gch.allocated, res) + when useCellIds: + inc gch.idGenerator + res.id = gch.idGenerator release(gch) result = cellToUsr(res) when defined(memProfiler): nimProfile(newsize-oldsize) @@ -332,8 +360,19 @@ proc sweep(gch: var TGcHeap) = if c.refcount == rcBlack: c.refcount = rcWhite else: freeCyclicCell(gch, c) +when false: + proc newGcInvariant*() = + for x in allObjects(gch.region): + if isCell(x): + var c = cast[PCell](x) + if c.typ == nil: + writeStackTrace() + quit 1 + proc markGlobals(gch: var TGcHeap) = for i in 0 .. < globalMarkersLen: globalMarkers[i]() + let d = gch.additionalRoots.d + for i in 0 .. < gch.additionalRoots.len: mark(gch, d[i]) proc gcMark(gch: var TGcHeap, p: pointer) {.inline.} = # the addresses are not as cells on the stack, so turn them to cells: -- cgit 1.4.1-2-gfad0 From 81d404979780386ecd1756a41dfc09497e194f26 Mon Sep 17 00:00:00 2001 From: Araq Date: Wed, 30 Apr 2014 01:05:52 +0200 Subject: bugfix: MS-GC GC_unref --- lib/system/gc_ms.nim | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib/system') diff --git a/lib/system/gc_ms.nim b/lib/system/gc_ms.nim index db437b8d0..410243528 100644 --- a/lib/system/gc_ms.nim +++ b/lib/system/gc_ms.nim @@ -143,7 +143,7 @@ proc nimGCref(p: pointer) {.compilerProc.} = proc nimGCunref(p: pointer) {.compilerProc.} = let cell = usrToCell(p) - var L = gch.additionalRoots.len + var L = gch.additionalRoots.len-1 var i = L let d = gch.additionalRoots.d while i >= 0: @@ -151,6 +151,7 @@ proc nimGCunref(p: pointer) {.compilerProc.} = d[i] = d[L] dec gch.additionalRoots.len break + dec(i) when false: when withBitvectors: incl(gch.allocated, usrToCell(p)) else: usrToCell(p).refcount = rcWhite -- cgit 1.4.1-2-gfad0 From d0438540d00c64fe4fa140a6b7c448a2ae4a7110 Mon Sep 17 00:00:00 2001 From: Araq Date: Fri, 2 May 2014 08:44:03 +0200 Subject: implemented builtin noncopying slice --- compiler/ast.nim | 2 +- compiler/ccgcalls.nim | 44 ++++++++++++++++++++++++++++++++------------ compiler/parampatterns.nim | 5 ++++- compiler/vmdef.nim | 2 +- compiler/vmgen.nim | 2 +- lib/system.nim | 4 ++-- lib/system/gc_ms.nim | 1 + 7 files changed, 42 insertions(+), 18 deletions(-) (limited to 'lib/system') diff --git a/compiler/ast.nim b/compiler/ast.nim index 97f48b253..172dd1fce 100644 --- a/compiler/ast.nim +++ b/compiler/ast.nim @@ -597,7 +597,7 @@ const mIntToStr, mInt64ToStr, mFloatToStr, mCStrToStr, mStrToStr, mEnumToStr, mAnd, mOr, mEqStr, mLeStr, mLtStr, mEqSet, mLeSet, mLtSet, mMulSet, mPlusSet, mMinusSet, mSymDiffSet, mConStrStr, mConArrArr, mConArrT, - mConTArr, mConTT, mSlice, + mConTArr, mConTT, mAppendStrCh, mAppendStrStr, mAppendSeqElem, mInRange, mInSet, mRepr, mRand, diff --git a/compiler/ccgcalls.nim b/compiler/ccgcalls.nim index 84c5bf419..a7840305d 100644 --- a/compiler/ccgcalls.nim +++ b/compiler/ccgcalls.nim @@ -77,18 +77,38 @@ proc isInCurrentFrame(p: BProc, n: PNode): bool = proc openArrayLoc(p: BProc, n: PNode): PRope = var a: TLoc - initLocExpr(p, n, a) - case skipTypes(a.t, abstractVar).kind - of tyOpenArray, tyVarargs: - result = ropef("$1, $1Len0", [rdLoc(a)]) - of tyString, tySequence: - if skipTypes(n.typ, abstractInst).kind == tyVar: - result = ropef("(*$1)->data, (*$1)->$2", [a.rdLoc, lenField()]) - else: - result = ropef("$1->data, $1->$2", [a.rdLoc, lenField()]) - of tyArray, tyArrayConstr: - result = ropef("$1, $2", [rdLoc(a), toRope(lengthOrd(a.t))]) - else: internalError("openArrayLoc: " & typeToString(a.t)) + + let q = skipConv(n) + if getMagic(q) == mSlice: + # magic: pass slice to openArray: + var b, c: TLoc + initLocExpr(p, q[1], a) + initLocExpr(p, q[2], b) + initLocExpr(p, q[3], c) + let fmt = + case skipTypes(a.t, abstractVar).kind + of tyOpenArray, tyVarargs, tyArray, tyArrayConstr: + "($1)+($2), ($3)-($2)+1" + of tyString, tySequence: + if skipTypes(n.typ, abstractInst).kind == tyVar: + "(*$1)->data+($2), ($3)-($2)+1" + else: + "$1->data+($2), ($3)-($2)+1" + else: (internalError("openArrayLoc: " & typeToString(a.t)); "") + result = ropef(fmt, [rdLoc(a), rdLoc(b), rdLoc(c)]) + else: + initLocExpr(p, n, a) + case skipTypes(a.t, abstractVar).kind + of tyOpenArray, tyVarargs: + result = ropef("$1, $1Len0", [rdLoc(a)]) + of tyString, tySequence: + if skipTypes(n.typ, abstractInst).kind == tyVar: + result = ropef("(*$1)->data, (*$1)->$2", [a.rdLoc, lenField()]) + else: + result = ropef("$1->data, $1->$2", [a.rdLoc, lenField()]) + of tyArray, tyArrayConstr: + result = ropef("$1, $2", [rdLoc(a), toRope(lengthOrd(a.t))]) + else: internalError("openArrayLoc: " & typeToString(a.t)) proc genArgStringToCString(p: BProc, n: PNode): PRope {.inline.} = diff --git a/compiler/parampatterns.nim b/compiler/parampatterns.nim index e94068776..bbdba8c22 100644 --- a/compiler/parampatterns.nim +++ b/compiler/parampatterns.nim @@ -10,7 +10,7 @@ ## This module implements the pattern matching features for term rewriting ## macro support. -import strutils, ast, astalgo, types, msgs, idents, renderer, wordrecg +import strutils, ast, astalgo, types, msgs, idents, renderer, wordrecg, trees # we precompile the pattern here for efficiency into some internal # stack based VM :-) Why? Because it's fun; I did no benchmarks to see if that @@ -215,6 +215,9 @@ proc isAssignable*(owner: PSym, n: PNode): TAssignableResult = result = arLValue of nkObjUpConv, nkObjDownConv, nkCheckedFieldExpr: result = isAssignable(owner, n.sons[0]) + of nkCallKinds: + # builtin slice keeps lvalue-ness: + if getMagic(n) == mSlice: result = isAssignable(owner, n.sons[1]) else: discard diff --git a/compiler/vmdef.nim b/compiler/vmdef.nim index d0c38a2ad..c391d8415 100644 --- a/compiler/vmdef.nim +++ b/compiler/vmdef.nim @@ -207,7 +207,7 @@ const largeInstrs* = { # instructions which use 2 int32s instead of 1: opcSubStr, opcConv, opcCast, opcNewSeq, opcOf} slotSomeTemp* = slotTempUnknown - relativeJumps* = {opcTJmp, opcFJmp, opcJmp} + relativeJumps* = {opcTJmp, opcFJmp, opcJmp, opcJmpBack} template opcode*(x: TInstr): TOpcode {.immediate.} = TOpcode(x.uint32 and 0xff'u32) template regA*(x: TInstr): TRegister {.immediate.} = TRegister(x.uint32 shr 8'u32 and 0xff'u32) diff --git a/compiler/vmgen.nim b/compiler/vmgen.nim index 84577bb22..c5eb67025 100644 --- a/compiler/vmgen.nim +++ b/compiler/vmgen.nim @@ -1622,7 +1622,7 @@ proc genProc(c: PCtx; s: PSym): int = c.gABC(body, opcEof, eofInstr.regA) c.optimizeJumps(result) s.offset = c.prc.maxSlots - #if s.name.s == "addStuff": + #if s.name.s == "parse_until_symbol": # echo renderTree(body) # c.echoCode(result) c.prc = oldPrc diff --git a/lib/system.nim b/lib/system.nim index 6263f7b24..cfc8ceb6f 100644 --- a/lib/system.nim +++ b/lib/system.nim @@ -2620,7 +2620,7 @@ proc `[]=`*[Idx, T](a: var array[Idx, T], x: TSlice[int], b: openArray[T]) = if L == b.len: for i in 0 .. Date: Sat, 3 May 2014 13:25:41 -0500 Subject: Fixes #1168 --- lib/system/ansi_c.nim | 3 +++ lib/system/excpt.nim | 10 +++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) (limited to 'lib/system') diff --git a/lib/system/ansi_c.nim b/lib/system/ansi_c.nim index 2d33965e3..5111bc3cf 100644 --- a/lib/system/ansi_c.nim +++ b/lib/system/ansi_c.nim @@ -57,6 +57,7 @@ when not defined(SIGINT): SIGINT = cint(2) SIGSEGV = cint(11) SIGTERM = cint(15) + SIGPIPE = cint(13) else: {.error: "SIGABRT not ported to your platform".} else: @@ -66,6 +67,8 @@ when not defined(SIGINT): SIGABRT {.importc: "SIGABRT", nodecl.}: cint SIGFPE {.importc: "SIGFPE", nodecl.}: cint SIGILL {.importc: "SIGILL", nodecl.}: cint + when defined(macosx) or defined(linux): + var SIGPIPE {.importc: "SIGPIPE", nodecl.}: cint when defined(macosx): when NoFakeVars: diff --git a/lib/system/excpt.nim b/lib/system/excpt.nim index 2dc134eaf..63a61183f 100644 --- a/lib/system/excpt.nim +++ b/lib/system/excpt.nim @@ -298,7 +298,13 @@ when not defined(noSignalHandler): elif s == SIGILL: action("SIGILL: Illegal operation.\n") elif s == SIGBUS: action("SIGBUS: Illegal storage access. (Attempt to read from nil?)\n") - else: action("unknown signal\n") + else: + block platformSpecificSignal: + when defined(SIGPIPE): + if s == SIGPIPE: + action("SIGPIPE: Pipe closed.\n") + break platformSpecificSignal + action("unknown signal\n") # print stack trace and quit when hasSomeStackTrace: @@ -323,6 +329,8 @@ when not defined(noSignalHandler): c_signal(SIGFPE, signalHandler) c_signal(SIGILL, signalHandler) c_signal(SIGBUS, signalHandler) + when defined(SIGPIPE): + c_signal(SIGPIPE, signalHandler) registerSignalHandler() # call it in initialization section -- cgit 1.4.1-2-gfad0 From 6195dbe491ccd864c5dcb59f87826291ac1f1ff4 Mon Sep 17 00:00:00 2001 From: Araq Date: Mon, 12 May 2014 11:12:37 +0200 Subject: initial non-compiling version of 'parallel' --- compiler/guards.nim | 191 +++++++++++++++-- compiler/lowerings.nim | 22 +- compiler/semparallel.nim | 414 ++++++++++++++++++++++++++++++++++++ compiler/sempass2.nim | 4 +- compiler/vm.nim | 5 +- config/nimrod.cfg | 1 + lib/pure/concurrency/cpuinfo.nim | 58 +++++ lib/pure/concurrency/cpuload.nim | 96 +++++++++ lib/pure/concurrency/threadpool.nim | 210 ++++++++++++++++++ lib/pure/osproc.nim | 38 +--- lib/system.nim | 3 - lib/system/atomics.nim | 31 ++- lib/system/sysspawn.nim | 47 ++-- tests/system/tsysspawn.nim | 10 +- tests/system/tsysspawnbadarg.nim | 2 + web/news.txt | 17 ++ 16 files changed, 1058 insertions(+), 91 deletions(-) create mode 100644 compiler/semparallel.nim create mode 100644 lib/pure/concurrency/cpuinfo.nim create mode 100644 lib/pure/concurrency/cpuload.nim create mode 100644 lib/pure/concurrency/threadpool.nim (limited to 'lib/system') diff --git a/compiler/guards.nim b/compiler/guards.nim index f475f5068..57cd73b11 100644 --- a/compiler/guards.nim +++ b/compiler/guards.nim @@ -9,7 +9,8 @@ ## This module implements the 'implies' relation for guards. -import ast, astalgo, msgs, magicsys, nimsets, trees, types, renderer, idents +import ast, astalgo, msgs, magicsys, nimsets, trees, types, renderer, idents, + saturate const someEq = {mEqI, mEqI64, mEqF64, mEqEnum, mEqCh, mEqB, mEqRef, mEqProc, @@ -25,6 +26,17 @@ const someIn = {mInRange, mInSet} + someHigh = {mHigh} + # we don't list unsigned here because wrap around semantics suck for + # proving anything: + someAdd = {mAddI, mAddI64, mAddF64, mSucc} + someSub = {mSubI, mSubI64, mSubF64, mPred} + someMul = {mMulI, mMulI64, mMulF64} + someDiv = {mDivI, mDivI64, mDivF64} + someMod = {mModI, mModI64} + someMax = {mMaxI, mMaxI64, mMaxF64} + someMin = {mMinI, mMinI64, mMinF64} + proc isValue(n: PNode): bool = n.kind in {nkCharLit..nkNilLit} proc isLocation(n: PNode): bool = not n.isValue @@ -69,19 +81,24 @@ proc isLetLocation(m: PNode, isApprox: bool): bool = proc interestingCaseExpr*(m: PNode): bool = isLetLocation(m, true) -proc getMagicOp(name: string, m: TMagic): PSym = +proc createMagic*(name: string, m: TMagic): PSym = result = newSym(skProc, getIdent(name), nil, unknownLineInfo()) result.magic = m let - opLe = getMagicOp("<=", mLeI) - opLt = getMagicOp("<", mLtI) - opAnd = getMagicOp("and", mAnd) - opOr = getMagicOp("or", mOr) - opNot = getMagicOp("not", mNot) - opIsNil = getMagicOp("isnil", mIsNil) - opContains = getMagicOp("contains", mInSet) - opEq = getMagicOp("==", mEqI) + opLe = createMagic("<=", mLeI) + opLt = createMagic("<", mLtI) + opAnd = createMagic("and", mAnd) + opOr = createMagic("or", mOr) + opNot = createMagic("not", mNot) + opIsNil = createMagic("isnil", mIsNil) + opContains = createMagic("contains", mInSet) + opEq = createMagic("==", mEqI) + opAdd = createMagic("+", mAddI) + opSub = createMagic("-", mSubI) + opMul = createMagic("*", mMulI) + opDiv = createMagic("div", mDivI) + opLen = createMagic("len", mLengthSeq) proc swapArgs(fact: PNode, newOp: PSym): PNode = result = newNodeI(nkCall, fact.info, 3) @@ -137,17 +154,118 @@ proc neg(n: PNode): PNode = result.sons[0] = newSymNode(opNot) result.sons[1] = n -proc buildIsNil(arg: PNode): PNode = - result = newNodeI(nkCall, arg.info, 2) - result.sons[0] = newSymNode(opIsNil) - result.sons[1] = arg +proc buildCall(op: PSym; a: PNode): PNode = + result = newNodeI(nkCall, a.info, 2) + result.sons[0] = newSymNode(op) + result.sons[1] = a + +proc buildCall(op: PSym; a, b: PNode): PNode = + result = newNodeI(nkCall, a.info, 3) + result.sons[0] = newSymNode(op) + result.sons[1] = a + result.sons[2] = b + +proc `+@`*(a: PNode; b: BiggestInt): PNode = + opAdd.buildCall(a, nkIntLit.newIntNode(b)) + +proc `|+|`(a, b: PNode): PNode = + result = copyNode(a) + if a.kind in {nkCharLit..nkUInt64Lit}: result.intVal = a.intVal |+| b.intVal + else: result.floatVal = a.floatVal + b.floatVal + +proc `|*|`(a, b: PNode): PNode = + result = copyNode(a) + if a.kind in {nkCharLit..nkUInt64Lit}: result.intVal = a.intVal |*| b.intVal + else: result.floatVal = a.floatVal * b.floatVal + +proc zero(): PNode = nkIntLit.newIntNode(0) +proc one(): PNode = nkIntLit.newIntNode(1) +proc minusOne(): PNode = nkIntLit.newIntNode(-1) + +proc lowBound*(x: PNode): PNode = nkIntLit.newIntNode(firstOrd(x.typ)) +proc highBound*(x: PNode): PNode = + if x.typ.skipTypes(abstractInst).kind == tyArray: + nkIntLit.newIntNode(lastOrd(x.typ)) + else: + opAdd.buildCall(opLen.buildCall(x), minusOne()) + +proc canon*(n: PNode): PNode = + # XXX for now only the new code in 'semparallel' uses this + if n.safeLen >= 1: + result = newNodeI(n.kind, n.info, n.len) + for i in 0 .. < n.safeLen: + result.sons[i] = canon(n.sons[i]) + else: + result = n + case result.getMagic + of someEq, someAdd, someMul, someMin, someMax: + # these are symmetric; put value as last: + if result.sons[1].isValue and not result.sons[2].isValue: + result = swapArgs(result, result.sons[0].sym) + # (4 + foo) + 2 --> (foo + 4) + 2 + of someHigh: + # high == len+(-1) + result = opAdd.buildCall(opLen.buildCall(result[1]), minusOne()) + of mUnaryMinusI, mUnaryMinusI64: + result = buildCall(opAdd, result[1], newIntNode(nkIntLit, -1)) + of someSub: + # x - 4 --> x + (-4) + var b = result[2] + if b.kind in {nkCharLit..nkUInt64Lit} and b.intVal != low(BiggestInt): + b = copyNode(b) + b.intVal = -b.intVal + result = buildCall(opAdd, result[1], b) + elif b.kind in {nkFloatLit..nkFloat64Lit}: + b = copyNode(b) + b.floatVal = -b.floatVal + result = buildCall(opAdd, result[1], b) + of someLen: + result.sons[0] = opLen.newSymNode + else: discard + + # re-association: + # (foo+5)+5 --> foo+10; same for '*' + case result.getMagic + of someAdd: + if result[2].isValue and + result[1].getMagic in someAdd and result[1][2].isValue: + result = opAdd.buildCall(result[1][1], result[1][2] |+| result[2]) + of someMul: + if result[2].isValue and + result[1].getMagic in someMul and result[1][2].isValue: + result = opAdd.buildCall(result[1][1], result[1][2] |*| result[2]) + else: discard + + # most important rule: (x-4) < a.len --> x < a.len+4 + case result.getMagic + of someLe, someLt: + let x = result[1] + let y = result[2] + if x.kind in nkCallKinds and x.len == 3 and x[2].isValue and + isLetLocation(x[1], true): + case x.getMagic + of someSub: + result = buildCall(result[0].sym, x[1], opAdd.buildCall(y, x[2])) + of someAdd: + result = buildCall(result[0].sym, x[1], opSub.buildCall(y, x[2])) + else: discard + elif y.kind in nkCallKinds and y.len == 3 and y[2].isValue and + isLetLocation(y[1], true): + # a.len < x-3 + case y.getMagic + of someSub: + result = buildCall(result[0].sym, y[1], opAdd.buildCall(x, y[2])) + of someAdd: + result = buildCall(result[0].sym, y[1], opSub.buildCall(x, y[2])) + else: discard + else: discard proc usefulFact(n: PNode): PNode = case n.getMagic of someEq: if skipConv(n.sons[2]).kind == nkNilLit and ( isLetLocation(n.sons[1], false) or isVar(n.sons[1])): - result = buildIsNil(n.sons[1]) + result = opIsNil.buildCall(n.sons[1]) else: if isLetLocation(n.sons[1], true) or isLetLocation(n.sons[2], true): # XXX algebraic simplifications! 'i-1 < a.len' --> 'i < a.len+1' @@ -217,7 +335,7 @@ proc addFactNeg*(m: var TModel, n: PNode) = let n = n.neg if n != nil: addFact(m, n) -proc sameTree(a, b: PNode): bool = +proc sameTree*(a, b: PNode): bool = result = false if a == b: result = true @@ -519,7 +637,46 @@ proc doesImply*(facts: TModel, prop: PNode): TImplication = if result != impUnknown: return proc impliesNotNil*(facts: TModel, arg: PNode): TImplication = - result = doesImply(facts, buildIsNil(arg).neg) + result = doesImply(facts, opIsNil.buildCall(arg).neg) + +proc proveLe*(m: TModel; a, b: PNode): TImplication = + let res = canon(opLe.buildCall(a, b)) + # we hardcode lots of axioms here: + let a = res[1] + let b = res[2] + # 0 <= 3 + if a.isValue and b.isValue: + return if leValue(a, b): impYes else: impNo + + # use type information too: x <= 4 iff high(x) <= 4 + if b.isValue and a.typ != nil and a.typ.isOrdinalType: + if lastOrd(a.typ) <= b.intVal: return impYes + # 3 <= x iff low(x) <= 3 + if a.isValue and b.typ != nil and b.typ.isOrdinalType: + if firstOrd(b.typ) <= a.intVal: return impYes + + # x <= x + if sameTree(a, b): return impYes + + # x <= x+c iff 0 <= c + if b.getMagic in someAdd and sameTree(a, b[1]): + return proveLe(m, zero(), b[2]) + + # x <= x*c if 1 <= c and 0 <= x: + if b.getMagic in someMul and sameTree(a, b[1]): + if proveLe(m, one(), b[2]) == impYes and proveLe(m, zero(), a) == impYes: + return impYes + + # x div c <= x if 1 <= c and 0 <= x: + if a.getMagic in someDiv and sameTree(a[1], b): + if proveLe(m, one(), a[2]) == impYes and proveLe(m, zero(), b) == impYes: + return impYes + + # use the knowledge base: + return doesImply(m, res) + +proc addFactLe*(m: var TModel; a, b: PNode) = + m.add canon(opLe.buildCall(a, b)) proc settype(n: PNode): PType = result = newType(tySet, n.typ.owner) diff --git a/compiler/lowerings.nim b/compiler/lowerings.nim index 1b9e5fe0f..93bfd8425 100644 --- a/compiler/lowerings.nim +++ b/compiler/lowerings.nim @@ -114,11 +114,15 @@ proc callCodegenProc*(name: string, arg1: PNode; if arg3 != nil: result.add arg3 proc createWrapperProc(f: PNode; threadParam, argsParam: PSym; - varSection, call: PNode): PSym = + varSection, call, barrier: PNode): PSym = var body = newNodeI(nkStmtList, f.info) body.add varSection + if barrier != nil: + body.add callCodeGenProc("barrierEnter", barrier) body.add callCodeGenProc("nimArgsPassingDone", newSymNode(threadParam)) body.add call + if barrier != nil: + body.add callCodeGenProc("barrierLeave", barrier) var params = newNodeI(nkFormalParams, f.info) params.add emptyNode @@ -146,7 +150,7 @@ proc createCastExpr(argsParam: PSym; objType: PType): PNode = result.typ = newType(tyPtr, objType.owner) result.typ.rawAddSon(objType) -proc wrapProcForSpawn*(owner: PSym; n: PNode): PNode = +proc wrapProcForSpawn*(owner: PSym; n: PNode; barrier: PNode = nil): PNode = result = newNodeI(nkStmtList, n.info) if n.kind notin nkCallKinds or not n.typ.isEmptyType: localError(n.info, "'spawn' takes a call expression of type void") @@ -162,6 +166,7 @@ proc wrapProcForSpawn*(owner: PSym; n: PNode): PNode = threadParam.typ = ptrType argsParam.typ = ptrType argsParam.position = 1 + var objType = createObj(owner, n.info) incl(objType.flags, tfFinal) let castExpr = createCastExpr(argsParam, objType) @@ -223,6 +228,17 @@ proc wrapProcForSpawn*(owner: PSym; n: PNode): PNode = call.add(newSymNode(temp)) - let wrapper = createWrapperProc(fn, threadParam, argsParam, varSection, call) + var barrierAsExpr: PNode = nil + if barrier != nil: + let typ = newType(tyPtr, owner) + typ.rawAddSon(magicsys.getCompilerProc("Barrier").typ) + var field = newSym(skField, getIdent"barrier", owner, n.info) + field.typ = typ + objType.addField(field) + result.add newFastAsgnStmt(newDotExpr(scratchObj, field), barrier) + barrierAsExpr = indirectAccess(castExpr, field, n.info) + + let wrapper = createWrapperProc(fn, threadParam, argsParam, varSection, call, + barrierAsExpr) result.add callCodeGenProc("nimSpawn", wrapper.newSymNode, genAddrOf(scratchObj.newSymNode)) diff --git a/compiler/semparallel.nim b/compiler/semparallel.nim new file mode 100644 index 000000000..34a1f3af8 --- /dev/null +++ b/compiler/semparallel.nim @@ -0,0 +1,414 @@ +# +# +# The Nimrod Compiler +# (c) Copyright 2014 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## Semantic checking for 'parallel'. + +# - slices should become "nocopy" to openArray (+) +# - need to perform bound checks (+) +# +# - parallel needs to insert a barrier (+) +# - passed arguments need to be ensured to be "const" +# - what about 'f(a)'? --> f shouldn't have side effects anyway +# - passed arrays need to be ensured not to alias +# - passed slices need to be ensured to be disjoint (+) +# - output slices need special logic + +import lowerings, guards, sempass2 + +discard """ + +one major problem: + spawn f(a[i]) + inc i + spawn f(a[i]) +is valid, but + spawn f(a[i]) + spawn f(a[i]) + inc i +is not! However, + spawn f(a[i]) + if guard: inc i + spawn f(a[i]) +is not valid either! --> We need a flow dependent analysis here. + +However: + while foo: + spawn f(a[i]) + inc i + spawn f(a[i]) + +Is not valid either! --> We should really restrict 'inc' to loop endings? + +The heuristic that we implement here (that has no false positives) is: Usage +of 'i' in a slice *after* we determined the stride is invalid! +""" + +type + TDirection = enum + ascending, descending + MonotonicVar = object + v: PSym + lower, upper, stride: PNode + dir: TDirection + blacklisted: bool # blacklisted variables that are not monotonic + AnalysisCtx = object + locals: seq[MonotonicVar] + slices: seq[tuple[x,a,b: PNode, spawnId: int, inLoop: bool]] + guards: TModel # nested guards + args: seq[PSym] # args must be deeply immutable + spawns: int # we can check that at last 1 spawn is used in + # the 'parallel' section + currentSpawnId: int + inLoop: int + +let opSlice = createMagic("slice", mSlice) + +proc initAnalysisCtx(): AnalysisCtx = + result.locals = @[] + result.slices = @[] + result.args = @[] + result.guards = @[] + +proc getSlot(c: var AnalysisCtx; s: PSym): ptr MonotonicVar = + var L = c.locals.len + for i in 0.. " & b.renderTree) + +proc checkBounds(c: AnalysisCtx; arr, idx: PNode) = + checkLe(c, arr.lowBound, idx) + checkLe(c, idx, arr.highBound) + +proc addLowerBoundAsFacts(c: var AnalysisCtx) = + for v in c.locals: + if not v.blacklisted: + c.guards.addFactLe(v.lower, newSymNode(v.v)) + +proc addSlice(c: var AnalysisCtx; n: PNode; x, le, ri: int) = + checkLocal(c, n) + let le = n.sons[le] + let ri = n.sons[ri] + let x = n.sons[x] + # perform static bounds checking here; and not later! + let oldState = c.guards.len + addLowerBoundAsFacts(c) + c.checkBounds(x, le) + c.checkBounds(x, ri) + c.guards.setLen(oldState) + c.slices.add((x, le, ri, c.currentSpawnId, c.inLoop > 0)) + +template `?`(x): expr = x.renderTree + +proc overlap(m: TModel; x,y,c,d: PNode) = + # X..Y and C..D overlap iff (X <= D and Y >= C) + case proveLe(m, x, d) + of impUnkown: + localError(x.info, + "cannot prove: $# > $#; required for $#..$# disjoint from $#..$#" % + [?x, ?d, ?x, ?y, ?c, ?d]) + of impYes: + case proveLe(m, y, c) + of impUnknown: + localError(x.info, + "cannot prove: $# > $#; required for $#..$# disjoint from $#..$#" % + [?y, ?d, ?x, ?y, ?c, ?d]) + of impYes: + localError(x.info, "$#..$# not disjoint from $#..$#" % [?x, ?y, ?c, ?d]) + of impNo: discard + of impNo: discard + +proc stride(c: AnalysisCtx; n: PNode): BiggestInt = + # note: 0 if it cannot be determined is just right because then + # we analyse 'i..i' and 'i+0 .. i+0' and these are not disjoint! + if n.kind == nkSym and isLocal(n.sym): + let slot = c.getSlot(n[1].sym) + if slot.stride != nil: + result = slot.stride.intVal + else: + for i in 0 .. 0: + result = copyNode(n.kind, n.info, n.len) + for i in 0 .. < n.len: + result.sons[i] = transformSlices(n.sons[i]) + else: + result = n + +proc transformSpawn(owner: PSym; n, barrier: PNode): PNode = + if n.kind in nkCalls: + if n[0].kind == nkSym: + let op = n[0].sym + if op.magic == mSpawn: + result = transformSlices(n) + return wrapProcForSpawn(owner, result, barrier) + elif n.safeLen > 0: + result = copyNode(n.kind, n.info, n.len) + for i in 0 .. < n.len: + result.sons[i] = transformSpawn(owner, n.sons[i], barrier) + else: + result = n + +proc liftParallel*(owner: PSym; n: PNode): PNode = + # this needs to be called after the 'for' loop elimination + + # first pass: + # - detect monotonic local integer variables + # - detect used slices + # - detect used arguments + + var a = initAnalysisCtx() + let body = n.lastSon + analyse(a, body) + if a.spawns == 0: + localError(n.info, "'parallel' section without 'spawn'") + checkSlices(a) + checkArgs(a, body) + + var varSection = newNodeI(nkVarSection, n.info) + var temp = newSym(skTemp, "barrier", owner, n.info) + temp.typ = magicsys.getCompilerProc("Barrier").typ + incl(temp.flags, sfFromGeneric) + + var vpart = newNodeI(nkIdentDefs, n.info, 3) + vpart.sons[0] = newSymNode(temp) + vpart.sons[1] = ast.emptyNode + vpart.sons[2] = indirectAccess(castExpr, field, n.info) + varSection.add vpart + + barrier = genAddrOf(vpart[0]) + + result = newNodeI(nkStmtList, n.info) + generateAliasChecks(a, result) + result.add varSection + result.add callCodeGenProc("openBarrier", barrier) + result.add transformSpawn(owner, body, barrier) + result.add callCodeGenProc("closeBarrier", barrier) diff --git a/compiler/sempass2.nim b/compiler/sempass2.nim index 6afde5f05..c8ce5e787 100644 --- a/compiler/sempass2.nim +++ b/compiler/sempass2.nim @@ -89,7 +89,7 @@ proc initVarViaNew(a: PEffects, n: PNode) = if n.kind != nkSym: return let s = n.sym if {tfNeedsInit, tfNotNil} * s.typ.flags <= {tfNotNil}: - # 'x' is not nil, but that doesn't mean it's not nil children + # 'x' is not nil, but that doesn't mean its "not nil" children # are initialized: initVar(a, n) @@ -478,7 +478,7 @@ proc trackBlock(tracked: PEffects, n: PNode) = else: track(tracked, n) -proc isTrue(n: PNode): bool = +proc isTrue*(n: PNode): bool = n.kind == nkSym and n.sym.kind == skEnumField and n.sym.position != 0 or n.kind == nkIntLit and n.intVal != 0 diff --git a/compiler/vm.nim b/compiler/vm.nim index 218369fa1..0c2c23987 100644 --- a/compiler/vm.nim +++ b/compiler/vm.nim @@ -131,8 +131,9 @@ proc createStrKeepNode(x: var TFullReg) = nfAllConst in x.node.flags: # XXX this is hacky; tests/txmlgen triggers it: x.node = newNode(nkStrLit) - # debug x.node - #assert x.node.kind in {nkStrLit..nkTripleStrLit} + # It not only hackey, it is also wrong for tgentemplate. The primary + # cause of bugs like these is that the VM does not properly distinguish + # between variable defintions (var foo = e) and variable updates (foo = e). template createStr(x) = x.node = newNode(nkStrLit) diff --git a/config/nimrod.cfg b/config/nimrod.cfg index 2817eac55..df3835ace 100644 --- a/config/nimrod.cfg +++ b/config/nimrod.cfg @@ -16,6 +16,7 @@ arm.linux.gcc.linkerexe = "arm-linux-gcc" path="$lib/core" path="$lib/pure" path="$lib/pure/collections" +path="$lib/pure/concurrency" path="$lib/impure" path="$lib/wrappers" # path="$lib/wrappers/cairo" diff --git a/lib/pure/concurrency/cpuinfo.nim b/lib/pure/concurrency/cpuinfo.nim new file mode 100644 index 000000000..dfa819f64 --- /dev/null +++ b/lib/pure/concurrency/cpuinfo.nim @@ -0,0 +1,58 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2014 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## This module implements procs to determine the number of CPUs / cores. + +include "system/inclrtl" + +import strutils, os + +when not defined(windows): + import posix + +when defined(linux): + import linux + +when defined(macosx) or defined(bsd): + const + CTL_HW = 6 + HW_AVAILCPU = 25 + HW_NCPU = 3 + proc sysctl(x: ptr array[0..3, cint], y: cint, z: pointer, + a: var csize, b: pointer, c: int): cint {. + importc: "sysctl", header: "".} + +proc countProcessors*(): int {.rtl, extern: "ncpi$1".} = + ## returns the numer of the processors/cores the machine has. + ## Returns 0 if it cannot be detected. + when defined(windows): + var x = getEnv("NUMBER_OF_PROCESSORS") + if x.len > 0: result = parseInt(x.string) + elif defined(macosx) or defined(bsd): + var + mib: array[0..3, cint] + numCPU: int + len: csize + mib[0] = CTL_HW + mib[1] = HW_AVAILCPU + len = sizeof(numCPU) + discard sysctl(addr(mib), 2, addr(numCPU), len, nil, 0) + if numCPU < 1: + mib[1] = HW_NCPU + discard sysctl(addr(mib), 2, addr(numCPU), len, nil, 0) + result = numCPU + elif defined(hpux): + result = mpctl(MPC_GETNUMSPUS, nil, nil) + elif defined(irix): + var SC_NPROC_ONLN {.importc: "_SC_NPROC_ONLN", header: "".}: cint + result = sysconf(SC_NPROC_ONLN) + else: + result = sysconf(SC_NPROCESSORS_ONLN) + if result <= 0: result = 1 + diff --git a/lib/pure/concurrency/cpuload.nim b/lib/pure/concurrency/cpuload.nim new file mode 100644 index 000000000..3cf6a7392 --- /dev/null +++ b/lib/pure/concurrency/cpuload.nim @@ -0,0 +1,96 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2014 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## This module implements a helper for a thread pool to determine whether +## creating a thread is a good idea. + +when defined(windows): + import winlean, os, strutils, math + + proc `-`(a, b: TFILETIME): int64 = a.rdFileTime - b.rdFileTime +elif defined(linux): + from cpuinfo import countProcessors + +type + ThreadPoolAdvice* = enum + doNothing, + doCreateThread, # create additional thread for throughput + doShutdownThread # too many threads are busy, shutdown one + + ThreadPoolState* = object + when defined(windows): + prevSysKernel, prevSysUser, prevProcKernel, prevProcUser: TFILETIME + calls*: int + +proc advice*(s: var ThreadPoolState): ThreadPoolAdvice = + when defined(windows): + var + sysIdle, sysKernel, sysUser, + procCreation, procExit, procKernel, procUser: TFILETIME + if getSystemTimes(sysIdle, sysKernel, sysUser) == 0 or + getProcessTimes(THandle(-1), procCreation, procExit, + procKernel, procUser) == 0: + return doNothing + if s.calls > 0: + let + sysKernelDiff = sysKernel - s.prevSysKernel + sysUserDiff = sysUser - s.prevSysUser + + procKernelDiff = procKernel - s.prevProcKernel + procUserDiff = procUser - s.prevProcUser + + sysTotal = int(sysKernelDiff + sysUserDiff) + procTotal = int(procKernelDiff + procUserDiff) + # total CPU usage < 85% --> create a new worker thread. + # Measurements show that 100% and often even 90% is not reached even + # if all my cores are busy. + if sysTotal == 0 or procTotal / sysTotal < 0.85: + result = doCreateThread + s.prevSysKernel = sysKernel + s.prevSysUser = sysUser + s.prevProcKernel = procKernel + s.prevProcUser = procUser + elif defined(linux): + proc fscanf(c: TFile, frmt: cstring) {.varargs, importc, + header: "".} + + var f = open("/proc/loadavg") + var b: float + var busy, total: int + fscanf(f,"%lf %lf %lf %ld/%ld", + addr b, addr b, addr b, addr busy, addr total) + f.close() + let cpus = countProcessors() + if busy-1 < cpus: + result = doCreateThread + elif busy-1 >= cpus*2: + result = doShutdownThread + else: + result = doNothing + else: + # XXX implement this for other OSes + result = doNothing + inc s.calls + +when isMainModule: + proc busyLoop() = + while true: + discard random(80) + os.sleep(100) + + spawn busyLoop() + spawn busyLoop() + spawn busyLoop() + spawn busyLoop() + + var s: ThreadPoolState + + for i in 1 .. 70: + echo advice(s) + os.sleep(1000) diff --git a/lib/pure/concurrency/threadpool.nim b/lib/pure/concurrency/threadpool.nim new file mode 100644 index 000000000..856820c6e --- /dev/null +++ b/lib/pure/concurrency/threadpool.nim @@ -0,0 +1,210 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2014 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## Implements Nimrod's 'spawn'. + +import cpuinfo, cpuload, locks + +{.push stackTrace:off.} + +type + CondVar = object + c: TCond + L: TLock + counter: int + +proc createCondVar(): CondVar = + initCond(result.c) + initLock(result.L) + +proc destroyCondVar(cv: var CondVar) {.inline.} = + deinitCond(cv.c) + deinitLock(cv.L) + +proc await(cv: var CondVar) = + acquire(cv.L) + while cv.counter <= 0: + wait(cv.c, cv.L) + dec cv.counter + release(cv.L) + +proc signal(cv: var CondVar) = + acquire(cv.L) + inc cv.counter + release(cv.L) + signal(cv.c) + +type + Barrier* {.compilerProc.} = object + counter: int + cv: CondVar + +proc barrierEnter*(b: ptr Barrier) {.compilerProc.} = + atomicInc b.counter + +proc barrierLeave*(b: ptr Barrier) {.compilerProc.} = + atomicDec b.counter + if b.counter <= 0: signal(b.cv) + +proc openBarrier*(b: ptr Barrier) {.compilerProc.} = + b.counter = 0 + b.cv = createCondVar() + +proc closeBarrier*(b: ptr Barrier) {.compilerProc.} = + await(b.cv) + destroyCondVar(b.cv) + +{.pop.} + +# ---------------------------------------------------------------------------- + +type + WorkerProc = proc (thread, args: pointer) {.nimcall, gcsafe.} + Worker = object + taskArrived: CondVar + taskStarted: CondVar #\ + # task data: + f: WorkerProc + data: pointer + ready: bool # put it here for correct alignment! + initialized: bool # whether it has even been initialized + +proc nimArgsPassingDone(p: pointer) {.compilerProc.} = + let w = cast[ptr Worker](p) + signal(w.taskStarted) + +var + gSomeReady = createCondVar() + readyWorker: ptr Worker + +proc slave(w: ptr Worker) {.thread.} = + while true: + w.ready = true + readyWorker = w + signal(gSomeReady) + await(w.taskArrived) + assert(not w.ready) + w.f(w, w.data) + +const + MaxThreadPoolSize* = 256 ## maximal size of the thread pool. 256 threads + ## should be good enough for anybody ;-) + +var + currentPoolSize: int + maxPoolSize = MaxThreadPoolSize + minPoolSize = 4 + +proc setMinPoolSize*(size: range[1..MaxThreadPoolSize]) = + ## sets the minimal thread pool size. The default value of this is 4. + minPoolSize = size + +proc setMaxPoolSize*(size: range[1..MaxThreadPoolSize]) = + ## sets the minimal thread pool size. The default value of this + ## is ``MaxThreadPoolSize``. + maxPoolSize = size + +var + workers: array[MaxThreadPoolSize, TThread[ptr Worker]] + workersData: array[MaxThreadPoolSize, Worker] + +proc activateThread(i: int) {.noinline.} = + workersData[i].taskArrived = createCondVar() + workersData[i].taskStarted = createCondVar() + workersData[i].initialized = true + createThread(workers[i], slave, addr(workersData[i])) + +proc setup() = + currentPoolSize = min(countProcessors(), MaxThreadPoolSize) + readyWorker = addr(workersData[0]) + for i in 0.. 0 + +proc spawn*(call: stmt) {.magic: "Spawn".} + ## always spawns a new task, so that the 'call' is never executed on + ## the calling thread. 'call' has to be proc call 'p(...)' where 'p' + ## is gcsafe and has 'void' as the return type. + +template spawnX*(call: stmt) = + ## spawns a new task if a CPU core is ready, otherwise executes the + ## call in the calling thread. Usually it is advised to + ## use 'spawn' in order to not block the producer for an unknown + ## amount of time. 'call' has to be proc call 'p(...)' where 'p' + ## is gcsafe and has 'void' as the return type. + if preferSpawn(): spawn call + else: call + +proc parallel*(body: stmt) {.magic: "Parallel".} + ## a parallel section can be used to execute a block in parallel. ``body`` + ## has to be in a DSL that is a particular subset of the language. Please + ## refer to the manual for further information. + +var + state: ThreadPoolState + stateLock: TLock + +initLock stateLock + +proc selectWorker(w: ptr Worker; fn: WorkerProc; data: pointer): bool = + if cas(addr w.ready, true, false): + w.data = data + w.f = fn + signal(w.taskArrived) + await(w.taskStarted) + result = true + +proc nimSpawn(fn: WorkerProc; data: pointer) {.compilerProc.} = + # implementation of 'spawn' that is used by the code generator. + while true: + if selectWorker(readyWorker, fn, data): return + for i in 0.. minPoolSize: dec currentPoolSize + # we don't free anything here. Too dangerous. + release(stateLock) + # else the acquire failed, but this means some + # other thread succeeded, so we don't need to do anything here. + await(gSomeReady) + +proc sync*() = + ## a simple barrier to wait for all spawn'ed tasks. If you need more elaborate + ## waiting, you have to use an explicit barrier. + while true: + var allReady = true + for i in 0 .. ".} - proc countProcessors*(): int {.rtl, extern: "nosp$1".} = ## returns the numer of the processors/cores the machine has. ## Returns 0 if it cannot be detected. - when defined(windows): - var x = getEnv("NUMBER_OF_PROCESSORS") - if x.len > 0: result = parseInt(x.string) - elif defined(macosx) or defined(bsd): - var - mib: array[0..3, cint] - numCPU: int - len: csize - mib[0] = CTL_HW - mib[1] = HW_AVAILCPU - len = sizeof(numCPU) - discard sysctl(addr(mib), 2, addr(numCPU), len, nil, 0) - if numCPU < 1: - mib[1] = HW_NCPU - discard sysctl(addr(mib), 2, addr(numCPU), len, nil, 0) - result = numCPU - elif defined(hpux): - result = mpctl(MPC_GETNUMSPUS, nil, nil) - elif defined(irix): - var SC_NPROC_ONLN {.importc: "_SC_NPROC_ONLN", header: "".}: cint - result = sysconf(SC_NPROC_ONLN) - else: - result = sysconf(SC_NPROCESSORS_ONLN) - if result <= 0: result = 1 + result = cpuinfo.countProcessors() proc execProcesses*(cmds: openArray[string], options = {poStdErrToStdOut, poParentStreams}, diff --git a/lib/system.nim b/lib/system.nim index ad98540a7..fbd905afa 100644 --- a/lib/system.nim +++ b/lib/system.nim @@ -2934,6 +2934,3 @@ when not defined(booting): template isStatic*(x): expr = compiles(static(x)) # checks whether `x` is a value known at compile-time - -when hasThreadSupport: - when hostOS != "standalone": include "system/sysspawn" diff --git a/lib/system/atomics.nim b/lib/system/atomics.nim index b1a96b209..c6c603b19 100644 --- a/lib/system/atomics.nim +++ b/lib/system/atomics.nim @@ -1,13 +1,14 @@ # # # Nimrod's Runtime Library -# (c) Copyright 2012 Andreas Rumpf +# (c) Copyright 2014 Andreas Rumpf # # See the file "copying.txt", included in this # distribution, for details about the copyright. # ## Atomic operations for Nimrod. +{.push stackTrace:off.} when (defined(gcc) or defined(llvm_gcc)) and hasThreadSupport: type @@ -203,3 +204,31 @@ proc atomicDec*(memLoc: var int, x: int = 1): int = else: dec(memLoc, x) result = memLoc + +when defined(windows) and not defined(gcc): + proc interlockedCompareExchange(p: pointer; exchange, comparand: int32): int32 + {.importc: "InterlockedCompareExchange", header: "", cdecl.} + + proc cas*[T: bool|int](p: ptr T; oldValue, newValue: T): bool = + interlockedCompareExchange(p, newValue.int32, oldValue.int32) != 0 + +else: + # this is valid for GCC and Intel C++ + proc cas*[T: bool|int](p: ptr T; oldValue, newValue: T): bool + {.importc: "__sync_bool_compare_and_swap", nodecl.} + # XXX is this valid for 'int'? + + +when (defined(x86) or defined(amd64)) and defined(gcc): + proc cpuRelax {.inline.} = + {.emit: """asm volatile("pause" ::: "memory");""".} +elif (defined(x86) or defined(amd64)) and defined(vcc): + proc cpuRelax {.importc: "YieldProcessor", header: "".} +elif defined(intelc): + proc cpuRelax {.importc: "_mm_pause", header: "xmmintrin.h".} +elif false: + from os import sleep + + proc cpuRelax {.inline.} = os.sleep(1) + +{.pop.} diff --git a/lib/system/sysspawn.nim b/lib/system/sysspawn.nim index dabf35a3e..95cdba65d 100644 --- a/lib/system/sysspawn.nim +++ b/lib/system/sysspawn.nim @@ -14,30 +14,6 @@ when not defined(NimString): {.push stackTrace:off.} -when (defined(x86) or defined(amd64)) and defined(gcc): - proc cpuRelax {.inline.} = - {.emit: """asm volatile("pause" ::: "memory");""".} -elif (defined(x86) or defined(amd64)) and defined(vcc): - proc cpuRelax {.importc: "YieldProcessor", header: "".} -elif defined(intelc): - proc cpuRelax {.importc: "_mm_pause", header: "xmmintrin.h".} -elif false: - from os import sleep - - proc cpuRelax {.inline.} = os.sleep(1) - -when defined(windows) and not defined(gcc): - proc interlockedCompareExchange(p: pointer; exchange, comparand: int32): int32 - {.importc: "InterlockedCompareExchange", header: "", cdecl.} - - proc cas(p: ptr bool; oldValue, newValue: bool): bool = - interlockedCompareExchange(p, newValue.int32, oldValue.int32) != 0 - -else: - # this is valid for GCC and Intel C++ - proc cas(p: ptr bool; oldValue, newValue: bool): bool - {.importc: "__sync_bool_compare_and_swap", nodecl.} - # We declare our own condition variables here to get rid of the dummy lock # on Windows: @@ -54,6 +30,9 @@ proc createCondVar(): CondVar = initSysLock(result.stupidLock) #acquireSys(result.stupidLock) +proc destroyCondVar(c: var CondVar) {.inline.} = + deinitSysCond(c.c) + proc await(cv: var CondVar) = when defined(posix): acquireSys(cv.stupidLock) @@ -100,6 +79,26 @@ proc signal(cv: var FastCondVar) = #if cas(addr cv.slowPath, true, false): signal(cv.slow) +type + Barrier* {.compilerProc.} = object + counter: int + cv: CondVar + +proc barrierEnter*(b: ptr Barrier) {.compilerProc.} = + atomicInc b.counter + +proc barrierLeave*(b: ptr Barrier) {.compilerProc.} = + atomicDec b.counter + if b.counter <= 0: signal(b.cv) + +proc openBarrier*(b: ptr Barrier) {.compilerProc.} = + b.counter = 0 + b.cv = createCondVar() + +proc closeBarrier*(b: ptr Barrier) {.compilerProc.} = + await(b.cv) + destroyCondVar(b.cv) + {.pop.} # ---------------------------------------------------------------------------- diff --git a/tests/system/tsysspawn.nim b/tests/system/tsysspawn.nim index 0388918aa..fc7921b0e 100644 --- a/tests/system/tsysspawn.nim +++ b/tests/system/tsysspawn.nim @@ -4,20 +4,22 @@ discard """ cmd: "nimrod $target --threads:on $options $file" """ +import threadpool + var x, y = 0 proc p1 = - for i in 0 .. 1_000_000: + for i in 0 .. 10_000: discard - inc x + atomicInc x proc p2 = - for i in 0 .. 1_000_000: + for i in 0 .. 10_000: discard - inc y, 2 + atomicInc y, 2 for i in 0.. 3: spawn(p1()) diff --git a/tests/system/tsysspawnbadarg.nim b/tests/system/tsysspawnbadarg.nim index ace074602..ce3c5611b 100644 --- a/tests/system/tsysspawnbadarg.nim +++ b/tests/system/tsysspawnbadarg.nim @@ -4,4 +4,6 @@ discard """ cmd: "nimrod $target --threads:on $options $file" """ +import threadpool + spawn(1) diff --git a/web/news.txt b/web/news.txt index 0bbae7b7b..b7403a3c7 100644 --- a/web/news.txt +++ b/web/news.txt @@ -2,6 +2,23 @@ News ==== +.. + 2014-06-29 Version 0.9.6 released + ================================= + + Changes affecting backwards compatibility + ----------------------------------------- + + - ``spawn`` now uses an elaborate self-adapting thread pool and as such + has been moved into its own module. So to use it, you now have to import + ``threadpool``. + + + Library Additions + ----------------- + + - Added module ``cpuinfo``. + - Added module ``threadpool``. 2014-04-21 Version 0.9.4 released -- cgit 1.4.1-2-gfad0 From 502f7bffa35d25593ec822ddf8d8c7b7210b30dc Mon Sep 17 00:00:00 2001 From: EXetoC Date: Tue, 13 May 2014 15:03:58 +0200 Subject: Resolve type mismatches. --- compiler/ccgexprs.nim | 2 +- koch.nim | 4 ++-- lib/system/sets.nim | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'lib/system') diff --git a/compiler/ccgexprs.nim b/compiler/ccgexprs.nim index 94a6f4781..39333a80d 100644 --- a/compiler/ccgexprs.nim +++ b/compiler/ccgexprs.nim @@ -484,7 +484,7 @@ proc unaryArithOverflow(p: BProc, e: PNode, d: var TLoc, m: TMagic) = opr: array[mUnaryMinusI..mAbsI64, string] = [ mUnaryMinusI: "((NI$2)-($1))", mUnaryMinusI64: "-($1)", - mAbsI: "(NI$2)abs($1)", + mAbsI: "($1 > 0? ($1) : -($1))", mAbsI64: "($1 > 0? ($1) : -($1))"] var a: TLoc diff --git a/koch.nim b/koch.nim index c203e0fd0..58c746ee8 100644 --- a/koch.nim +++ b/koch.nim @@ -152,7 +152,7 @@ proc boot(args: string) = copyExe(findStartNimrod(), 0.thVersion) for i in 0..2: echo "iteration: ", i+1 - exec i.thVersion & " cc $# $# compiler" / "nimrod.nim" % [bootOptions, args] + exec i.thVersion & " cpp $# $# compiler" / "nimrod.nim" % [bootOptions, args] if sameFileContent(output, i.thVersion): copyExe(output, finalDest) echo "executables are equal: SUCCESS!" @@ -282,7 +282,7 @@ proc tests(args: string) = proc temp(args: string) = var output = "compiler" / "nimrod".exe var finalDest = "bin" / "nimrod_temp".exe - exec("nimrod c compiler" / "nimrod") + exec("nimrod cpp compiler" / "nimrod") copyExe(output, finalDest) if args.len > 0: exec(finalDest & " " & args) diff --git a/lib/system/sets.nim b/lib/system/sets.nim index 043d37533..794c65cb8 100644 --- a/lib/system/sets.nim +++ b/lib/system/sets.nim @@ -10,7 +10,7 @@ # set handling type - TNimSet = array [0..4*2048-1, int8] + TNimSet = array [0..4*2048-1, uint8] proc countBits32(n: int32): int {.compilerproc.} = var v = n @@ -25,4 +25,4 @@ proc countBits64(n: int64): int {.compilerproc.} = proc cardSet(s: TNimSet, len: int): int {.compilerproc.} = result = 0 for i in countup(0, len-1): - inc(result, countBits32(int32(ze(s[i])))) + inc(result, countBits32(int32(s[i]))) -- cgit 1.4.1-2-gfad0 From f5ed8f3a1b1b23af2b06ba9d6fc4e4bc80e1f4cf Mon Sep 17 00:00:00 2001 From: Jostein Berre Eliassen Date: Tue, 20 May 2014 00:44:16 +0200 Subject: added getTotalSharedMem et al. --- lib/system.nim | 14 ++++++++++++++ lib/system/alloc.nim | 16 ++++++++++++++++ 2 files changed, 30 insertions(+) (limited to 'lib/system') diff --git a/lib/system.nim b/lib/system.nim index ad98540a7..c69a335e4 100644 --- a/lib/system.nim +++ b/lib/system.nim @@ -1434,6 +1434,20 @@ when not defined(nimrodVM) and hostOS != "standalone": proc getTotalMem*(): int {.rtl.} ## returns the number of bytes that are owned by the process. + when hasThreadSupport: + proc getOccupiedSharedMem*(): int {.rtl.} + ## returns the number of bytes that are owned by the process + ## on the shared heap and hold data. This is only available when + ## threads are enabled. + + proc getFreeSharedMem*(): int {.rtl.} + ## returns the number of bytes that are owned by the + ## process on the shared heap, but do not hold any meaningful data. + ## This is only available when threads are enabled. + + proc getTotalSharedMem*(): int {.rtl.} + ## returns the number of bytes on the shared heap that are owned by the + ## process. This is only available when threads are enabled. iterator countdown*[T](a, b: T, step = 1): T {.inline.} = ## Counts from ordinal value `a` down to `b` with the given diff --git a/lib/system/alloc.nim b/lib/system/alloc.nim index eaef6cd95..602e5c7fa 100644 --- a/lib/system/alloc.nim +++ b/lib/system/alloc.nim @@ -835,4 +835,20 @@ template instantiateForRegion(allocator: expr) = else: result = realloc(p, newsize) + when hasThreadSupport: + + template sharedMemStatsShared(v: int) {.immediate.} = + acquireSys(heapLock) + result = v + releaseSys(heapLock) + + proc getFreeSharedMem(): int = + sharedMemStatsShared(sharedHeap.freeMem) + + proc getTotalSharedMem(): int = + sharedMemStatsShared(sharedHeap.currMem) + + proc getOccupiedSharedMem(): int = + sharedMemStatsShared(sharedHeap.currMem - sharedHeap.freeMem) + {.pop.} -- cgit 1.4.1-2-gfad0 From 417b9f5a1d13f26842b1337395a0f5b57827cc12 Mon Sep 17 00:00:00 2001 From: Araq Date: Thu, 22 May 2014 08:41:50 +0200 Subject: 'parallel' statement almost working --- compiler/ccgexprs.nim | 2 +- compiler/guards.nim | 58 ++++--- compiler/lowerings.nim | 286 ++++++++++++++++++++++++++----- compiler/semmagic.nim | 12 +- compiler/semparallel.nim | 89 ++++++---- doc/manual.txt | 2 +- lib/pure/concurrency/threadpool.nim | 112 ++++++++++++ lib/system/atomics.nim | 6 +- tests/parallel/tdisjoint_slice1.nim | 16 +- tests/parallel/tinvalid_array_bounds.nim | 2 +- 10 files changed, 470 insertions(+), 115 deletions(-) (limited to 'lib/system') diff --git a/compiler/ccgexprs.nim b/compiler/ccgexprs.nim index 7fb6af896..34fdf5bf1 100644 --- a/compiler/ccgexprs.nim +++ b/compiler/ccgexprs.nim @@ -1636,7 +1636,7 @@ proc genMagicExpr(p: BProc, e: PNode, d: var TLoc, op: TMagic) = of mSlurp..mQuoteAst: localError(e.info, errXMustBeCompileTime, e.sons[0].sym.name.s) of mSpawn: - let n = lowerings.wrapProcForSpawn(p.module.module, e.sons[1]) + let n = lowerings.wrapProcForSpawn(p.module.module, e[1], e.typ, nil, nil) expr(p, n, d) of mParallel: let n = semparallel.liftParallel(p.module.module, e) diff --git a/compiler/guards.nim b/compiler/guards.nim index de0ce1dcc..3df3bd1a8 100644 --- a/compiler/guards.nim +++ b/compiler/guards.nim @@ -672,12 +672,8 @@ proc simpleSlice*(a, b: PNode): BiggestInt = else: result = -1 -proc proveLe*(m: TModel; a, b: PNode): TImplication = - let res = canon(opLe.buildCall(a, b)) - #echo renderTree(res) - # we hardcode lots of axioms here: - let a = res[1] - let b = res[2] +proc ple(m: TModel; a, b: PNode): TImplication = + template `<=?`(a,b): expr = ple(m,a,b) == impYes # 0 <= 3 if a.isValue and b.isValue: return if leValue(a, b): impYes else: impNo @@ -692,26 +688,46 @@ proc proveLe*(m: TModel; a, b: PNode): TImplication = # x <= x if sameTree(a, b): return impYes - # x <= x+c iff 0 <= c - if b.getMagic in someAdd and sameTree(a, b[1]): - return proveLe(m, zero(), b[2]) + # 0 <= x.len + if b.getMagic in someLen and a.isValue: + if a.intVal <= 0: return impYes + + # x <= y+c if 0 <= c and x <= y + if b.getMagic in someAdd and zero() <=? b[2] and a <=? b[1]: return impYes + + # x+c <= y if c <= 0 and x <= y + if a.getMagic in someAdd and a[2] <=? zero() and a[1] <=? b: return impYes - # x+c <= x iff c <= 0 - if a.getMagic in someAdd and sameTree(b, a[1]): - return proveLe(m, a[2], zero()) + # x <= y*c if 1 <= c and x <= y and 0 <= y + if b.getMagic in someMul: + if a <=? b[1] and one() <=? b[2] and zero() <=? b[1]: return impYes - # x <= x*c if 1 <= c and 0 <= x: - if b.getMagic in someMul and sameTree(a, b[1]): - if proveLe(m, one(), b[2]) == impYes and proveLe(m, zero(), a) == impYes: - return impYes + # x div c <= y if 1 <= c and 0 <= y and x <= y: + if a.getMagic in someDiv: + if one() <=? a[2] and zero() <=? b and a[1] <=? b: return impYes - # x div c <= x if 1 <= c and 0 <= x: - if a.getMagic in someDiv and sameTree(a[1], b): - if proveLe(m, one(), a[2]) == impYes and proveLe(m, zero(), b) == impYes: - return impYes + # slightly subtle: + # x <= max(y, z) iff x <= y or x <= z + # note that 'x <= max(x, z)' is a special case of the above rule + if b.getMagic in someMax: + if a <=? b[1] or a <=? b[2]: return impYes + + # min(x, y) <= z iff x <= z or y <= z + if a.getMagic in someMin: + if a[1] <=? b or a[2] <=? b: return impYes # use the knowledge base: - return doesImply(m, res) + return doesImply(m, opLe.buildCall(a, b)) + +proc proveLe*(m: TModel; a, b: PNode): TImplication = + #echo "ROOT ", renderTree(a), " <=? ", b.rendertree + let x = canon(opLe.buildCall(a, b)) + #echo renderTree(res) + result = ple(m, x[1], x[2]) + if result == impUnknown: + # try an alternative: a <= b iff not (b < a) iff not (b+1 <= a): + let y = canon(opLe.buildCall(opAdd.buildCall(b, one()), a)) + result = ~ple(m, y[1], y[2]) proc addFactLe*(m: var TModel; a, b: PNode) = m.add canon(opLe.buildCall(a, b)) diff --git a/compiler/lowerings.nim b/compiler/lowerings.nim index 704cfbcdd..2a1a8e577 100644 --- a/compiler/lowerings.nim +++ b/compiler/lowerings.nim @@ -13,6 +13,8 @@ const genPrefix* = ":tmp" # prefix for generated names import ast, astalgo, types, idents, magicsys, msgs, options +from guards import createMagic +from trees import getMagic proc newTupleAccess*(tup: PNode, i: int): PNode = result = newNodeIT(nkBracketExpr, tup.info, tup.typ.skipTypes( @@ -80,19 +82,23 @@ proc newDotExpr(obj, b: PSym): PNode = addSon(result, newSymNode(field)) result.typ = field.typ -proc indirectAccess*(a: PNode, b: PSym, info: TLineInfo): PNode = +proc indirectAccess*(a: PNode, b: string, info: TLineInfo): PNode = # returns a[].b as a node var deref = newNodeI(nkHiddenDeref, info) - deref.typ = a.typ.sons[0] + deref.typ = a.typ.skipTypes(abstractInst).sons[0] assert deref.typ.kind == tyObject - let field = getSymFromList(deref.typ.n, getIdent(b.name.s & $b.id)) - assert field != nil, b.name.s + let field = getSymFromList(deref.typ.n, getIdent(b)) + assert field != nil, b addSon(deref, a) result = newNodeI(nkDotExpr, info) addSon(result, deref) addSon(result, newSymNode(field)) result.typ = field.typ +proc indirectAccess*(a: PNode, b: PSym, info: TLineInfo): PNode = + # returns a[].b as a node + result = indirectAccess(a, b.name.s & $b.id, info) + proc indirectAccess*(a, b: PSym, info: TLineInfo): PNode = result = indirectAccess(newSymNode(a), b, info) @@ -102,6 +108,11 @@ proc genAddrOf*(n: PNode): PNode = result.typ = newType(tyPtr, n.typ.owner) result.typ.rawAddSon(n.typ) +proc genDeref*(n: PNode): PNode = + result = newNodeIT(nkHiddenDeref, n.info, + n.typ.skipTypes(abstractInst).sons[0]) + result.add n + proc callCodegenProc*(name: string, arg1: PNode; arg2, arg3: PNode = nil): PNode = result = newNodeI(nkCall, arg1.info) @@ -114,14 +125,83 @@ proc callCodegenProc*(name: string, arg1: PNode; if arg2 != nil: result.add arg2 if arg3 != nil: result.add arg3 +# we have 4 cases to consider: +# - a void proc --> nothing to do +# - a proc returning GC'ed memory --> requires a future +# - a proc returning non GC'ed memory --> pass as hidden 'var' parameter +# - not in a parallel environment --> requires a future for memory safety +type + TSpawnResult = enum + srVoid, srFuture, srByVar + TFutureKind = enum + futInvalid # invalid type T for 'Future[T]' + futGC # Future of a GC'ed type + futBlob # Future of a blob type + +proc spawnResult(t: PType; inParallel: bool): TSpawnResult = + if t.isEmptyType: srVoid + elif inParallel and not containsGarbageCollectedRef(t): srByVar + else: srFuture + +proc futureKind(t: PType): TFutureKind = + if t.skipTypes(abstractInst).kind in {tyRef, tyString, tySequence}: futGC + elif containsGarbageCollectedRef(t): futInvalid + else: futBlob + +discard """ +We generate roughly this: + +proc f_wrapper(args) = + var a = args.a # copy strings/seqs; thread transfer; not generated for + # the 'parallel' statement + var b = args.b + + args.fut = createFuture(thread, sizeof(T)) # optional + nimArgsPassingDone() # signal parent that the work is done + args.fut.blob = f(a, b, ...) + # - or - + f(a, b, ...) + +stmtList: + var scratchObj + scratchObj.a = a + scratchObj.b = b + + nimSpawn(f_wrapper, addr scratchObj) + scratchObj.fut # optional + +""" + +proc createNimCreateFutureCall(fut, threadParam: PNode): PNode = + let size = newNodeIT(nkCall, fut.info, getSysType(tyInt)) + size.add newSymNode(createMagic("sizeof", mSizeOf)) + assert fut.typ.kind == tyGenericInst + size.add newNodeIT(nkType, fut.info, fut.typ.sons[1]) + + let castExpr = newNodeIT(nkCast, fut.info, fut.typ) + castExpr.add emptyNode + castExpr.add callCodeGenProc("nimCreateFuture", threadParam, size) + result = newFastAsgnStmt(fut, castExpr) + proc createWrapperProc(f: PNode; threadParam, argsParam: PSym; - varSection, call, barrier: PNode): PSym = + varSection, call, barrier, fut: PNode): PSym = var body = newNodeI(nkStmtList, f.info) body.add varSection if barrier != nil: body.add callCodeGenProc("barrierEnter", barrier) - body.add callCodeGenProc("nimArgsPassingDone", newSymNode(threadParam)) - body.add call + if fut != nil: + body.add createNimCreateFutureCall(fut, threadParam.newSymNode) + if barrier == nil: + body.add callCodeGenProc("nimFutureCreateCondVar", fut) + + body.add callCodeGenProc("nimArgsPassingDone", threadParam.newSymNode) + if fut != nil: + body.add newAsgnStmt(indirectAccess(fut, + if fut.typ.futureKind==futGC: "data" else: "blob", fut.info), call) + if barrier == nil: + body.add callCodeGenProc("nimFutureSignal", fut) + else: + body.add call if barrier != nil: body.add callCodeGenProc("barrierLeave", barrier) @@ -151,10 +231,148 @@ proc createCastExpr(argsParam: PSym; objType: PType): PNode = result.typ = newType(tyPtr, objType.owner) result.typ.rawAddSon(objType) -proc wrapProcForSpawn*(owner: PSym; n: PNode; barrier: PNode = nil): PNode = - result = newNodeI(nkStmtList, n.info) - if n.kind notin nkCallKinds or not n.typ.isEmptyType: - localError(n.info, "'spawn' takes a call expression of type void") +proc setupArgsForConcurrency(n: PNode; objType: PType; scratchObj: PSym, + castExpr, call, varSection, result: PNode) = + let formals = n[0].typ.n + let tmpName = getIdent(genPrefix) + for i in 1 .. 16) and + n.getRoot != nil: + # it is more efficient to pass a pointer instead: + let a = genAddrOf(n) + field.typ = a.typ + objType.addField(field) + result.add newFastAsgnStmt(newDotExpr(scratchObj, field), a) + call.add(genDeref(indirectAccess(castExpr, field, n.info))) + else: + # boring case + field.typ = argType + objType.addField(field) + result.add newFastAsgnStmt(newDotExpr(scratchObj, field), n) + call.add(indirectAccess(castExpr, field, n.info)) + +proc wrapProcForSpawn*(owner: PSym; n: PNode; retType: PType; + barrier, dest: PNode = nil): PNode = + # if 'barrier' != nil, then it is in a 'parallel' section and we + # generate quite different code + let spawnKind = spawnResult(retType, barrier!=nil) + case spawnKind + of srVoid: + internalAssert dest == nil + result = newNodeI(nkStmtList, n.info) + of srFuture: + internalAssert dest == nil + result = newNodeIT(nkStmtListExpr, n.info, retType) + of srByVar: + if dest == nil: localError(n.info, "'spawn' must not be discarded") + result = newNodeI(nkStmtList, n.info) + + if n.kind notin nkCallKinds: + localError(n.info, "'spawn' takes a call expression") return if optThreadAnalysis in gGlobalOptions: if {tfThread, tfNoSideEffect} * n[0].typ.flags == {}: @@ -180,7 +398,7 @@ proc wrapProcForSpawn*(owner: PSym; n: PNode; barrier: PNode = nil): PNode = varSectionB.addVar(scratchObj.newSymNode) result.add varSectionB - var call = newNodeI(nkCall, n.info) + var call = newNodeIT(nkCall, n.info, n.typ) var fn = n.sons[0] # templates and macros are in fact valid here due to the nature of # the transformation: @@ -200,34 +418,10 @@ proc wrapProcForSpawn*(owner: PSym; n: PNode; barrier: PNode = nil): PNode = call.add(fn) var varSection = newNodeI(nkVarSection, n.info) - let formals = n[0].typ.n - let tmpName = getIdent(genPrefix) - for i in 1 .. f shouldn't have side effects anyway # - passed arrays need to be ensured not to alias # - passed slices need to be ensured to be disjoint (+) -# - output slices need special logic +# - output slices need special logic (+) import ast, astalgo, idents, lowerings, magicsys, guards, sempass2, msgs, @@ -94,23 +94,6 @@ proc getSlot(c: var AnalysisCtx; v: PSym): ptr MonotonicVar = c.locals[L].v = v return addr(c.locals[L]) -proc getRoot(n: PNode): PSym = - ## ``getRoot`` takes a *path* ``n``. A path is an lvalue expression - ## like ``obj.x[i].y``. The *root* of a path is the symbol that can be - ## determined as the owner; ``obj`` in the example. - case n.kind - of nkSym: - if n.sym.kind in {skVar, skResult, skTemp, skLet, skForVar}: - result = n.sym - of nkDotExpr, nkBracketExpr, nkHiddenDeref, nkDerefExpr, - nkObjUpConv, nkObjDownConv, nkCheckedFieldExpr: - result = getRoot(n.sons[0]) - of nkHiddenStdConv, nkHiddenSubConv, nkConv: - result = getRoot(n.sons[1]) - of nkCallKinds: - if getMagic(n) == mSlice: result = getRoot(n.sons[1]) - else: discard - proc gatherArgs(c: var AnalysisCtx; n: PNode) = for i in 0.. = 0 and c.locals[s].stride != nil: @@ -193,6 +174,20 @@ proc stride(c: AnalysisCtx; n: PNode): BiggestInt = else: for i in 0 .. = 0 and c.locals[s].stride != nil: + result = n +@ c.locals[s].stride.intVal + else: + result = n + elif n.safeLen > 0: + result = shallowCopy(n) + for i in 0 .. 1: addFact(c.guards, branch.sons[0]) - #setLen(c.locals, oldState) for i in 0 .. 0: - result = shallowCopy(n) - for i in 0 .. < n.len: - result.sons[i] = transformSpawn(owner, n.sons[i], barrier) + result = transformSpawnSons(owner, n, barrier) else: result = n @@ -440,3 +452,4 @@ proc liftParallel*(owner: PSym; n: PNode): PNode = result.add callCodeGenProc("openBarrier", barrier) result.add transformSpawn(owner, body, barrier) result.add callCodeGenProc("closeBarrier", barrier) + diff --git a/doc/manual.txt b/doc/manual.txt index 39e2bad2a..b2e008969 100644 --- a/doc/manual.txt +++ b/doc/manual.txt @@ -2748,7 +2748,7 @@ The following builtin procs cannot be overloaded for reasons of implementation simplicity (they require specialized semantic checking):: defined, definedInScope, compiles, low, high, sizeOf, - is, of, echo, shallowCopy, getAst + is, of, echo, shallowCopy, getAst, spawn Thus they act more like keywords than like ordinary identifiers; unlike a keyword however, a redefinition may `shadow`:idx: the definition in diff --git a/lib/pure/concurrency/threadpool.nim b/lib/pure/concurrency/threadpool.nim index 86819d25a..583c60c66 100644 --- a/lib/pure/concurrency/threadpool.nim +++ b/lib/pure/concurrency/threadpool.nim @@ -65,6 +65,30 @@ proc closeBarrier*(b: ptr Barrier) {.compilerProc.} = # ---------------------------------------------------------------------------- type + AwaitInfo = object + cv: CondVar + idx: int + + RawFuture* = ptr RawFutureObj ## untyped base class for 'Future[T]' + RawFutureObj {.inheritable.} = object # \ + # we allocate this with the thread local allocator; this + # is possible since we already need to do the GC_unref + # on the owning thread + ready, usesCondVar: bool + cv: CondVar #\ + # for 'awaitAny' support + ai: ptr AwaitInfo + idx: int + data: PObject # we incRef and unref it to keep it alive + owner: ptr Worker + next: RawFuture + align: float64 # a float for proper alignment + + Future* {.compilerProc.} [T] = ptr object of RawFutureObj + blob: T ## the underlying value, if available. Note that usually + ## you should not access this field directly! However it can + ## sometimes be more efficient than getting the value via ``^``. + WorkerProc = proc (thread, args: pointer) {.nimcall, gcsafe.} Worker = object taskArrived: CondVar @@ -75,6 +99,92 @@ type ready: bool # put it here for correct alignment! initialized: bool # whether it has even been initialized shutdown: bool # the pool requests to shut down this worker thread + futureLock: TLock + head: RawFuture + +proc finished*(fut: RawFuture) = + ## This MUST be called for every created future to free its associated + ## resources. Note that the default reading operation ``^`` is destructive + ## and calls ``finished``. + doAssert fut.ai.isNil, "future is still attached to an 'awaitAny'" + assert fut.next == nil + let w = fut.owner + acquire(w.futureLock) + fut.next = w.head + w.head = fut + release(w.futureLock) + +proc cleanFutures(w: ptr Worker) = + var it = w.head + acquire(w.futureLock) + while it != nil: + let nxt = it.next + if it.usesCondVar: destroyCondVar(it.cv) + if it.data != nil: GC_unref(it.data) + dealloc(it) + it = nxt + w.head = nil + release(w.futureLock) + +proc nimCreateFuture(owner: pointer; blobSize: int): RawFuture {. + compilerProc.} = + result = cast[RawFuture](alloc0(RawFutureObj.sizeof + blobSize)) + result.owner = cast[ptr Worker](owner) + +proc nimFutureCreateCondVar(fut: RawFuture) {.compilerProc.} = + fut.cv = createCondVar() + fut.usesCondVar = true + +proc nimFutureSignal(fut: RawFuture) {.compilerProc.} = + assert fut.usesCondVar + signal(fut.cv) + +proc await*[T](fut: Future[T]) = + ## waits until the value for the future arrives. + if fut.usesCondVar: await(fut.cv) + +proc `^`*[T](fut: Future[T]): T = + ## blocks until the value is available and then returns this value. Note + ## this reading is destructive for reasons of efficiency and convenience. + ## This calls ``finished(fut)``. + await(fut) + when T is string or T is seq or T is ref: + result = cast[T](fut.data) + else: + result = fut.payload + finished(fut) + +proc notify*(fut: RawFuture) {.compilerproc.} = + if fut.ai != nil: + acquire(fut.ai.cv.L) + fut.ai.idx = fut.idx + inc fut.ai.cv.counter + release(fut.ai.cv.L) + signal(fut.ai.cv.c) + if fut.usesCondVar: signal(fut.cv) + +proc awaitAny*(futures: openArray[RawFuture]): int = + # awaits any of the given futures. Returns the index of one future for which + ## a value arrived. A future only supports one call to 'awaitAny' at the + ## same time. That means if you await([a,b]) and await([b,c]) the second + ## call will only await 'c'. If there is no future left to be able to wait + ## on, -1 is returned. + var ai: AwaitInfo + ai.cv = createCondVar() + var conflicts = 0 + for i in 0 .. futures.high: + if cas(addr futures[i].ai, nil, addr ai): + futures[i].idx = i + else: + inc conflicts + if conflicts < futures.len: + await(ai.cv) + result = ai.idx + for i in 0 .. futures.high: + discard cas(addr futures[i].ai, addr ai, nil) + else: + result = -1 + destroyCondVar(ai.cv) proc nimArgsPassingDone(p: pointer) {.compilerProc.} = let w = cast[ptr Worker](p) @@ -99,6 +209,7 @@ proc slave(w: ptr Worker) {.thread.} = await(w.taskArrived) assert(not w.ready) w.f(w, w.data) + if w.head != nil: w.cleanFutures if w.shutdown: w.shutdown = false atomicDec currentPoolSize @@ -119,6 +230,7 @@ var proc activateThread(i: int) {.noinline.} = workersData[i].taskArrived = createCondVar() workersData[i].taskStarted = createCondVar() + initLock workersData[i].futureLock workersData[i].initialized = true createThread(workers[i], slave, addr(workersData[i])) diff --git a/lib/system/atomics.nim b/lib/system/atomics.nim index c6c603b19..96246ba01 100644 --- a/lib/system/atomics.nim +++ b/lib/system/atomics.nim @@ -209,12 +209,12 @@ when defined(windows) and not defined(gcc): proc interlockedCompareExchange(p: pointer; exchange, comparand: int32): int32 {.importc: "InterlockedCompareExchange", header: "", cdecl.} - proc cas*[T: bool|int](p: ptr T; oldValue, newValue: T): bool = + proc cas*[T: bool|int|ptr](p: ptr T; oldValue, newValue: T): bool = interlockedCompareExchange(p, newValue.int32, oldValue.int32) != 0 - + # XXX fix for 64 bit build else: # this is valid for GCC and Intel C++ - proc cas*[T: bool|int](p: ptr T; oldValue, newValue: T): bool + proc cas*[T: bool|int|ptr](p: ptr T; oldValue, newValue: T): bool {.importc: "__sync_bool_compare_and_swap", nodecl.} # XXX is this valid for 'int'? diff --git a/tests/parallel/tdisjoint_slice1.nim b/tests/parallel/tdisjoint_slice1.nim index 2ca96d6ae..c1d0e52f8 100644 --- a/tests/parallel/tdisjoint_slice1.nim +++ b/tests/parallel/tdisjoint_slice1.nim @@ -1,20 +1,20 @@ +discard """ + outputsub: "EVEN 28" +""" import threadpool -proc f(a: openArray[int]) = - for x in a: echo x - -proc f(a: int) = echo a +proc odd(a: int) = echo "ODD ", a +proc even(a: int) = echo "EVEN ", a proc main() = var a: array[0..30, int] + for i in low(a)..high(a): a[i] = i parallel: - #spawn f(a[0..15]) - #spawn f(a[16..30]) var i = 0 while i <= 29: - spawn f(a[i]) - spawn f(a[i+1]) + spawn even(a[i]) + spawn odd(a[i+1]) inc i, 2 # is correct here diff --git a/tests/parallel/tinvalid_array_bounds.nim b/tests/parallel/tinvalid_array_bounds.nim index 337fae729..4c6065fd6 100644 --- a/tests/parallel/tinvalid_array_bounds.nim +++ b/tests/parallel/tinvalid_array_bounds.nim @@ -1,5 +1,5 @@ discard """ - errormsg: "cannot prove: i + 1 <= 30" + errormsg: "can prove: i + 1 > 30" line: 21 """ -- cgit 1.4.1-2-gfad0 From 030eac86c05427792d3c3c00b56fbe764d783a40 Mon Sep 17 00:00:00 2001 From: Araq Date: Sun, 25 May 2014 15:19:46 +0200 Subject: bugfix: regionized pointers in a generic context; renamed 'Future' to 'Promise' --- compiler/ast.nim | 2 + compiler/lowerings.nim | 88 +++++++++++------------ compiler/semexprs.nim | 8 +-- compiler/semtypes.nim | 8 +-- lib/pure/concurrency/threadpool.nim | 134 +++++++++++++++++++++--------------- lib/system.nim | 4 +- lib/system/assign.nim | 3 +- 7 files changed, 138 insertions(+), 109 deletions(-) (limited to 'lib/system') diff --git a/compiler/ast.nim b/compiler/ast.nim index c47407ee2..c3cb63df4 100644 --- a/compiler/ast.nim +++ b/compiler/ast.nim @@ -885,6 +885,8 @@ const nkCallKinds* = {nkCall, nkInfix, nkPrefix, nkPostfix, nkCommand, nkCallStrLit, nkHiddenCallConv} + nkIdentKinds* = {nkIdent, nkSym, nkAccQuoted, nkOpenSymChoice, + nkClosedSymChoice} nkLiterals* = {nkCharLit..nkTripleStrLit} nkLambdaKinds* = {nkLambda, nkDo} diff --git a/compiler/lowerings.nim b/compiler/lowerings.nim index 047bdf832..13d4bf60e 100644 --- a/compiler/lowerings.nim +++ b/compiler/lowerings.nim @@ -134,26 +134,26 @@ proc callCodegenProc*(name: string, arg1: PNode; # we have 4 cases to consider: # - a void proc --> nothing to do -# - a proc returning GC'ed memory --> requires a future +# - a proc returning GC'ed memory --> requires a promise # - a proc returning non GC'ed memory --> pass as hidden 'var' parameter -# - not in a parallel environment --> requires a future for memory safety +# - not in a parallel environment --> requires a promise for memory safety type TSpawnResult = enum - srVoid, srFuture, srByVar - TFutureKind = enum - futInvalid # invalid type T for 'Future[T]' - futGC # Future of a GC'ed type - futBlob # Future of a blob type + srVoid, srPromise, srByVar + TPromiseKind = enum + promInvalid # invalid type T for 'Promise[T]' + promGC # Promise of a GC'ed type + promBlob # Promise of a blob type proc spawnResult(t: PType; inParallel: bool): TSpawnResult = if t.isEmptyType: srVoid elif inParallel and not containsGarbageCollectedRef(t): srByVar - else: srFuture + else: srPromise -proc futureKind(t: PType): TFutureKind = - if t.skipTypes(abstractInst).kind in {tyRef, tyString, tySequence}: futGC - elif containsGarbageCollectedRef(t): futInvalid - else: futBlob +proc promiseKind(t: PType): TPromiseKind = + if t.skipTypes(abstractInst).kind in {tyRef, tyString, tySequence}: promGC + elif containsGarbageCollectedRef(t): promInvalid + else: promBlob discard """ We generate roughly this: @@ -164,12 +164,12 @@ proc f_wrapper(args) = # the 'parallel' statement var b = args.b - args.fut = nimCreateFuture(thread, sizeof(T)) # optional - nimFutureCreateCondVar(args.fut) # optional + args.prom = nimCreatePromise(thread, sizeof(T)) # optional + nimPromiseCreateCondVar(args.prom) # optional nimArgsPassingDone() # signal parent that the work is done # - args.fut.blob = f(a, b, ...) - nimFutureSignal(args.fut) + args.prom.blob = f(a, b, ...) + nimPromiseSignal(args.prom) # - or - f(a, b, ...) @@ -181,42 +181,42 @@ stmtList: scratchObj.b = b nimSpawn(f_wrapper, addr scratchObj) - scratchObj.fut # optional + scratchObj.prom # optional """ -proc createNimCreateFutureCall(fut, threadParam: PNode): PNode = - let size = newNodeIT(nkCall, fut.info, getSysType(tyInt)) +proc createNimCreatePromiseCall(prom, threadParam: PNode): PNode = + let size = newNodeIT(nkCall, prom.info, getSysType(tyInt)) size.add newSymNode(createMagic("sizeof", mSizeOf)) - assert fut.typ.kind == tyGenericInst - size.add newNodeIT(nkType, fut.info, fut.typ.sons[1]) + assert prom.typ.kind == tyGenericInst + size.add newNodeIT(nkType, prom.info, prom.typ.sons[1]) - let castExpr = newNodeIT(nkCast, fut.info, fut.typ) + let castExpr = newNodeIT(nkCast, prom.info, prom.typ) castExpr.add emptyNode - castExpr.add callCodeGenProc("nimCreateFuture", threadParam, size) - result = newFastAsgnStmt(fut, castExpr) + castExpr.add callCodeGenProc("nimCreatePromise", threadParam, size) + result = newFastAsgnStmt(prom, castExpr) proc createWrapperProc(f: PNode; threadParam, argsParam: PSym; - varSection, call, barrier, fut: PNode): PSym = + varSection, call, barrier, prom: PNode): PSym = var body = newNodeI(nkStmtList, f.info) body.add varSection if barrier != nil: body.add callCodeGenProc("barrierEnter", barrier) - if fut != nil: - body.add createNimCreateFutureCall(fut, threadParam.newSymNode) + if prom != nil: + body.add createNimCreatePromiseCall(prom, threadParam.newSymNode) if barrier == nil: - body.add callCodeGenProc("nimFutureCreateCondVar", fut) + body.add callCodeGenProc("nimPromiseCreateCondVar", prom) body.add callCodeGenProc("nimArgsPassingDone", threadParam.newSymNode) - if fut != nil: - let fk = fut.typ.sons[1].futureKind - if fk == futInvalid: - localError(f.info, "cannot create a future of type: " & - typeToString(fut.typ.sons[1])) - body.add newAsgnStmt(indirectAccess(fut, - if fk == futGC: "data" else: "blob", fut.info), call) + if prom != nil: + let fk = prom.typ.sons[1].promiseKind + if fk == promInvalid: + localError(f.info, "cannot create a promise of type: " & + typeToString(prom.typ.sons[1])) + body.add newAsgnStmt(indirectAccess(prom, + if fk == promGC: "data" else: "blob", prom.info), call) if barrier == nil: - body.add callCodeGenProc("nimFutureSignal", fut) + body.add callCodeGenProc("nimPromiseSignal", prom) else: body.add call if barrier != nil: @@ -381,7 +381,7 @@ proc wrapProcForSpawn*(owner: PSym; n: PNode; retType: PType; of srVoid: internalAssert dest == nil result = newNodeI(nkStmtList, n.info) - of srFuture: + of srPromise: internalAssert dest == nil result = newNodeIT(nkStmtListExpr, n.info, retType) of srByVar: @@ -450,17 +450,17 @@ proc wrapProcForSpawn*(owner: PSym; n: PNode; retType: PType; result.add newFastAsgnStmt(newDotExpr(scratchObj, field), barrier) barrierAsExpr = indirectAccess(castExpr, field, n.info) - var futField, futAsExpr: PNode = nil - if spawnKind == srFuture: - var field = newSym(skField, getIdent"fut", owner, n.info) + var promField, promAsExpr: PNode = nil + if spawnKind == srPromise: + var field = newSym(skField, getIdent"prom", owner, n.info) field.typ = retType objType.addField(field) - futField = newDotExpr(scratchObj, field) - futAsExpr = indirectAccess(castExpr, field, n.info) + promField = newDotExpr(scratchObj, field) + promAsExpr = indirectAccess(castExpr, field, n.info) let wrapper = createWrapperProc(fn, threadParam, argsParam, varSection, call, - barrierAsExpr, futAsExpr) + barrierAsExpr, promAsExpr) result.add callCodeGenProc("nimSpawn", wrapper.newSymNode, genAddrOf(scratchObj.newSymNode)) - if spawnKind == srFuture: result.add futField + if spawnKind == srPromise: result.add promField diff --git a/compiler/semexprs.nim b/compiler/semexprs.nim index 4e3d2f3ce..8f4cce547 100644 --- a/compiler/semexprs.nim +++ b/compiler/semexprs.nim @@ -1579,9 +1579,9 @@ proc semShallowCopy(c: PContext, n: PNode, flags: TExprFlags): PNode = else: result = semDirectOp(c, n, flags) -proc createFuture(c: PContext; t: PType; info: TLineInfo): PType = +proc createPromise(c: PContext; t: PType; info: TLineInfo): PType = result = newType(tyGenericInvokation, c.module) - addSonSkipIntLit(result, magicsys.getCompilerProc("Future").typ) + addSonSkipIntLit(result, magicsys.getCompilerProc("Promise").typ) addSonSkipIntLit(result, t) result = instGenericContainer(c, info, result, allowMetaTypes = false) @@ -1619,9 +1619,9 @@ proc semMagic(c: PContext, n: PNode, s: PSym, flags: TExprFlags): PNode = of mSpawn: result = setMs(n, s) result.sons[1] = semExpr(c, n.sons[1]) - # later passes may transform the type 'Future[T]' back into 'T' + # later passes may transform the type 'Promise[T]' back into 'T' if not result[1].typ.isEmptyType: - result.typ = createFuture(c, result[1].typ, n.info) + result.typ = createPromise(c, result[1].typ, n.info) else: result = semDirectOp(c, n, flags) proc semWhen(c: PContext, n: PNode, semCheck = true): PNode = diff --git a/compiler/semtypes.nim b/compiler/semtypes.nim index 8fcb6ea99..bb81cbe74 100644 --- a/compiler/semtypes.nim +++ b/compiler/semtypes.nim @@ -1084,8 +1084,10 @@ proc semTypeNode(c: PContext, n: PNode, prev: PType): PType = of nkCallKinds: if isRange(n): result = semRangeAux(c, n, prev) - elif n[0].kind == nkIdent: - let op = n.sons[0].ident + elif n[0].kind notin nkIdentKinds: + result = semTypeExpr(c, n) + else: + let op = considerAcc(n.sons[0]) if op.id in {ord(wAnd), ord(wOr)} or op.s == "|": checkSonsLen(n, 3) var @@ -1120,8 +1122,6 @@ proc semTypeNode(c: PContext, n: PNode, prev: PType): PType = result = semAnyRef(c, n, tyRef, prev) else: result = semTypeExpr(c, n) - else: - result = semTypeExpr(c, n) of nkWhenStmt: var whenResult = semWhen(c, n, false) if whenResult.kind == nkStmtList: whenResult.kind = nkStmtListType diff --git a/lib/pure/concurrency/threadpool.nim b/lib/pure/concurrency/threadpool.nim index 41c1adca0..24cb9ccdd 100644 --- a/lib/pure/concurrency/threadpool.nim +++ b/lib/pure/concurrency/threadpool.nim @@ -65,12 +65,14 @@ proc closeBarrier*(b: ptr Barrier) {.compilerProc.} = # ---------------------------------------------------------------------------- type + foreign* = object ## a region that indicates the pointer comes from a + ## foreign thread heap. AwaitInfo = object cv: CondVar idx: int - RawFuture* = ptr RawFutureObj ## untyped base class for 'Future[T]' - RawFutureObj {.inheritable.} = object # \ + RawPromise* = ptr RawPromiseObj ## untyped base class for 'Promise[T]' + RawPromiseObj {.inheritable.} = object # \ # we allocate this with the thread local allocator; this # is possible since we already need to do the GC_unref # on the owning thread @@ -81,10 +83,10 @@ type idx: int data: PObject # we incRef and unref it to keep it alive owner: ptr Worker - next: RawFuture + next: RawPromise align: float64 # a float for proper alignment - Future* {.compilerProc.} [T] = ptr object of RawFutureObj + Promise* {.compilerProc.} [T] = ptr object of RawPromiseObj blob: T ## the underlying value, if available. Note that usually ## you should not access this field directly! However it can ## sometimes be more efficient than getting the value via ``^``. @@ -99,24 +101,24 @@ type ready: bool # put it here for correct alignment! initialized: bool # whether it has even been initialized shutdown: bool # the pool requests to shut down this worker thread - futureLock: TLock - head: RawFuture + promiseLock: TLock + head: RawPromise -proc finished*(fut: RawFuture) = - ## This MUST be called for every created future to free its associated +proc finished*(prom: RawPromise) = + ## This MUST be called for every created promise to free its associated ## resources. Note that the default reading operation ``^`` is destructive ## and calls ``finished``. - doAssert fut.ai.isNil, "future is still attached to an 'awaitAny'" - assert fut.next == nil - let w = fut.owner - acquire(w.futureLock) - fut.next = w.head - w.head = fut - release(w.futureLock) - -proc cleanFutures(w: ptr Worker) = + doAssert prom.ai.isNil, "promise is still attached to an 'awaitAny'" + assert prom.next == nil + let w = prom.owner + acquire(w.promiseLock) + prom.next = w.head + w.head = prom + release(w.promiseLock) + +proc cleanPromises(w: ptr Worker) = var it = w.head - acquire(w.futureLock) + acquire(w.promiseLock) while it != nil: let nxt = it.next if it.usesCondVar: destroyCondVar(it.cv) @@ -124,62 +126,84 @@ proc cleanFutures(w: ptr Worker) = dealloc(it) it = nxt w.head = nil - release(w.futureLock) + release(w.promiseLock) -proc nimCreateFuture(owner: pointer; blobSize: int): RawFuture {. +proc nimCreatePromise(owner: pointer; blobSize: int): RawPromise {. compilerProc.} = - result = cast[RawFuture](alloc0(RawFutureObj.sizeof + blobSize)) + result = cast[RawPromise](alloc0(RawPromiseObj.sizeof + blobSize)) result.owner = cast[ptr Worker](owner) -proc nimFutureCreateCondVar(fut: RawFuture) {.compilerProc.} = - fut.cv = createCondVar() - fut.usesCondVar = true - -proc nimFutureSignal(fut: RawFuture) {.compilerProc.} = - if fut.ai != nil: - acquire(fut.ai.cv.L) - fut.ai.idx = fut.idx - inc fut.ai.cv.counter - release(fut.ai.cv.L) - signal(fut.ai.cv.c) - if fut.usesCondVar: signal(fut.cv) +proc nimPromiseCreateCondVar(prom: RawPromise) {.compilerProc.} = + prom.cv = createCondVar() + prom.usesCondVar = true + +proc nimPromiseSignal(prom: RawPromise) {.compilerProc.} = + if prom.ai != nil: + acquire(prom.ai.cv.L) + prom.ai.idx = prom.idx + inc prom.ai.cv.counter + release(prom.ai.cv.L) + signal(prom.ai.cv.c) + if prom.usesCondVar: signal(prom.cv) + +proc await*[T](prom: Promise[T]) = + ## waits until the value for the promise arrives. + if prom.usesCondVar: await(prom.cv) + +proc awaitAndThen*[T](prom: Promise[T]; action: proc (x: T) {.closure.}) = + ## blocks until the value is available and then passes this value + ## to ``action``. Note that due to Nimrod's parameter passing semantics this + ## means that ``T`` doesn't need to be copied and so ``awaitAndThen`` can + ## sometimes be more efficient than ``^``. + if prom.usesCondVar: await(prom) + when T is string or T is seq: + action(cast[T](prom.data)) + elif T is ref: + {.error: "'awaitAndThen' not available for Promise[ref]".} + else: + action(prom.blob) + finished(prom) -proc await*[T](fut: Future[T]) = - ## waits until the value for the future arrives. - if fut.usesCondVar: await(fut.cv) +proc `^`*[T](prom: Promise[ref T]): foreign ptr T = + ## blocks until the value is available and then returns this value. Note + ## this reading is destructive for reasons of efficiency and convenience. + ## This calls ``finished(prom)``. + if prom.usesCondVar: await(prom) + result = cast[foreign ptr T](prom.data) + finished(prom) -proc `^`*[T](fut: Future[T]): T = +proc `^`*[T](prom: Promise[T]): T = ## blocks until the value is available and then returns this value. Note ## this reading is destructive for reasons of efficiency and convenience. - ## This calls ``finished(fut)``. - if fut.usesCondVar: await(fut) - when T is string or T is seq or T is ref: - result = cast[T](fut.data) + ## This calls ``finished(prom)``. + if prom.usesCondVar: await(prom) + when T is string or T is seq: + result = cast[T](prom.data) else: - result = fut.blob - finished(fut) + result = prom.blob + finished(prom) -proc awaitAny*(futures: openArray[RawFuture]): int = - # awaits any of the given futures. Returns the index of one future for which - ## a value arrived. A future only supports one call to 'awaitAny' at the +proc awaitAny*(promises: openArray[RawPromise]): int = + # awaits any of the given promises. Returns the index of one promise for which + ## a value arrived. A promise only supports one call to 'awaitAny' at the ## same time. That means if you await([a,b]) and await([b,c]) the second - ## call will only await 'c'. If there is no future left to be able to wait + ## call will only await 'c'. If there is no promise left to be able to wait ## on, -1 is returned. ## **Note**: This results in non-deterministic behaviour and so should be ## avoided. var ai: AwaitInfo ai.cv = createCondVar() var conflicts = 0 - for i in 0 .. futures.high: - if cas(addr futures[i].ai, nil, addr ai): - futures[i].idx = i + for i in 0 .. promises.high: + if cas(addr promises[i].ai, nil, addr ai): + promises[i].idx = i else: inc conflicts - if conflicts < futures.len: + if conflicts < promises.len: await(ai.cv) result = ai.idx - for i in 0 .. futures.high: - discard cas(addr futures[i].ai, addr ai, nil) + for i in 0 .. promises.high: + discard cas(addr promises[i].ai, addr ai, nil) else: result = -1 destroyCondVar(ai.cv) @@ -207,7 +231,7 @@ proc slave(w: ptr Worker) {.thread.} = await(w.taskArrived) assert(not w.ready) w.f(w, w.data) - if w.head != nil: w.cleanFutures + if w.head != nil: w.cleanPromises if w.shutdown: w.shutdown = false atomicDec currentPoolSize @@ -228,7 +252,7 @@ var proc activateThread(i: int) {.noinline.} = workersData[i].taskArrived = createCondVar() workersData[i].taskStarted = createCondVar() - initLock workersData[i].futureLock + initLock workersData[i].promiseLock workersData[i].initialized = true createThread(workers[i], slave, addr(workersData[i])) diff --git a/lib/system.nim b/lib/system.nim index fbd905afa..fc6f617a5 100644 --- a/lib/system.nim +++ b/lib/system.nim @@ -42,7 +42,6 @@ type cstring* {.magic: Cstring.} ## built-in cstring (*compatible string*) type pointer* {.magic: Pointer.} ## built-in pointer type, use the ``addr`` ## operator to get a pointer to a variable - const on* = true ## alias for ``true`` off* = false ## alias for ``false`` @@ -51,6 +50,9 @@ const type Ordinal* {.magic: Ordinal.}[T] + `ptr`* {.magic: Pointer.}[T] ## built-in generic untraced pointer type + `ref`* {.magic: Pointer.}[T] ## built-in generic traced pointer type + `nil` {.magic: "Nil".} expr* {.magic: Expr.} ## meta type to denote an expression (for templates) stmt* {.magic: Stmt.} ## meta type to denote a statement (for templates) diff --git a/lib/system/assign.nim b/lib/system/assign.nim index 75c749633..2ae945fb1 100644 --- a/lib/system/assign.nim +++ b/lib/system/assign.nim @@ -179,7 +179,8 @@ when not defined(nimmixin): # internal proc used for destroying sequences and arrays for i in countup(0, r.len - 1): destroy(r[i]) else: - # XXX Why is this exported and no compilerproc? + # XXX Why is this exported and no compilerproc? -> compilerprocs cannot be + # generic for now proc nimDestroyRange*[T](r: T) = # internal proc used for destroying sequences and arrays mixin destroy -- cgit 1.4.1-2-gfad0 From ce773b70a7725cecec14ed4dcbdeeac4c7c0db1c Mon Sep 17 00:00:00 2001 From: Reimer Behrends Date: Sat, 24 May 2014 07:04:34 +0200 Subject: Fixed readAllBuffer() to avoid adding garbage bytes at end. The function readAllBuffer() always returned a string that was a multiple of the BufSize in length, regardless of how many bytes were actually read, padding the result with garbage bytes on the last chunk. This fix properly trims the last chunk to its actual size. --- lib/system/sysio.nim | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'lib/system') diff --git a/lib/system/sysio.nim b/lib/system/sysio.nim index 02c17b92b..32d4c3e91 100644 --- a/lib/system/sysio.nim +++ b/lib/system/sysio.nim @@ -115,10 +115,14 @@ proc readAllBuffer(file: TFile): string = # bytes we need to read before the buffer is empty. result = "" var buffer = newString(BufSize) - var bytesRead = BufSize - while bytesRead == BufSize: - bytesRead = readBuffer(file, addr(buffer[0]), BufSize) - result.add(buffer) + while true: + var bytesRead = readBuffer(file, addr(buffer[0]), BufSize) + if bytesRead == BufSize: + result.add(buffer) + else: + buffer.setLen(bytesRead) + result.add(buffer) + break proc rawFileSize(file: TFile): int = # this does not raise an error opposed to `getFileSize` -- cgit 1.4.1-2-gfad0 From 2de99653d002b919c88322219bff6f33653081c5 Mon Sep 17 00:00:00 2001 From: Araq Date: Thu, 5 Jun 2014 08:46:29 +0200 Subject: Promises are now refs --- compiler/ccgexprs.nim | 2 +- compiler/lowerings.nim | 45 ++++++------- compiler/pragmas.nim | 11 ++-- compiler/semexprs.nim | 11 ++++ compiler/semparallel.nim | 6 +- lib/pure/concurrency/threadpool.nim | 124 +++++++++++++++++++----------------- lib/system/atomics.nim | 8 +++ todo.txt | 14 ++++ 8 files changed, 133 insertions(+), 88 deletions(-) (limited to 'lib/system') diff --git a/compiler/ccgexprs.nim b/compiler/ccgexprs.nim index 34fdf5bf1..c0442711e 100644 --- a/compiler/ccgexprs.nim +++ b/compiler/ccgexprs.nim @@ -1636,7 +1636,7 @@ proc genMagicExpr(p: BProc, e: PNode, d: var TLoc, op: TMagic) = of mSlurp..mQuoteAst: localError(e.info, errXMustBeCompileTime, e.sons[0].sym.name.s) of mSpawn: - let n = lowerings.wrapProcForSpawn(p.module.module, e[1], e.typ, nil, nil) + let n = lowerings.wrapProcForSpawn(p.module.module, e, e.typ, nil, nil) expr(p, n, d) of mParallel: let n = semparallel.liftParallel(p.module.module, e) diff --git a/compiler/lowerings.nim b/compiler/lowerings.nim index af4daf785..327a18df5 100644 --- a/compiler/lowerings.nim +++ b/compiler/lowerings.nim @@ -86,7 +86,7 @@ proc indirectAccess*(a: PNode, b: string, info: TLineInfo): PNode = # returns a[].b as a node var deref = newNodeI(nkHiddenDeref, info) deref.typ = a.typ.skipTypes(abstractInst).sons[0] - var t = deref.typ + var t = deref.typ.skipTypes(abstractInst) var field: PSym while true: assert t.kind == tyObject @@ -94,6 +94,7 @@ proc indirectAccess*(a: PNode, b: string, info: TLineInfo): PNode = if field != nil: break t = t.sons[0] if t == nil: break + t = t.skipTypes(abstractInst) assert field != nil, b addSon(deref, a) result = newNodeI(nkDotExpr, info) @@ -132,6 +133,11 @@ proc callCodegenProc*(name: string, arg1: PNode; if arg3 != nil: result.add arg3 result.typ = sym.typ.sons[0] +proc callProc(a: PNode): PNode = + result = newNodeI(nkCall, a.info) + result.add a + result.typ = a.typ.sons[0] + # we have 4 cases to consider: # - a void proc --> nothing to do # - a proc returning GC'ed memory --> requires a promise @@ -169,14 +175,14 @@ proc addLocalVar(varSection: PNode; owner: PSym; typ: PType; v: PNode): PSym = discard """ We generate roughly this: -proc f_wrapper(args) = +proc f_wrapper(thread, args) = barrierEnter(args.barrier) # for parallel statement var a = args.a # thread transfer; deepCopy or shallowCopy or no copy # depending on whether we're in a 'parallel' statement var b = args.b + var prom = args.prom - args.prom = nimCreatePromise(thread, sizeof(T)) # optional - nimPromiseCreateCondVar(args.prom) # optional + prom.owner = thread # optional nimArgsPassingDone() # signal parent that the work is done # args.prom.blob = f(a, b, ...) @@ -196,17 +202,6 @@ stmtList: """ -proc createNimCreatePromiseCall(prom, threadParam: PNode): PNode = - let size = newNodeIT(nkCall, prom.info, getSysType(tyInt)) - size.add newSymNode(createMagic("sizeof", mSizeOf)) - assert prom.typ.kind == tyGenericInst - size.add newNodeIT(nkType, prom.info, prom.typ.sons[1]) - - let castExpr = newNodeIT(nkCast, prom.info, prom.typ) - castExpr.add emptyNode - castExpr.add callCodeGenProc("nimCreatePromise", threadParam, size) - result = castExpr - proc createWrapperProc(f: PNode; threadParam, argsParam: PSym; varSection, call, barrier, prom: PNode; spawnKind: TSpawnResult): PSym = @@ -223,14 +218,14 @@ proc createWrapperProc(f: PNode; threadParam, argsParam: PSym; threadLocalProm = addLocalVar(varSection, argsParam.owner, prom.typ, prom) elif prom != nil: internalAssert prom.typ.kind == tyGenericInst - threadLocalProm = addLocalVar(varSection, argsParam.owner, prom.typ, - createNimCreatePromiseCall(prom, threadParam.newSymNode)) + threadLocalProm = addLocalVar(varSection, argsParam.owner, prom.typ, prom) body.add varSection if prom != nil and spawnKind != srByVar: - body.add newFastAsgnStmt(prom, threadLocalProm.newSymNode) - if barrier == nil: - body.add callCodeGenProc("nimPromiseCreateCondVar", prom) + # generate: + # prom.owner = threadParam + body.add newAsgnStmt(indirectAccess(threadLocalProm.newSymNode, + "owner", prom.info), threadParam.newSymNode) body.add callCodeGenProc("nimArgsPassingDone", threadParam.newSymNode) if spawnKind == srByVar: @@ -404,10 +399,11 @@ proc setupArgsForParallelism(n: PNode; objType: PType; scratchObj: PSym; indirectAccess(castExpr, field, n.info)) call.add(threadLocal.newSymNode) -proc wrapProcForSpawn*(owner: PSym; n: PNode; retType: PType; +proc wrapProcForSpawn*(owner: PSym; spawnExpr: PNode; retType: PType; barrier, dest: PNode = nil): PNode = # if 'barrier' != nil, then it is in a 'parallel' section and we # generate quite different code + let n = spawnExpr[1] let spawnKind = spawnResult(retType, barrier!=nil) case spawnKind of srVoid: @@ -419,7 +415,7 @@ proc wrapProcForSpawn*(owner: PSym; n: PNode; retType: PType; of srByVar: if dest == nil: localError(n.info, "'spawn' must not be discarded") result = newNodeI(nkStmtList, n.info) - + if n.kind notin nkCallKinds: localError(n.info, "'spawn' takes a call expression") return @@ -489,6 +485,11 @@ proc wrapProcForSpawn*(owner: PSym; n: PNode; retType: PType; objType.addField(field) promField = newDotExpr(scratchObj, field) promAsExpr = indirectAccess(castExpr, field, n.info) + # create promise: + result.add newFastAsgnStmt(promField, callProc(spawnExpr[2])) + if barrier == nil: + result.add callCodeGenProc("nimPromiseCreateCondVar", promField) + elif spawnKind == srByVar: var field = newSym(skField, getIdent"prom", owner, n.info) field.typ = newType(tyPtr, objType.owner) diff --git a/compiler/pragmas.nim b/compiler/pragmas.nim index db9fe7cbe..aed0e1850 100644 --- a/compiler/pragmas.nim +++ b/compiler/pragmas.nim @@ -644,12 +644,13 @@ proc singlePragma(c: PContext, sym: PSym, n: PNode, i: int, incl(sym.flags, sfNoReturn) of wDynlib: processDynLib(c, it, sym) - of wCompilerproc: + of wCompilerproc: noVal(it) # compilerproc may not get a string! - makeExternExport(sym, "$1", it.info) - incl(sym.flags, sfCompilerProc) - incl(sym.flags, sfUsed) # suppress all those stupid warnings - registerCompilerProc(sym) + if sfFromGeneric notin sym.flags: + makeExternExport(sym, "$1", it.info) + incl(sym.flags, sfCompilerProc) + incl(sym.flags, sfUsed) # suppress all those stupid warnings + registerCompilerProc(sym) of wProcVar: noVal(it) incl(sym.flags, sfProcvar) diff --git a/compiler/semexprs.nim b/compiler/semexprs.nim index e507e711f..9e3785185 100644 --- a/compiler/semexprs.nim +++ b/compiler/semexprs.nim @@ -1585,6 +1585,16 @@ proc createPromise(c: PContext; t: PType; info: TLineInfo): PType = addSonSkipIntLit(result, t) result = instGenericContainer(c, info, result, allowMetaTypes = false) +proc instantiateCreatePromiseCall(c: PContext; t: PType; + info: TLineInfo): PSym = + let sym = magicsys.getCompilerProc("nimCreatePromise") + if sym == nil: + localError(info, errSystemNeeds, "nimCreatePromise") + var bindings: TIdTable + initIdTable(bindings) + bindings.idTablePut(sym.ast[genericParamsPos].sons[0].typ, t) + result = c.semGenerateInstance(c, sym, bindings, info) + proc setMs(n: PNode, s: PSym): PNode = result = n n.sons[0] = newSymNode(s) @@ -1626,6 +1636,7 @@ proc semMagic(c: PContext, n: PNode, s: PSym, flags: TExprFlags): PNode = result.typ = result[1].typ else: result.typ = createPromise(c, result[1].typ, n.info) + result.add instantiateCreatePromiseCall(c, result[1].typ, n.info).newSymNode else: result = semDirectOp(c, n, flags) proc semWhen(c: PContext, n: PNode, semCheck = true): PNode = diff --git a/compiler/semparallel.nim b/compiler/semparallel.nim index 72def1137..c594a4788 100644 --- a/compiler/semparallel.nim +++ b/compiler/semparallel.nim @@ -406,19 +406,19 @@ proc transformSpawn(owner: PSym; n, barrier: PNode): PNode = if result.isNil: result = newNodeI(nkStmtList, n.info) result.add n - result.add wrapProcForSpawn(owner, m[1], b.typ, barrier, it[0]) + result.add wrapProcForSpawn(owner, m, b.typ, barrier, it[0]) it.sons[it.len-1] = emptyNode if result.isNil: result = n of nkAsgn, nkFastAsgn: let b = n[1] if getMagic(b) == mSpawn: let m = transformSlices(b) - return wrapProcForSpawn(owner, m[1], b.typ, barrier, n[0]) + return wrapProcForSpawn(owner, m, b.typ, barrier, n[0]) result = transformSpawnSons(owner, n, barrier) of nkCallKinds: if getMagic(n) == mSpawn: result = transformSlices(n) - return wrapProcForSpawn(owner, result[1], n.typ, barrier, nil) + return wrapProcForSpawn(owner, result, n.typ, barrier, nil) result = transformSpawnSons(owner, n, barrier) elif n.safeLen > 0: result = transformSpawnSons(owner, n, barrier) diff --git a/lib/pure/concurrency/threadpool.nim b/lib/pure/concurrency/threadpool.nim index 92d5011f4..8129d03ae 100644 --- a/lib/pure/concurrency/threadpool.nim +++ b/lib/pure/concurrency/threadpool.nim @@ -85,25 +85,26 @@ type cv: CondVar idx: int - RawPromise* = ptr RawPromiseObj ## untyped base class for 'Promise[T]' - RawPromiseObj {.inheritable.} = object # \ - # we allocate this with the thread local allocator; this - # is possible since we already need to do the GC_unref - # on the owning thread + RawPromise* = ref RawPromiseObj ## untyped base class for 'Promise[T]' + RawPromiseObj = object of TObject ready, usesCondVar: bool cv: CondVar #\ # for 'awaitAny' support ai: ptr AwaitInfo idx: int - data: PObject # we incRef and unref it to keep it alive - owner: ptr Worker - next: RawPromise - align: float64 # a float for proper alignment + data: pointer # we incRef and unref it to keep it alive + owner: pointer # ptr Worker - Promise* {.compilerProc.} [T] = ptr object of RawPromiseObj - blob: T ## the underlying value, if available. Note that usually - ## you should not access this field directly! However it can - ## sometimes be more efficient than getting the value via ``^``. + PromiseObj[T] = object of RawPromiseObj + blob: T + + Promise*{.compilerProc.}[T] = ref PromiseObj[T] + + ToFreeQueue = object + len: int + lock: TLock + empty: TCond + data: array[512, pointer] WorkerProc = proc (thread, args: pointer) {.nimcall, gcsafe.} Worker = object @@ -115,37 +116,55 @@ type ready: bool # put it here for correct alignment! initialized: bool # whether it has even been initialized shutdown: bool # the pool requests to shut down this worker thread - promiseLock: TLock - head: RawPromise + q: ToFreeQueue + +proc await*(prom: RawPromise) = + ## waits until the value for the promise arrives. Usually it is not necessary + ## to call this explicitly. + if prom.usesCondVar: + prom.usesCondVar = false + await(prom.cv) + destroyCondVar(prom.cv) -proc finished*(prom: RawPromise) = - ## This MUST be called for every created promise to free its associated - ## resources. Note that the default reading operation ``^`` is destructive - ## and calls ``finished``. +proc finished(prom: RawPromise) = doAssert prom.ai.isNil, "promise is still attached to an 'awaitAny'" - assert prom.next == nil - let w = prom.owner - acquire(w.promiseLock) - prom.next = w.head - w.head = prom - release(w.promiseLock) + # we have to protect against the rare cases where the owner of the promise + # simply disregards the promise and yet the "promiser" has not yet written + # anything to it: + await(prom) + if prom.data.isNil: return + let owner = cast[ptr Worker](prom.owner) + let q = addr(owner.q) + var waited = false + while true: + acquire(q.lock) + if q.len < q.data.len: + q.data[q.len] = prom.data + inc q.len + release(q.lock) + break + else: + # the queue is exhausted! We block until it has been cleaned: + release(q.lock) + wait(q.empty, q.lock) + waited = true + prom.data = nil + # wakeup other potentially waiting threads: + if waited: signal(q.empty) proc cleanPromises(w: ptr Worker) = - var it = w.head - acquire(w.promiseLock) - while it != nil: - let nxt = it.next - if it.usesCondVar: destroyCondVar(it.cv) - if it.data != nil: GC_unref(it.data) - dealloc(it) - it = nxt - w.head = nil - release(w.promiseLock) - -proc nimCreatePromise(owner: pointer; blobSize: int): RawPromise {. - compilerProc.} = - result = cast[RawPromise](alloc0(RawPromiseObj.sizeof + blobSize)) - result.owner = cast[ptr Worker](owner) + let q = addr(w.q) + acquire(q.lock) + for i in 0 .. Date: Fri, 6 Jun 2014 02:05:17 +0200 Subject: added 'fence' instructions to the barrier --- lib/pure/concurrency/threadpool.nim | 25 ++++++++++++++++--------- lib/system/atomics.nim | 35 +++++------------------------------ todo.txt | 3 +++ 3 files changed, 24 insertions(+), 39 deletions(-) (limited to 'lib/system') diff --git a/lib/pure/concurrency/threadpool.nim b/lib/pure/concurrency/threadpool.nim index 8129d03ae..c4ed42c05 100644 --- a/lib/pure/concurrency/threadpool.nim +++ b/lib/pure/concurrency/threadpool.nim @@ -53,12 +53,15 @@ type interest: bool ## wether the master is interested in the "all done" event proc barrierEnter(b: ptr Barrier) {.compilerProc, inline.} = - ## due to the signaling between threads, it is ensured we are the only - ## one with access to 'entered' so we don't need 'atomicInc' here: + # due to the signaling between threads, it is ensured we are the only + # one with access to 'entered' so we don't need 'atomicInc' here: inc b.entered + # also we need no 'fence' instructions here as soon 'nimArgsPassingDone' + # will be called which already will perform a fence for us. proc barrierLeave(b: ptr Barrier) {.compilerProc, inline.} = atomicInc b.left + when not defined(x86): fence() if b.interest and b.left == b.entered: signal(b.cv) proc openBarrier(b: ptr Barrier) {.compilerProc, inline.} = @@ -67,10 +70,12 @@ proc openBarrier(b: ptr Barrier) {.compilerProc, inline.} = b.interest = false proc closeBarrier(b: ptr Barrier) {.compilerProc.} = + fence() if b.left != b.entered: b.cv = createCondVar() - b.interest = true # XXX we really need to ensure no re-orderings are done - # by the C compiler here + fence() + b.interest = true + fence() while b.left != b.entered: await(b.cv) destroyCondVar(b.cv) @@ -207,9 +212,9 @@ proc `^`*[T](prom: Promise[T]): T = result = prom.blob proc awaitAny*(promises: openArray[RawPromise]): int = - # awaits any of the given promises. Returns the index of one promise for which - ## a value arrived. A promise only supports one call to 'awaitAny' at the - ## same time. That means if you await([a,b]) and await([b,c]) the second + ## awaits any of the given promises. Returns the index of one promise for + ## which a value arrived. A promise only supports one call to 'awaitAny' at + ## the same time. That means if you await([a,b]) and await([b,c]) the second ## call will only await 'c'. If there is no promise left to be able to wait ## on, -1 is returned. ## **Note**: This results in non-deterministic behaviour and so should be @@ -294,14 +299,16 @@ proc preferSpawn*(): bool = proc spawn*(call: expr): expr {.magic: "Spawn".} ## always spawns a new task, so that the 'call' is never executed on ## the calling thread. 'call' has to be proc call 'p(...)' where 'p' - ## is gcsafe and has 'void' as the return type. + ## is gcsafe and has a return type that is either 'void' or compatible + ## with ``Promise[T]``. template spawnX*(call: expr): expr = ## spawns a new task if a CPU core is ready, otherwise executes the ## call in the calling thread. Usually it is advised to ## use 'spawn' in order to not block the producer for an unknown ## amount of time. 'call' has to be proc call 'p(...)' where 'p' - ## is gcsafe and has 'void' as the return type. + ## is gcsafe and has a return type that is either 'void' or compatible + ## with ``Promise[T]``. (if preferSpawn(): spawn call else: call) proc parallel*(body: stmt) {.magic: "Parallel".} diff --git a/lib/system/atomics.nim b/lib/system/atomics.nim index 6e2bd9a97..43b3f0438 100644 --- a/lib/system/atomics.nim +++ b/lib/system/atomics.nim @@ -10,7 +10,9 @@ ## Atomic operations for Nimrod. {.push stackTrace:off.} -when (defined(gcc) or defined(llvm_gcc)) and hasThreadSupport: +const someGcc = defined(gcc) or defined(llvm_gcc) or defined(clang) + +when someGcc and hasThreadSupport: type AtomMemModel* = enum ATOMIC_RELAXED, ## No barriers or synchronization. @@ -163,33 +165,6 @@ else: inc(p[], val) result = p[] -# atomic compare and swap (CAS) funcitons to implement lock-free algorithms - -#if defined(windows) and not defined(gcc) and hasThreadSupport: -# proc InterlockedCompareExchangePointer(mem: ptr pointer, -# newValue: pointer, comparand: pointer) : pointer {.nodecl, -# importc: "InterlockedCompareExchangePointer", header:"windows.h".} - -# proc compareAndSwap*[T](mem: ptr T, -# expected: T, newValue: T): bool {.inline.}= -# ## Returns true if successfully set value at mem to newValue when value -# ## at mem == expected -# return InterlockedCompareExchangePointer(addr(mem), -# addr(newValue), addr(expected))[] == expected - -#elif not hasThreadSupport: -# proc compareAndSwap*[T](mem: ptr T, -# expected: T, newValue: T): bool {.inline.} = -# ## Returns true if successfully set value at mem to newValue when value -# ## at mem == expected -# var oldval = mem[] -# if oldval == expected: -# mem[] = newValue -# return true -# return false - - -# Some convenient functions proc atomicInc*(memLoc: var int, x: int = 1): int = when defined(gcc) and hasThreadSupport: result = atomic_add_fetch(memLoc.addr, x, ATOMIC_RELAXED) @@ -207,7 +182,7 @@ proc atomicDec*(memLoc: var int, x: int = 1): int = dec(memLoc, x) result = memLoc -when defined(windows) and not defined(gcc): +when defined(windows) and not someGcc: proc interlockedCompareExchange(p: pointer; exchange, comparand: int32): int32 {.importc: "InterlockedCompareExchange", header: "", cdecl.} @@ -221,7 +196,7 @@ else: # XXX is this valid for 'int'? -when (defined(x86) or defined(amd64)) and defined(gcc): +when (defined(x86) or defined(amd64)) and (defined(gcc) or defined(llvm_gcc)): proc cpuRelax {.inline.} = {.emit: """asm volatile("pause" ::: "memory");""".} elif (defined(x86) or defined(amd64)) and defined(vcc): diff --git a/todo.txt b/todo.txt index 8a351e8a7..7d4eac1ad 100644 --- a/todo.txt +++ b/todo.txt @@ -8,8 +8,11 @@ Concurrency - implement 'deepCopy' builtin - implement 'foo[1..4] = spawn(f[4..7])' - the disjoint checker needs to deal with 'a = spawn f(); g = spawn f()' +- support for exception propagation - Minor: The copying of the 'ref Promise' into the thead local storage only happens to work due to the write barrier's implementation +- 'gcsafe' inferrence needs to be fixed +- implement lock levels --> first without the more complex race avoidance Misc -- cgit 1.4.1-2-gfad0 From 15909c7be25a5956d0af7bd8d975b23d51fba560 Mon Sep 17 00:00:00 2001 From: Araq Date: Thu, 12 Jun 2014 16:05:50 +0200 Subject: optimized method dispatchers --- compiler/ast.nim | 1 + compiler/ccgexprs.nim | 24 ++++++++++++++++++++---- compiler/ccgtypes.nim | 4 ++++ compiler/cgen.nim | 1 + compiler/cgendata.nim | 1 + lib/system/chcks.nim | 22 ++++++++++++++++++++++ todo.txt | 1 - 7 files changed, 49 insertions(+), 5 deletions(-) (limited to 'lib/system') diff --git a/compiler/ast.nim b/compiler/ast.nim index 80b9e9bb2..d74818e1f 100644 --- a/compiler/ast.nim +++ b/compiler/ast.nim @@ -510,6 +510,7 @@ const tfUncheckedArray* = tfVarargs tfUnion* = tfNoSideEffect tfGcSafe* = tfThread + tfObjHasKids* = tfEnumHasHoles skError* = skUnknown # type flags that are essential for type equality: diff --git a/compiler/ccgexprs.nim b/compiler/ccgexprs.nim index 39333a80d..13b3091fc 100644 --- a/compiler/ccgexprs.nim +++ b/compiler/ccgexprs.nim @@ -1144,6 +1144,24 @@ proc genNewFinalize(p: BProc, e: PNode) = genObjectInit(p, cpsStmts, bt, a, false) gcUsage(e) +proc genOfHelper(p: BProc; dest: PType; a: PRope): PRope = + # unfortunately 'genTypeInfo' sets tfObjHasKids as a side effect, so we + # have to call it here first: + let ti = genTypeInfo(p.module, dest) + if tfFinal in dest.flags or (p.module.objHasKidsValid and + tfObjHasKids notin dest.flags): + result = ropef("$1.m_type == $2", a, ti) + else: + discard cgsym(p.module, "TNimType") + inc p.module.labels + let cache = con("Nim_OfCheck_CACHE", p.module.labels.toRope) + appf(p.module.s[cfsVars], "static TNimType* $#[2];$n", cache) + result = rfmt(p.module, "#isObjWithCache($#.m_type, $#, $#)", a, ti, cache) + when false: + # former version: + result = rfmt(p.module, "#isObj($1.m_type, $2)", + a, genTypeInfo(p.module, dest)) + proc genOf(p: BProc, x: PNode, typ: PType, d: var TLoc) = var a: TLoc initLocExpr(p, x, a) @@ -1163,11 +1181,9 @@ proc genOf(p: BProc, x: PNode, typ: PType, d: var TLoc) = globalError(x.info, errGenerated, "no 'of' operator available for pure objects") if nilCheck != nil: - r = rfmt(p.module, "(($1) && #isObj($2.m_type, $3))", - nilCheck, r, genTypeInfo(p.module, dest)) + r = rfmt(p.module, "(($1) && ($2))", nilCheck, genOfHelper(p, dest, r)) else: - r = rfmt(p.module, "#isObj($1.m_type, $2)", - r, genTypeInfo(p.module, dest)) + r = rfmt(p.module, "($1)", genOfHelper(p, dest, r)) putIntoDest(p, d, getSysType(tyBool), r) proc genOf(p: BProc, n: PNode, d: var TLoc) = diff --git a/compiler/ccgtypes.nim b/compiler/ccgtypes.nim index 7a490082f..28c791e42 100644 --- a/compiler/ccgtypes.nim +++ b/compiler/ccgtypes.nim @@ -796,6 +796,10 @@ proc genObjectInfo(m: BModule, typ: PType, name: PRope) = var tmp = getNimNode(m) genObjectFields(m, typ, typ.n, tmp) appf(m.s[cfsTypeInit3], "$1.node = &$2;$n", [name, tmp]) + var t = typ.sons[0] + while t != nil: + t.skipTypes(abstractInst).flags.incl tfObjHasKids + t = t.sons[0] proc genTupleInfo(m: BModule, typ: PType, name: PRope) = genTypeInfoAuxBase(m, typ, name, toRope("0")) diff --git a/compiler/cgen.nim b/compiler/cgen.nim index 198b1187d..7094990f7 100644 --- a/compiler/cgen.nim +++ b/compiler/cgen.nim @@ -1383,6 +1383,7 @@ proc myClose(b: PPassContext, n: PNode): PNode = registerModuleToMain(m.module) if sfMainModule in m.module.flags: + m.objHasKidsValid = true var disp = generateMethodDispatchers() for i in 0..sonsLen(disp)-1: genProcAux(m, disp.sons[i].sym) genMainProc(m) diff --git a/compiler/cgendata.nim b/compiler/cgendata.nim index e7d818556..12041c55b 100644 --- a/compiler/cgendata.nim +++ b/compiler/cgendata.nim @@ -96,6 +96,7 @@ type # a frame var twice in an init proc isHeaderFile*: bool # C source file is the header file includesStringh*: bool # C source file already includes ```` + objHasKidsValid*: bool # whether we can rely on tfObjHasKids cfilename*: string # filename of the module (including path, # without extension) typeCache*: TIdTable # cache the generated types diff --git a/lib/system/chcks.nim b/lib/system/chcks.nim index f29e222e8..387b54ef1 100644 --- a/lib/system/chcks.nim +++ b/lib/system/chcks.nim @@ -67,6 +67,28 @@ proc chckObjAsgn(a, b: PNimType) {.compilerproc, inline.} = if a != b: sysFatal(EInvalidObjectAssignment, "invalid object assignment") +type ObjCheckCache = array[0..1, PNimType] + +proc isObjSlowPath(obj, subclass: PNimType; + cache: var ObjCheckCache): bool {.noinline.} = + # checks if obj is of type subclass: + var x = obj.base + while x != subclass: + if x == nil: + cache[0] = obj + return false + x = x.base + cache[1] = obj + return true + +proc isObjWithCache(obj, subclass: PNimType; + cache: var ObjCheckCache): bool {.compilerProc, inline.} = + if obj == subclass: return true + if obj.base == subclass: return true + if cache[0] == obj: return false + if cache[1] == obj: return true + return isObjSlowPath(obj, subclass, cache) + proc isObj(obj, subclass: PNimType): bool {.compilerproc.} = # checks if obj is of type subclass: var x = obj diff --git a/todo.txt b/todo.txt index 996067175..539089281 100644 --- a/todo.txt +++ b/todo.txt @@ -138,7 +138,6 @@ Not essential for 1.0.0 - implement the "snoopResult" pragma; no, make a strutils with string append semantics instead ... - implement "closure tuple consists of a single 'ref'" optimization -- optimize method dispatchers - new feature: ``distinct T with operations`` - arglist as a type (iterator chaining); variable length type lists for generics - implement marker procs for message passing -- cgit 1.4.1-2-gfad0