#
#
#            Nim's Runtime Library
#        (c) Copyright 2015 Andreas Rumpf
#
#    See the file "copying.txt", included in this
#    distribution, for details about the copyright.
#

## Implements Nim's 'spawn'.

when not compileOption("threads"):
  {.error: "Threadpool requires --threads:on option.".}

import cpuinfo, cpuload, locks

{.push stackTrace:off.}

type
  # Counting semaphore built from a condition variable, the lock that
  # guards it, and the current count.
  Semaphore = object
    c: TCond
    L: TLock
    counter: int

proc createSemaphore(): Semaphore =
  ## Initializes the condition variable and the lock of a new semaphore.
  ## `counter` is not assigned here, so it keeps the default value of
  ## `result`.
  initCond(result.c)
  initLock(result.L)

proc destroySemaphore(cv: var Semaphore) {.inline.} =
  ## Deinitializes the condition variable and the lock of `cv`.
  deinitCond(cv.c)
  deinitLock(cv.L)

proc await(cv: var Semaphore) =
  ## Blocks while `cv.counter <= 0`, then decrements the counter. The check,
  ## the wait and the decrement all happen while holding `cv.L`.
  acquire(cv.L)
  while cv.counter <= 0:
    wait(cv.c, cv.L)
  dec cv.counter
  release(cv.L)

proc signal(cv: var Semaphore) =
  ## Increments `cv.counter` under the lock and signals the condition
  ## variable after the lock has been released.
  acquire(cv.L)
  inc cv.counter
  release(cv.L)
  signal(cv.c)

const CacheLineSize = 32 # true for most archs

type
  # Counts how many tasks entered and how many left; `cv` is used to notify
  # a waiter once `left` has caught up with `entered` (see 'barrierLeave'
  # and 'closeBarrier'). The 'cacheAlign' arrays are padding sized from
  # 'CacheLineSize'.
  Barrier {.compilerProc.} = object
    entered: int
    cv: Semaphore # Semaphore takes 3 words at least
    when sizeof(int) < 8:
      cacheAlign: array[CacheLineSize-4*sizeof(int), byte]
    left: int
    cacheAlign2: array[CacheLineSize-sizeof(int), byte]
    interest: bool ## whether the master is interested in the "all done" event

proc barrierEnter(b: ptr Barrier) {.compilerProc, inline.} =
  ## Records that one more task entered the barrier (non-atomic increment).
  # due to the signaling between threads, it is ensured we are the only
  # one with access to 'entered' so we don't need 'atomicInc' here:
  inc b.entered
  # also we need no 'fence' instructions here, as 'nimArgsPassingDone'
  # will be called soon, which already performs a fence for us.

proc barrierLeave(b: ptr Barrier) {.compilerProc, inline.} =
  ## Records that one task left the barrier (atomic increment) and signals
  ## `b.cv` when `b.interest` is set and `left` has reached `entered`.
  atomicInc b.left
  when not defined(x86): fence()
  # We may not have seen the final value of b.entered yet,
  # so we need to check for >= instead of ==.
  if b.interest and b.left >= b.entered: signal(b.cv)

proc openBarrier(b: ptr Barrier) {.compilerProc, inline.} =
  ## Resets both counters to 0 and clears the `interest` flag.
  b.entered = 0
  b.left = 0
  b.interest = false

proc closeBarrier(b: ptr Barrier) {.compilerProc.} =
  ## Returns once `b.left == b.entered`. If that does not hold on entry, a
  ## semaphore is created in `b.cv`, `b.interest` is set, and the caller
  ## waits on the semaphore until the counters match; the semaphore is
  ## destroyed afterwards.
  fence()
  if b.left != b.entered:
    b.cv = createSemaphore()
    fence()
    # from here on 'barrierLeave' is allowed to signal 'b.cv':
    b.interest = true
    fence()
    while b.left != b.entered: await(b.cv)
    destroySemaphore(b.cv)

{.pop.}

# ----------------------------------------------------------------------------

type
  foreign* = object ## a region that indicates the pointer comes from a
                    ## foreign thread heap.
  AwaitInfo = object
    cv: Semaphore
    idx: int

  FlowVarBase* = ref FlowVarBaseObj ## untyped base class for 'FlowVar[T]'
  FlowVarBaseObj = object of RootObj
    ready, usesSemaphore, awaited: bool
    cv: Semaphore #\
    # for 'awaitAny' support
    ai: ptr AwaitInfo
    idx: int
    data: pointer  # we incRef and unref it to keep it alive; note this MUST NOT
                   # be RootRef here otherwise the wrong GC keeps track of it!
    owner: pointer # ptr Worker

  FlowVarObj[T] = object of FlowVarBaseObj
    blob: T

  FlowVar*{.compilerProc.}[T] = ref FlowVarObj[T] ## a data flow variable

  # Fixed-capacity (128 entries) array of pointers plus a length, a lock
  # and a semaphore.
  ToFreeQueue = object
    len: int
    lock: TLock
    empty: Semaphore
    data: array[128, pointer]

  WorkerProc = proc (thread, args: pointer) {.nimcall, gcsafe.}
  Worker = object
    taskArrived: Semaphore
    taskStarted: Semaphore #\
    # task data:
    f: WorkerProc
    data: pointer
    ready: bool # put it here for correct alignment!
    initialized: bool # whether it has even been initialized
    shutdown: bool # the pool requests to shut down this worker thread
    q: ToFreeQueue

proc await*(fv: FlowVarBase) =
  ## waits until the value for the flowVar arrives. Usually it is not necessary
  ## to call this explicitly.
  # 'awaited' is set before waiting, so a later call skips both the wait
  # and the destruction of the semaphore.
  if fv.usesSemaphore and not fv.awaited:
    fv.awaited = true
    await(fv.cv)
    destroySemaphore(fv.cv)

proc selectWorker(w: ptr Worker; fn: WorkerProc; data: pointer): bool =
  ## Tries to hand the task (`fn`, `data`) to worker `w`. The worker is
  ## claimed by swapping `w.ready` from true to false; on success the task
  ## is stored, `taskArrived` is signalled, the caller waits on
  ## `taskStarted`, and true is returned. If the swap fails nothing is
  ## changed and the result stays false.
  if cas(addr w.ready, true, false):
    w.data = data
    w.f = fn
    signal(w.taskArrived)
    await(w.taskStarted)
    result = true

proc cleanFlowVars(w: ptr Worker) =
  let q = addr(w.q)
  acquire(q.lock)
  # NOTE(review): the text looks damaged from here on. The upper bound of
  # the first loop and everything up to a comparison against 'maxPoolSize'
  # appear to be missing (possibly a '<' ... '>' span that was stripped).
  # The tokens below are reproduced verbatim; their nesting is a guess, and
  # 'maxPoolSize', 'currentPoolSize' and 'workersData' are not declared in
  # the visible text. Restore this region from a pristine copy -- TODO.
  for i in 0 .. maxPoolSize:
    for i in maxPoolSize..currentPoolSize-1:
      let w = addr(workersData[i])
      w.shutdown = true

proc activateWorkerThread(i: int) {.noinline.} =
  ## Initializes the semaphores and the queue lock of `workersData[i]`,
  ## marks the entry as initialized and then creates thread `workers[i]`
  ## running `slave` with a pointer to that entry.
  workersData[i].taskArrived = createSemaphore()
  workersData[i].taskStarted = createSemaphore()
  workersData[i].initialized = true
  workersData[i].q.empty = createSemaphore()
  initLock(workersData[i].q.lock)
  createThread(workers[i], slave, addr(workersData[i]))

proc activateDistinguishedThread(i: int) {.noinline.} =
  ## Same initialization sequence as 'activateWorkerThread', but for
  ## `distinguishedData[i]`; the created thread is `distinguished[i]` and
  ## runs `distinguishedSlave`.
  distinguishedData[i].taskArrived = createSemaphore()
  distinguishedData[i].taskStarted = createSemaphore()
  distinguishedData[i].initialized = true
  distinguishedData[i].q.empty = createSemaphore()
  initLock(distinguishedData[i].q.lock)
  createThread(distinguished[i], distinguishedSlave,
               addr(distinguishedData[i]))

proc setup() =
  # the pool size is the processor count, capped at 'MaxThreadPoolSize':
  currentPoolSize = min(countProcessors(), MaxThreadPoolSize)
  readyWorker = addr(workersData[0])
  # NOTE(review): the text looks damaged here. The loop bound, the loop
  # body and whatever followed up to a trailing '> 0' appear to be missing
  # (possibly a '<' ... '>' span that was stripped). The remaining tokens
  # are reproduced verbatim. Restore from a pristine copy -- TODO.
  for i in 0.. 0

proc spawn*(call: expr): expr {.magic: "Spawn".}
  ## always spawns a new task, so that the 'call' is never executed on
  ## the calling thread. 'call' has to be proc call 'p(...)' where 'p'
  ## is gcsafe and has a return type that is either 'void' or compatible
  ## with ``FlowVar[T]``.

proc pinnedSpawn*(id: ThreadId; call: expr): expr {.magic: "Spawn".}
  ## always spawns a new task on the worker thread with ``id``, so that
  ## the 'call' is **always** executed on
  ## this thread. 'call' has to be proc call 'p(...)' where 'p'
  ## is gcsafe and has a return type that is either 'void' or compatible
  ## with ``FlowVar[T]``.
template spawnX*(call: expr): expr =
  ## spawns a new task if a CPU core is ready, otherwise executes the
  ## call in the calling thread. Usually it is advised to
  ## use 'spawn' in order to not block the producer for an unknown
  ## amount of time. 'call' has to be proc call 'p(...)' where 'p'
  ## is gcsafe and has a return type that is either 'void' or compatible
  ## with ``FlowVar[T]``.
  (if preferSpawn(): spawn call else: call)

proc parallel*(body: stmt) {.magic: "Parallel".}
  ## a parallel section can be used to execute a block in parallel. ``body``
  ## has to be in a DSL that is a particular subset of the language. Please
  ## refer to the manual for further information.

var
  state: ThreadPoolState
  stateLock: TLock

initLock stateLock

proc nimSpawn3(fn: WorkerProc; data: pointer) {.compilerProc.} =
  # implementation of 'spawn' that is used by the code generator.
  while true:
    # first try the worker that 'readyWorker' points to:
    if selectWorker(readyWorker, fn, data): return
    # NOTE(review): the text looks damaged from here on. The loop bound,
    # the loop body and everything up to a comparison against 'minPoolSize'
    # appear to be missing (possibly a '<' ... '>' span that was stripped);
    # the 'acquire' that the comments and 'release(stateLock)' below refer
    # to is not visible either. The tokens are reproduced verbatim and their
    # nesting is a guess. Restore from a pristine copy -- TODO.
    for i in 0.. minPoolSize:
      let w = addr(workersData[currentPoolSize-1])
      w.shutdown = true
      # we don't free anything here. Too dangerous.
      release(stateLock)
      # else the acquire failed, but this means some
      # other thread succeeded, so we don't need to do anything here.
    await(gSomeReady)

var
  distinguishedLock: TLock

initLock distinguishedLock

proc nimSpawn4(fn: WorkerProc; data: pointer; id: ThreadId) {.compilerProc.} =
  ## Hands the task (`fn`, `data`) to the worker `distinguishedData[id]`.
  ## The whole call runs while holding `distinguishedLock`. The worker is
  ## activated first if it is not yet marked as initialized; afterwards
  ## 'selectWorker' is retried, with 'cpuRelax' between attempts, until it
  ## succeeds.
  acquire(distinguishedLock)
  if not distinguishedData[id].initialized:
    activateDistinguishedThread(id)
  while true:
    if selectWorker(addr(distinguishedData[id]), fn, data): break
    cpuRelax()
    # XXX exponential backoff?
  release(distinguishedLock)

proc sync*() =
  ## a simple barrier to wait for all spawn'ed tasks. If you need more elaborate
  ## waiting, you have to use an explicit barrier.
  while true:
    var allReady = true
    # NOTE(review): the text ends in the middle of the next statement; the
    # rest of this proc is not visible here.
    for i in 0 ..