diff options
Diffstat (limited to 'lib/system')
-rw-r--r-- | lib/system/alloc.nim | 7 | ||||
-rw-r--r-- | lib/system/avltree.nim | 4 | ||||
-rw-r--r-- | lib/system/cgprocs.nim | 4 | ||||
-rw-r--r-- | lib/system/excpt.nim | 12 | ||||
-rw-r--r-- | lib/system/gc.nim | 21 | ||||
-rw-r--r-- | lib/system/gc_ms.nim | 16 | ||||
-rw-r--r-- | lib/system/hti.nim | 2 | ||||
-rw-r--r-- | lib/system/syslocks.nim | 12 | ||||
-rw-r--r-- | lib/system/sysspawn.nim | 172 |
9 files changed, 213 insertions, 37 deletions
diff --git a/lib/system/alloc.nim b/lib/system/alloc.nim index 954485eb4..eaef6cd95 100644 --- a/lib/system/alloc.nim +++ b/lib/system/alloc.nim @@ -722,10 +722,13 @@ proc alloc0(allocator: var TMemRegion, size: int): pointer = zeroMem(result, size) proc dealloc(allocator: var TMemRegion, p: pointer) = + sysAssert(p != nil, "dealloc 0") var x = cast[pointer](cast[TAddress](p) -% sizeof(TFreeCell)) - sysAssert(cast[ptr TFreeCell](x).zeroField == 1, "dealloc 1") + sysAssert(x != nil, "dealloc 1") + sysAssert(isAccessible(allocator, x), "is not accessible") + sysAssert(cast[ptr TFreeCell](x).zeroField == 1, "dealloc 2") rawDealloc(allocator, x) - sysAssert(not isAllocatedPtr(allocator, x), "dealloc 2") + sysAssert(not isAllocatedPtr(allocator, x), "dealloc 3") proc realloc(allocator: var TMemRegion, p: pointer, newsize: int): pointer = if newsize > 0: diff --git a/lib/system/avltree.nim b/lib/system/avltree.nim index fc965d6aa..bced15d6a 100644 --- a/lib/system/avltree.nim +++ b/lib/system/avltree.nim @@ -51,7 +51,7 @@ proc split(t: var PAvlNode) = t.link[0] = temp inc t.level -proc add(a: var TMemRegion, t: var PAvlNode, key, upperBound: int) = +proc add(a: var TMemRegion, t: var PAvlNode, key, upperBound: int) {.gcsafe.} = if t == bottom: t = allocAvlNode(a, key, upperBound) else: @@ -64,7 +64,7 @@ proc add(a: var TMemRegion, t: var PAvlNode, key, upperBound: int) = skew(t) split(t) -proc del(a: var TMemRegion, t: var PAvlNode, x: int) = +proc del(a: var TMemRegion, t: var PAvlNode, x: int) {.gcsafe.} = if t == bottom: return a.last = t if x <% t.key: diff --git a/lib/system/cgprocs.nim b/lib/system/cgprocs.nim index e30cfa469..d483c61bd 100644 --- a/lib/system/cgprocs.nim +++ b/lib/system/cgprocs.nim @@ -9,7 +9,7 @@ # Headers for procs that the code generator depends on ("compilerprocs") -proc addChar(s: NimString, c: char): NimString {.compilerProc.} +proc addChar(s: NimString, c: char): NimString {.compilerProc, gcsafe.} type TLibHandle = pointer # private type @@ -21,5 +21,5 @@ proc nimGetProcAddr(lib: TLibHandle, name: cstring): TProcAddr {.compilerproc.} proc nimLoadLibraryError(path: string) {.compilerproc, noinline.} -proc setStackBottom(theStackBottom: pointer) {.compilerRtl, noinline.} +proc setStackBottom(theStackBottom: pointer) {.compilerRtl, noinline, gcsafe.} diff --git a/lib/system/excpt.nim b/lib/system/excpt.nim index e50ba7b9f..612a9e729 100644 --- a/lib/system/excpt.nim +++ b/lib/system/excpt.nim @@ -11,7 +11,7 @@ # use the heap (and nor exceptions) do not include the GC or memory allocator. var - errorMessageWriter*: (proc(msg: string) {.tags: [FWriteIO].}) + errorMessageWriter*: (proc(msg: string) {.tags: [FWriteIO], gcsafe.}) ## Function that will be called ## instead of stdmsg.write when printing stacktrace. ## Unstable API. @@ -32,10 +32,10 @@ proc showErrorMessage(data: cstring) = else: writeToStdErr(data) -proc chckIndx(i, a, b: int): int {.inline, compilerproc.} -proc chckRange(i, a, b: int): int {.inline, compilerproc.} -proc chckRangeF(x, a, b: float): float {.inline, compilerproc.} -proc chckNil(p: pointer) {.noinline, compilerproc.} +proc chckIndx(i, a, b: int): int {.inline, compilerproc, gcsafe.} +proc chckRange(i, a, b: int): int {.inline, compilerproc, gcsafe.} +proc chckRangeF(x, a, b: float): float {.inline, compilerproc, gcsafe.} +proc chckNil(p: pointer) {.noinline, compilerproc, gcsafe.} var framePtr {.rtlThreadVar.}: PFrame @@ -322,5 +322,5 @@ when not defined(noSignalHandler): proc setControlCHook(hook: proc () {.noconv.}) = # ugly cast, but should work on all architectures: - type TSignalHandler = proc (sig: cint) {.noconv.} + type TSignalHandler = proc (sig: cint) {.noconv, gcsafe.} c_signal(SIGINT, cast[TSignalHandler](hook)) diff --git a/lib/system/gc.nim b/lib/system/gc.nim index ec1760914..3b85fe600 100644 --- a/lib/system/gc.nim +++ b/lib/system/gc.nim @@ -51,7 +51,7 @@ type waZctDecRef, waPush, waCycleDecRef, waMarkGray, waScan, waScanBlack, waCollectWhite, - TFinalizer {.compilerproc.} = proc (self: pointer) {.nimcall.} + TFinalizer {.compilerproc.} = proc (self: pointer) {.nimcall, gcsafe.} # A ref type can have a finalizer that is called before the object's # storage is freed. @@ -152,11 +152,11 @@ template gcTrace(cell, state: expr): stmt {.immediate.} = when traceGC: traceCell(cell, state) # forward declarations: -proc collectCT(gch: var TGcHeap) -proc isOnStack*(p: pointer): bool {.noinline.} -proc forAllChildren(cell: PCell, op: TWalkOp) -proc doOperation(p: pointer, op: TWalkOp) -proc forAllChildrenAux(dest: pointer, mt: PNimType, op: TWalkOp) +proc collectCT(gch: var TGcHeap) {.gcsafe.} +proc isOnStack*(p: pointer): bool {.noinline, gcsafe.} +proc forAllChildren(cell: PCell, op: TWalkOp) {.gcsafe.} +proc doOperation(p: pointer, op: TWalkOp) {.gcsafe.} +proc forAllChildrenAux(dest: pointer, mt: PNimType, op: TWalkOp) {.gcsafe.} # we need the prototype here for debugging purposes when hasThreadSupport and hasSharedHeap: @@ -294,7 +294,7 @@ proc initGC() = when useMarkForDebug or useBackupGc: type - TGlobalMarkerProc = proc () {.nimcall.} + TGlobalMarkerProc = proc () {.nimcall, gcsafe.} var globalMarkersLen: int globalMarkers: array[0.. 7_000, TGlobalMarkerProc] @@ -311,7 +311,7 @@ proc cellsetReset(s: var TCellSet) = deinit(s) init(s) -proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: TWalkOp) = +proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: TWalkOp) {.gcsafe.} = var d = cast[TAddress](dest) case n.kind of nkSlot: forAllChildrenAux(cast[pointer](d +% n.offset), n.typ, op) @@ -680,10 +680,11 @@ proc doOperation(p: pointer, op: TWalkOp) = proc nimGCvisit(d: pointer, op: int) {.compilerRtl.} = doOperation(d, TWalkOp(op)) -proc collectZCT(gch: var TGcHeap): bool +proc collectZCT(gch: var TGcHeap): bool {.gcsafe.} when useMarkForDebug or useBackupGc: - proc markStackAndRegistersForSweep(gch: var TGcHeap) {.noinline, cdecl.} + proc markStackAndRegistersForSweep(gch: var TGcHeap) {.noinline, cdecl, + gcsafe.} proc collectRoots(gch: var TGcHeap) = for s in elements(gch.cycleRoots): diff --git a/lib/system/gc_ms.nim b/lib/system/gc_ms.nim index e78a4e5cd..3c99a57e1 100644 --- a/lib/system/gc_ms.nim +++ b/lib/system/gc_ms.nim @@ -32,11 +32,11 @@ type # local waMarkPrecise # fast precise marking - TFinalizer {.compilerproc.} = proc (self: pointer) {.nimcall.} + TFinalizer {.compilerproc.} = proc (self: pointer) {.nimcall, gcsafe.} # A ref type can have a finalizer that is called before the object's # storage is freed. - TGlobalMarkerProc = proc () {.nimcall.} + TGlobalMarkerProc = proc () {.nimcall, gcsafe.} TGcStat = object collections: int # number of performed full collections @@ -113,11 +113,11 @@ when BitsPerPage mod (sizeof(int)*8) != 0: {.error: "(BitsPerPage mod BitsPerUnit) should be zero!".} # forward declarations: -proc collectCT(gch: var TGcHeap) -proc isOnStack*(p: pointer): bool {.noinline.} -proc forAllChildren(cell: PCell, op: TWalkOp) -proc doOperation(p: pointer, op: TWalkOp) -proc forAllChildrenAux(dest: pointer, mt: PNimType, op: TWalkOp) +proc collectCT(gch: var TGcHeap) {.gcsafe.} +proc isOnStack*(p: pointer): bool {.noinline, gcsafe.} +proc forAllChildren(cell: PCell, op: TWalkOp) {.gcsafe.} +proc doOperation(p: pointer, op: TWalkOp) {.gcsafe.} +proc forAllChildrenAux(dest: pointer, mt: PNimType, op: TWalkOp) {.gcsafe.} # we need the prototype here for debugging purposes proc prepareDealloc(cell: PCell) = @@ -150,7 +150,7 @@ proc initGC() = Init(gch.allocated) init(gch.marked) -proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: TWalkOp) = +proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: TWalkOp) {.gcsafe.} = var d = cast[TAddress](dest) case n.kind of nkSlot: forAllChildrenAux(cast[pointer](d +% n.offset), n.typ, op) diff --git a/lib/system/hti.nim b/lib/system/hti.nim index 9d8ece7df..64174e60f 100644 --- a/lib/system/hti.nim +++ b/lib/system/hti.nim @@ -85,7 +85,7 @@ type base: ptr TNimType node: ptr TNimNode # valid for tyRecord, tyObject, tyTuple, tyEnum finalizer: pointer # the finalizer for the type - marker: proc (p: pointer, op: int) {.nimcall.} # marker proc for GC + marker: proc (p: pointer, op: int) {.nimcall, gcsafe.} # marker proc for GC PNimType = ptr TNimType # node.len may be the ``first`` element of a set diff --git a/lib/system/syslocks.nim b/lib/system/syslocks.nim index 5e3b04b7f..b8ed29cfc 100644 --- a/lib/system/syslocks.nim +++ b/lib/system/syslocks.nim @@ -52,7 +52,7 @@ when defined(Windows): proc closeHandle(hObject: THandle) {.stdcall, noSideEffect, dynlib: "kernel32", importc: "CloseHandle".} proc waitForSingleObject(hHandle: THandle, dwMilliseconds: int32): int32 {. - stdcall, dynlib: "kernel32", importc: "WaitForSingleObject".} + stdcall, dynlib: "kernel32", importc: "WaitForSingleObject", noSideEffect.} proc signalSysCond(hEvent: TSysCond) {.stdcall, noSideEffect, dynlib: "kernel32", importc: "SetEvent".} @@ -89,16 +89,16 @@ else: proc releaseSys(L: var TSysLock) {.noSideEffect, importc: "pthread_mutex_unlock", header: "<pthread.h>".} - proc deinitSys(L: var TSysLock) {. + proc deinitSys(L: var TSysLock) {.noSideEffect, importc: "pthread_mutex_destroy", header: "<pthread.h>".} proc initSysCond(cond: var TSysCond, cond_attr: pointer = nil) {. - importc: "pthread_cond_init", header: "<pthread.h>".} + importc: "pthread_cond_init", header: "<pthread.h>", noSideEffect.} proc waitSysCond(cond: var TSysCond, lock: var TSysLock) {. - importc: "pthread_cond_wait", header: "<pthread.h>".} + importc: "pthread_cond_wait", header: "<pthread.h>", noSideEffect.} proc signalSysCond(cond: var TSysCond) {. - importc: "pthread_cond_signal", header: "<pthread.h>".} + importc: "pthread_cond_signal", header: "<pthread.h>", noSideEffect.} - proc deinitSysCond(cond: var TSysCond) {. + proc deinitSysCond(cond: var TSysCond) {.noSideEffect, importc: "pthread_cond_destroy", header: "<pthread.h>".} diff --git a/lib/system/sysspawn.nim b/lib/system/sysspawn.nim new file mode 100644 index 000000000..3a641aba6 --- /dev/null +++ b/lib/system/sysspawn.nim @@ -0,0 +1,172 @@ +# Implements Nimrod's 'spawn'. + +{.push stackTrace:off.} +include system.syslocks + +when (defined(x86) or defined(amd64)) and defined(gcc): + proc cpuRelax {.inline.} = + {.emit: """asm volatile("pause" ::: "memory");""".} +elif (defined(x86) or defined(amd64)) and defined(vcc): + proc cpuRelax {.importc: "YieldProcessor", header: "<windows.h>".} +elif defined(intelc): + proc cpuRelax {.importc: "_mm_pause", header: "xmmintrin.h".} +else: + from os import sleep + + proc cpuRelax {.inline.} = os.sleep(1) + +when defined(windows): + proc interlockedCompareExchange(p: pointer; exchange, comparand: int32): int32 + {.importc: "InterlockedCompareExchange", header: "<windows.h>", cdecl.} + + proc cas(p: ptr bool; oldValue, newValue: bool): bool = + interlockedCompareExchange(p, newValue.int32, oldValue.int32) != 0 + +else: + # this is valid for GCC and Intel C++ + proc cas(p: ptr bool; oldValue, newValue: bool): bool + {.importc: "__sync_bool_compare_and_swap", nodecl.} + +# We declare our own condition variables here to get rid of the dummy lock +# on Windows: + +type + CondVar = object + c: TSysCond + when defined(posix): + stupidLock: TSysLock + +proc createCondVar(): CondVar = + initSysCond(result.c) + when defined(posix): + initSysLock(result.stupidLock) + acquireSys(result.stupidLock) + +proc await(cv: var CondVar) = + when defined(posix): + waitSysCond(cv.c, cv.stupidLock) + else: + waitSysCondWindows(cv.c) + +proc signal(cv: var CondVar) = signalSysCond(cv.c) + +type + FastCondVar = object + event, slowPath: bool + slow: CondVar + +proc createFastCondVar(): FastCondVar = + initSysCond(result.slow.c) + when defined(posix): + initSysLock(result.slow.stupidLock) + acquireSys(result.slow.stupidLock) + result.event = false + result.slowPath = false + +proc await(cv: var FastCondVar) = + #for i in 0 .. 50: + # if cas(addr cv.event, true, false): + # # this is a HIT: Triggers > 95% in my tests. + # return + # cpuRelax() + #cv.slowPath = true + await(cv.slow) + cv.event = false + +proc signal(cv: var FastCondVar) = + cv.event = true + #if cas(addr cv.slowPath, true, false): + signal(cv.slow) + +{.pop.} + +# ---------------------------------------------------------------------------- + +type + WorkerProc = proc (thread, args: pointer) {.nimcall, gcsafe.} + Worker = object + taskArrived: CondVar + taskStarted: FastCondVar #\ + # task data: + f: WorkerProc + data: pointer + ready: bool # put it here for correct alignment! + +proc nimArgsPassingDone(p: pointer) {.compilerProc.} = + let w = cast[ptr Worker](p) + signal(w.taskStarted) + +var gSomeReady = createFastCondVar() + +proc slave(w: ptr Worker) {.thread.} = + while true: + w.ready = true # If we instead signal "workerReady" we need the scheduler + # to notice this. The scheduler could then optimize the + # layout of the worker threads (e.g. keep the list sorted) + # so that no search for a "ready" thread is necessary. + # This might be implemented later, but is more tricky than + # it looks because 'spawn' itself can run concurrently. + signal(gSomeReady) + await(w.taskArrived) + assert(not w.ready) + if w.data != nil: + w.f(w, w.data) + w.data = nil + +const NumThreads = 4 + +var + workers: array[NumThreads, TThread[ptr Worker]] + workersData: array[NumThreads, Worker] + +proc setup() = + for i in 0.. <NumThreads: + workersData[i].taskArrived = createCondVar() + workersData[i].taskStarted = createFastCondVar() + createThread(workers[i], slave, addr(workersData[i])) + +proc preferSpawn*(): bool = + ## Use this proc to determine quickly if a 'spawn' or a direct call is + ## preferable. If it returns 'true' a 'spawn' may make sense. In general + ## it is not necessary to call this directly; use 'spawnX' instead. + result = gSomeReady.event + +proc spawn*(call: stmt) {.magic: "Spawn".} + ## always spawns a new task, so that the 'call' is never executed on + ## the calling thread. 'call' has to be proc call 'p(...)' where 'p' + ## is gcsafe and has 'void' as the return type. + +template spawnX*(call: stmt) = + ## spawns a new task if a CPU core is ready, otherwise executes the + ## call in the calling thread. Usually it is advised to + ## use 'spawn' in order to not block the producer for an unknown + ## amount of time. 'call' has to be proc call 'p(...)' where 'p' + ## is gcsafe and has 'void' as the return type. + if preferSpawn(): spawn call + else: call + +proc nimSpawn(fn: WorkerProc; data: pointer) {.compilerProc.} = + # implementation of 'spawn' that is used by the code generator. + while true: + for i in 0.. high(workers): + let w = addr(workersData[i]) + if cas(addr w.ready, true, false): + w.data = data + w.f = fn + signal(w.taskArrived) + await(w.taskStarted) + return + await(gSomeReady) + +proc sync*() = + ## a simple barrier to wait for all spawn'ed tasks. If you need more elaborate + ## waiting, you have to use an explicit barrier. + while true: + var allReady = true + for i in 0 .. high(workers): + if not allReady: break + allReady = allReady and workersData[i].ready + if allReady: break + await(gSomeReady) + +setup() |