summary refs log tree commit diff stats
path: root/lib/system
diff options
context:
space:
mode:
Diffstat (limited to 'lib/system')
-rw-r--r--lib/system/alloc.nim7
-rw-r--r--lib/system/avltree.nim4
-rw-r--r--lib/system/cgprocs.nim4
-rw-r--r--lib/system/excpt.nim12
-rw-r--r--lib/system/gc.nim21
-rw-r--r--lib/system/gc_ms.nim16
-rw-r--r--lib/system/hti.nim2
-rw-r--r--lib/system/syslocks.nim12
-rw-r--r--lib/system/sysspawn.nim172
9 files changed, 213 insertions, 37 deletions
diff --git a/lib/system/alloc.nim b/lib/system/alloc.nim
index 954485eb4..eaef6cd95 100644
--- a/lib/system/alloc.nim
+++ b/lib/system/alloc.nim
@@ -722,10 +722,13 @@ proc alloc0(allocator: var TMemRegion, size: int): pointer =
   zeroMem(result, size)
 
 proc dealloc(allocator: var TMemRegion, p: pointer) =
+  sysAssert(p != nil, "dealloc 0")
   var x = cast[pointer](cast[TAddress](p) -% sizeof(TFreeCell))
-  sysAssert(cast[ptr TFreeCell](x).zeroField == 1, "dealloc 1")
+  sysAssert(x != nil, "dealloc 1")
+  sysAssert(isAccessible(allocator, x), "is not accessible")
+  sysAssert(cast[ptr TFreeCell](x).zeroField == 1, "dealloc 2")
   rawDealloc(allocator, x)
-  sysAssert(not isAllocatedPtr(allocator, x), "dealloc 2")
+  sysAssert(not isAllocatedPtr(allocator, x), "dealloc 3")
 
 proc realloc(allocator: var TMemRegion, p: pointer, newsize: int): pointer =
   if newsize > 0:
diff --git a/lib/system/avltree.nim b/lib/system/avltree.nim
index fc965d6aa..bced15d6a 100644
--- a/lib/system/avltree.nim
+++ b/lib/system/avltree.nim
@@ -51,7 +51,7 @@ proc split(t: var PAvlNode) =
     t.link[0] = temp
     inc t.level
 
-proc add(a: var TMemRegion, t: var PAvlNode, key, upperBound: int) =
+proc add(a: var TMemRegion, t: var PAvlNode, key, upperBound: int) {.gcsafe.} =
   if t == bottom:
     t = allocAvlNode(a, key, upperBound)
   else:
@@ -64,7 +64,7 @@ proc add(a: var TMemRegion, t: var PAvlNode, key, upperBound: int) =
     skew(t)
     split(t)
 
-proc del(a: var TMemRegion, t: var PAvlNode, x: int) =
+proc del(a: var TMemRegion, t: var PAvlNode, x: int) {.gcsafe.} =
   if t == bottom: return
   a.last = t
   if x <% t.key:
diff --git a/lib/system/cgprocs.nim b/lib/system/cgprocs.nim
index e30cfa469..d483c61bd 100644
--- a/lib/system/cgprocs.nim
+++ b/lib/system/cgprocs.nim
@@ -9,7 +9,7 @@
 
 # Headers for procs that the code generator depends on ("compilerprocs")
 
-proc addChar(s: NimString, c: char): NimString {.compilerProc.}
+proc addChar(s: NimString, c: char): NimString {.compilerProc, gcsafe.}
 
 type
   TLibHandle = pointer       # private type
@@ -21,5 +21,5 @@ proc nimGetProcAddr(lib: TLibHandle, name: cstring): TProcAddr {.compilerproc.}
 
 proc nimLoadLibraryError(path: string) {.compilerproc, noinline.}
 
-proc setStackBottom(theStackBottom: pointer) {.compilerRtl, noinline.}
+proc setStackBottom(theStackBottom: pointer) {.compilerRtl, noinline, gcsafe.}
 
diff --git a/lib/system/excpt.nim b/lib/system/excpt.nim
index e50ba7b9f..612a9e729 100644
--- a/lib/system/excpt.nim
+++ b/lib/system/excpt.nim
@@ -11,7 +11,7 @@
 # use the heap (and nor exceptions) do not include the GC or memory allocator.
 
 var
-  errorMessageWriter*: (proc(msg: string) {.tags: [FWriteIO].})
+  errorMessageWriter*: (proc(msg: string) {.tags: [FWriteIO], gcsafe.})
     ## Function that will be called
     ## instead of stdmsg.write when printing stacktrace.
     ## Unstable API.
@@ -32,10 +32,10 @@ proc showErrorMessage(data: cstring) =
   else:
     writeToStdErr(data)
 
-proc chckIndx(i, a, b: int): int {.inline, compilerproc.}
-proc chckRange(i, a, b: int): int {.inline, compilerproc.}
-proc chckRangeF(x, a, b: float): float {.inline, compilerproc.}
-proc chckNil(p: pointer) {.noinline, compilerproc.}
+proc chckIndx(i, a, b: int): int {.inline, compilerproc, gcsafe.}
+proc chckRange(i, a, b: int): int {.inline, compilerproc, gcsafe.}
+proc chckRangeF(x, a, b: float): float {.inline, compilerproc, gcsafe.}
+proc chckNil(p: pointer) {.noinline, compilerproc, gcsafe.}
 
 var
   framePtr {.rtlThreadVar.}: PFrame
@@ -322,5 +322,5 @@ when not defined(noSignalHandler):
 
 proc setControlCHook(hook: proc () {.noconv.}) =
   # ugly cast, but should work on all architectures:
-  type TSignalHandler = proc (sig: cint) {.noconv.}
+  type TSignalHandler = proc (sig: cint) {.noconv, gcsafe.}
   c_signal(SIGINT, cast[TSignalHandler](hook))
diff --git a/lib/system/gc.nim b/lib/system/gc.nim
index ec1760914..3b85fe600 100644
--- a/lib/system/gc.nim
+++ b/lib/system/gc.nim
@@ -51,7 +51,7 @@ type
     waZctDecRef, waPush, waCycleDecRef, waMarkGray, waScan, waScanBlack, 
     waCollectWhite,
 
-  TFinalizer {.compilerproc.} = proc (self: pointer) {.nimcall.}
+  TFinalizer {.compilerproc.} = proc (self: pointer) {.nimcall, gcsafe.}
     # A ref type can have a finalizer that is called before the object's
     # storage is freed.
 
@@ -152,11 +152,11 @@ template gcTrace(cell, state: expr): stmt {.immediate.} =
   when traceGC: traceCell(cell, state)
 
 # forward declarations:
-proc collectCT(gch: var TGcHeap)
-proc isOnStack*(p: pointer): bool {.noinline.}
-proc forAllChildren(cell: PCell, op: TWalkOp)
-proc doOperation(p: pointer, op: TWalkOp)
-proc forAllChildrenAux(dest: pointer, mt: PNimType, op: TWalkOp)
+proc collectCT(gch: var TGcHeap) {.gcsafe.}
+proc isOnStack*(p: pointer): bool {.noinline, gcsafe.}
+proc forAllChildren(cell: PCell, op: TWalkOp) {.gcsafe.}
+proc doOperation(p: pointer, op: TWalkOp) {.gcsafe.}
+proc forAllChildrenAux(dest: pointer, mt: PNimType, op: TWalkOp) {.gcsafe.}
 # we need the prototype here for debugging purposes
 
 when hasThreadSupport and hasSharedHeap:
@@ -294,7 +294,7 @@ proc initGC() =
 
 when useMarkForDebug or useBackupGc:
   type
-    TGlobalMarkerProc = proc () {.nimcall.}
+    TGlobalMarkerProc = proc () {.nimcall, gcsafe.}
   var
     globalMarkersLen: int
     globalMarkers: array[0.. 7_000, TGlobalMarkerProc]
@@ -311,7 +311,7 @@ proc cellsetReset(s: var TCellSet) =
   deinit(s)
   init(s)
 
-proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: TWalkOp) =
+proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: TWalkOp) {.gcsafe.} =
   var d = cast[TAddress](dest)
   case n.kind
   of nkSlot: forAllChildrenAux(cast[pointer](d +% n.offset), n.typ, op)
@@ -680,10 +680,11 @@ proc doOperation(p: pointer, op: TWalkOp) =
 proc nimGCvisit(d: pointer, op: int) {.compilerRtl.} =
   doOperation(d, TWalkOp(op))
 
-proc collectZCT(gch: var TGcHeap): bool
+proc collectZCT(gch: var TGcHeap): bool {.gcsafe.}
 
 when useMarkForDebug or useBackupGc:
-  proc markStackAndRegistersForSweep(gch: var TGcHeap) {.noinline, cdecl.}
+  proc markStackAndRegistersForSweep(gch: var TGcHeap) {.noinline, cdecl,
+                                                         gcsafe.}
 
 proc collectRoots(gch: var TGcHeap) =
   for s in elements(gch.cycleRoots):
diff --git a/lib/system/gc_ms.nim b/lib/system/gc_ms.nim
index e78a4e5cd..3c99a57e1 100644
--- a/lib/system/gc_ms.nim
+++ b/lib/system/gc_ms.nim
@@ -32,11 +32,11 @@ type
                    # local 
     waMarkPrecise  # fast precise marking
 
-  TFinalizer {.compilerproc.} = proc (self: pointer) {.nimcall.}
+  TFinalizer {.compilerproc.} = proc (self: pointer) {.nimcall, gcsafe.}
     # A ref type can have a finalizer that is called before the object's
     # storage is freed.
   
-  TGlobalMarkerProc = proc () {.nimcall.}
+  TGlobalMarkerProc = proc () {.nimcall, gcsafe.}
 
   TGcStat = object
     collections: int         # number of performed full collections
@@ -113,11 +113,11 @@ when BitsPerPage mod (sizeof(int)*8) != 0:
   {.error: "(BitsPerPage mod BitsPerUnit) should be zero!".}
 
 # forward declarations:
-proc collectCT(gch: var TGcHeap)
-proc isOnStack*(p: pointer): bool {.noinline.}
-proc forAllChildren(cell: PCell, op: TWalkOp)
-proc doOperation(p: pointer, op: TWalkOp)
-proc forAllChildrenAux(dest: pointer, mt: PNimType, op: TWalkOp)
+proc collectCT(gch: var TGcHeap) {.gcsafe.}
+proc isOnStack*(p: pointer): bool {.noinline, gcsafe.}
+proc forAllChildren(cell: PCell, op: TWalkOp) {.gcsafe.}
+proc doOperation(p: pointer, op: TWalkOp) {.gcsafe.}
+proc forAllChildrenAux(dest: pointer, mt: PNimType, op: TWalkOp) {.gcsafe.}
 # we need the prototype here for debugging purposes
 
 proc prepareDealloc(cell: PCell) =
@@ -150,7 +150,7 @@ proc initGC() =
       Init(gch.allocated)
       init(gch.marked)
 
-proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: TWalkOp) =
+proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: TWalkOp) {.gcsafe.} =
   var d = cast[TAddress](dest)
   case n.kind
   of nkSlot: forAllChildrenAux(cast[pointer](d +% n.offset), n.typ, op)
diff --git a/lib/system/hti.nim b/lib/system/hti.nim
index 9d8ece7df..64174e60f 100644
--- a/lib/system/hti.nim
+++ b/lib/system/hti.nim
@@ -85,7 +85,7 @@ type
     base: ptr TNimType
     node: ptr TNimNode # valid for tyRecord, tyObject, tyTuple, tyEnum
     finalizer: pointer # the finalizer for the type
-    marker: proc (p: pointer, op: int) {.nimcall.} # marker proc for GC
+    marker: proc (p: pointer, op: int) {.nimcall, gcsafe.} # marker proc for GC
   PNimType = ptr TNimType
   
 # node.len may be the ``first`` element of a set
diff --git a/lib/system/syslocks.nim b/lib/system/syslocks.nim
index 5e3b04b7f..b8ed29cfc 100644
--- a/lib/system/syslocks.nim
+++ b/lib/system/syslocks.nim
@@ -52,7 +52,7 @@ when defined(Windows):
   proc closeHandle(hObject: THandle) {.stdcall, noSideEffect,
     dynlib: "kernel32", importc: "CloseHandle".}
   proc waitForSingleObject(hHandle: THandle, dwMilliseconds: int32): int32 {.
-    stdcall, dynlib: "kernel32", importc: "WaitForSingleObject".}
+    stdcall, dynlib: "kernel32", importc: "WaitForSingleObject", noSideEffect.}
 
   proc signalSysCond(hEvent: TSysCond) {.stdcall, noSideEffect,
     dynlib: "kernel32", importc: "SetEvent".}
@@ -89,16 +89,16 @@ else:
 
   proc releaseSys(L: var TSysLock) {.noSideEffect,
     importc: "pthread_mutex_unlock", header: "<pthread.h>".}
-  proc deinitSys(L: var TSysLock) {.
+  proc deinitSys(L: var TSysLock) {.noSideEffect,
     importc: "pthread_mutex_destroy", header: "<pthread.h>".}
 
   proc initSysCond(cond: var TSysCond, cond_attr: pointer = nil) {.
-    importc: "pthread_cond_init", header: "<pthread.h>".}
+    importc: "pthread_cond_init", header: "<pthread.h>", noSideEffect.}
   proc waitSysCond(cond: var TSysCond, lock: var TSysLock) {.
-    importc: "pthread_cond_wait", header: "<pthread.h>".}
+    importc: "pthread_cond_wait", header: "<pthread.h>", noSideEffect.}
   proc signalSysCond(cond: var TSysCond) {.
-    importc: "pthread_cond_signal", header: "<pthread.h>".}
+    importc: "pthread_cond_signal", header: "<pthread.h>", noSideEffect.}
   
-  proc deinitSysCond(cond: var TSysCond) {.
+  proc deinitSysCond(cond: var TSysCond) {.noSideEffect,
     importc: "pthread_cond_destroy", header: "<pthread.h>".}
   
diff --git a/lib/system/sysspawn.nim b/lib/system/sysspawn.nim
new file mode 100644
index 000000000..3a641aba6
--- /dev/null
+++ b/lib/system/sysspawn.nim
@@ -0,0 +1,172 @@
+# Implements Nimrod's 'spawn'.
+
+{.push stackTrace:off.}
+include system.syslocks
+
+# cpuRelax: one implementation is selected at compile time, depending on the
+# target CPU and the C compiler in use.
+when (defined(x86) or defined(amd64)) and defined(gcc):
+  proc cpuRelax {.inline.} =
+    # x86 `pause` instruction, emitted as GCC inline assembly.
+    {.emit: """asm volatile("pause" ::: "memory");""".}
+elif (defined(x86) or defined(amd64)) and defined(vcc):
+  # Visual C++ on x86/amd64: imported as `YieldProcessor` from <windows.h>.
+  proc cpuRelax {.importc: "YieldProcessor", header: "<windows.h>".}
+elif defined(intelc):
+  # Intel C: imported as `_mm_pause` from xmmintrin.h.
+  proc cpuRelax {.importc: "_mm_pause", header: "xmmintrin.h".}
+else:
+  from os import sleep
+
+  # Fallback for every other configuration: calls os.sleep(1).
+  proc cpuRelax {.inline.} = os.sleep(1)
+
+when defined(windows):
+  proc interlockedCompareExchange(p: pointer; exchange, comparand: int32): int32
+    {.importc: "InterlockedCompareExchange", header: "<windows.h>", cdecl.}
+
+  proc cas(p: ptr bool; oldValue, newValue: bool): bool =
+    ## Compare-and-swap: stores `newValue` into `p[]` if it currently holds
+    ## `oldValue`. Returns true iff the exchange took place.
+    # InterlockedCompareExchange returns the value the destination held
+    # *before* the call, so the swap happened exactly when that value equals
+    # the comparand. The former `!= 0` test reported failure for a successful
+    # cas(p, false, true).
+    # NOTE(review): the Win32 call works on a 32 bit value while `p` points
+    # to a `bool`; confirm every target is padded/aligned accordingly.
+    let expected = oldValue.int32
+    interlockedCompareExchange(p, newValue.int32, expected) == expected
+
+else:
+  # this is valid for GCC and Intel C++
+  proc cas(p: ptr bool; oldValue, newValue: bool): bool
+    {.importc: "__sync_bool_compare_and_swap", nodecl.}
+
+# We declare our own condition variables here to get rid of the dummy lock
+# on Windows:
+
+type
+  CondVar = object
+    # The system condition object that is waited on and signalled.
+    c: TSysCond
+    when defined(posix):
+      # Only present on POSIX: the lock that is passed to `waitSysCond`.
+      stupidLock: TSysLock
+
+proc createCondVar(): CondVar =
+  ## Builds a `CondVar` whose system condition is initialised. On POSIX the
+  ## companion lock is initialised as well and acquired immediately.
+  result.c.initSysCond()
+  when defined(posix):
+    result.stupidLock.initSysLock()
+    result.stupidLock.acquireSys()
+
+proc await(cv: var CondVar) =
+  ## Waits on the system condition wrapped by `cv`.
+  when not defined(posix):
+    waitSysCondWindows(cv.c)
+  else:
+    # The POSIX wait takes a lock argument; hand it the one stored in `cv`.
+    waitSysCond(cv.c, cv.stupidLock)
+
+proc signal(cv: var CondVar) =
+  ## Signals the system condition wrapped by `cv`.
+  signalSysCond(cv.c)
+
+type
+  FastCondVar = object
+    # `event` is set by `signal` and cleared by `await`; `slowPath` is only
+    # used by the currently disabled spinning code.
+    event, slowPath: bool
+    # The condition variable that `await` and `signal` actually use.
+    slow: CondVar
+
+proc createFastCondVar(): FastCondVar =
+  ## Builds a `FastCondVar` with both flags cleared and the embedded slow
+  ## condition variable initialised (on POSIX its lock is also initialised
+  ## and acquired).
+  result.event = false
+  result.slowPath = false
+  result.slow.c.initSysCond()
+  when defined(posix):
+    result.slow.stupidLock.initSysLock()
+    result.slow.stupidLock.acquireSys()
+
+proc await(cv: var FastCondVar) =
+  ## Waits on the embedded `slow` condition variable and then clears `event`.
+  # Disabled fast path: spin on `event` a bounded number of times before
+  # falling back to the slow wait.
+  #for i in 0 .. 50:
+  #  if cas(addr cv.event, true, false):
+  #    # this is a HIT: Triggers > 95% in my tests.
+  #    return
+  #  cpuRelax()
+  #cv.slowPath = true
+  await(cv.slow)
+  cv.event = false
+
+proc signal(cv: var FastCondVar) =
+  ## Sets `event` and then signals the embedded `slow` condition variable.
+  cv.event = true
+  # Disabled: only signal when a waiter announced it took the slow path.
+  #if cas(addr cv.slowPath, true, false):
+  signal(cv.slow)
+
+{.pop.}
+
+# ----------------------------------------------------------------------------
+
+type
+  # Signature of a spawned task: receives the worker (as `thread`) and the
+  # argument block that was passed to `nimSpawn`.
+  WorkerProc = proc (thread, args: pointer) {.nimcall, gcsafe.}
+  # State of one worker thread; shared between the worker and `nimSpawn`.
+  Worker = object
+    taskArrived: CondVar
+    taskStarted: FastCondVar #\
+    # task data:
+    f: WorkerProc
+    data: pointer
+    ready: bool # put it here for correct alignment!
+
+proc nimArgsPassingDone(p: pointer) {.compilerProc.} =
+  ## Interprets `p` as a `ptr Worker` and signals that worker's
+  ## `taskStarted` condition variable.
+  signal(cast[ptr Worker](p).taskStarted)
+
+# Signalled by each worker at the top of its loop; awaited by `nimSpawn` and
+# `sync`, and its `event` flag is read by `preferSpawn`.
+var gSomeReady = createFastCondVar()
+
+proc slave(w: ptr Worker) {.thread.} =
+  ## Thread body of a worker: announce readiness, wait for a task to arrive,
+  ## run it, and start over. Never returns.
+  while true:
+    # Readiness is published through a plain flag. Signalling a dedicated
+    # "workerReady" condition instead would require the scheduler to notice
+    # it; the scheduler could then optimise the layout of the worker threads
+    # (e.g. keep the list sorted) so that no search for a "ready" thread is
+    # necessary. That may come later, but it is trickier than it looks
+    # because 'spawn' itself can run concurrently.
+    w.ready = true
+    signal(gSomeReady)
+    await(w.taskArrived)
+    assert(not w.ready)
+    # Nothing to run if no task data was handed over.
+    if w.data == nil: continue
+    w.f(w, w.data)
+    w.data = nil
+
+# Number of worker threads in the pool (fixed at compile time).
+const NumThreads = 4
+
+var
+  # Thread handles; `setup` starts workers[i] with workersData[i].
+  workers: array[NumThreads, TThread[ptr Worker]]
+  # Per-worker state, indexed in parallel with `workers`.
+  workersData: array[NumThreads, Worker]
+
+proc setup() =
+  ## Initialises the condition variables of every worker slot and starts one
+  ## thread per slot, each running `slave` on its own `Worker` record.
+  for idx in 0 .. high(workers):
+    workersData[idx].taskArrived = createCondVar()
+    workersData[idx].taskStarted = createFastCondVar()
+    createThread(workers[idx], slave, addr(workersData[idx]))
+
+proc preferSpawn*(): bool =
+  ## Quick check whether a 'spawn' or a direct call is preferable: if this
+  ## returns 'true' a 'spawn' may make sense. Calling it directly is rarely
+  ## needed; use 'spawnX' instead.
+  gSomeReady.event
+
+proc spawn*(call: stmt) {.magic: "Spawn".}
+  ## Always spawns a new task, so that 'call' is never executed on
+  ## the calling thread. 'call' has to be a proc call 'p(...)' where 'p'
+  ## is gcsafe and has 'void' as the return type.
+
+template spawnX*(call: stmt) =
+  ## Spawns a new task when 'preferSpawn' reports that a worker is ready;
+  ## otherwise runs 'call' in the calling thread. Plain 'spawn' is usually
+  ## the better choice so that the producer is not blocked for an unknown
+  ## amount of time. 'call' has to be a proc call 'p(...)' where 'p'
+  ## is gcsafe and has 'void' as the return type.
+  if preferSpawn():
+    spawn call
+  else:
+    call
+
+proc nimSpawn(fn: WorkerProc; data: pointer) {.compilerProc.} =
+  ## Runtime side of 'spawn', called by generated code: claims a ready
+  ## worker, hands it `fn` and `data`, wakes it up and waits until the
+  ## worker signals `taskStarted`. While no worker is ready it waits on
+  ## `gSomeReady` and scans again.
+  while true:
+    for slot in 0 .. high(workers):
+      let worker = addr(workersData[slot])
+      # Claim the worker atomically; skip it if it was not ready.
+      if not cas(addr worker.ready, true, false): continue
+      worker.data = data
+      worker.f = fn
+      signal(worker.taskArrived)
+      await(worker.taskStarted)
+      return
+    await(gSomeReady)
+
+proc sync*() =
+  ## A simple barrier that waits until every worker reports ready, i.e. all
+  ## spawn'ed tasks are done. For more elaborate waiting use an explicit
+  ## barrier.
+  while true:
+    var everyoneReady = true
+    for slot in 0 .. high(workers):
+      # Stop scanning at the first worker that is still busy.
+      if not workersData[slot].ready:
+        everyoneReady = false
+        break
+    if everyoneReady: break
+    await(gSomeReady)
+
+# Start the worker threads when this module's top-level code runs.
+setup()