summary refs log tree commit diff stats
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rwxr-xr-xlib/core/threads.nim227
-rwxr-xr-xlib/system.nim3
-rwxr-xr-xlib/system/alloc.nim25
-rwxr-xr-xlib/system/cellsets.nim22
-rwxr-xr-xlib/system/excpt.nim33
-rwxr-xr-xlib/system/gc.nim26
-rwxr-xr-xlib/system/mmdisp.nim19
-rwxr-xr-xlib/system/repr.nim4
-rwxr-xr-xlib/system/systhread.nim6
9 files changed, 191 insertions, 174 deletions
diff --git a/lib/core/threads.nim b/lib/core/threads.nim
index f6ebb40dd..77253af20 100755
--- a/lib/core/threads.nim
+++ b/lib/core/threads.nim
@@ -8,8 +8,9 @@
 #
 
 ## Basic thread support for Nimrod. Note that Nimrod's default GC is still
-## single-threaded. This means that either your threads should not allocate
-## GC'ed memory, or you should compile with ``--gc:none`` or ``--gc:boehm``.
+## single-threaded. This means that you MUST turn off the GC while multiple
+## threads are executing that allocate GC'ed memory. The alternative is to
+## compile with ``--gc:none`` or ``--gc:boehm``.
 ##
 ## Example:
 ##
@@ -27,16 +28,20 @@
 ##
 ##  InitLock(L)
 ##
+##  GC_disable() # native GC does not support multiple threads yet :-(
 ##  for i in 0..high(thr):
 ##    createThread(thr[i], threadFunc, (i*10, i*10+5))
 ##  for i in 0..high(thr):
 ##    joinThread(thr[i])
+##  GC_enable()
 
 when not compileOption("threads"):
   {.error: "Thread support requires ``--threads:on`` commandline switch".}
 
 when not defined(boehmgc) and not defined(nogc) and false:
   {.error: "Thread support requires --gc:boehm or --gc:none".}
+  
+include "lib/system/systhread"
 
 # We jump through some hops here to ensure that Nimrod thread procs can have
 # the Nimrod calling convention. This is needed because thread procs are 
@@ -93,6 +98,10 @@ when defined(windows):
 else:
   type
     TSysThread {.importc: "pthread_t", header: "<sys/types.h>".} = int
+    Ttimespec {.importc: "struct timespec",
+                header: "<time.h>", final, pure.} = object
+      tv_sec: int
+      tv_nsec: int
 
   proc pthread_create(a1: var TSysThread, a2: ptr int,
             a3: proc (x: pointer) {.noconv.}, 
@@ -104,6 +113,17 @@ else:
   proc pthread_cancel(a1: TSysThread): cint {.
     importc: "pthread_cancel", header: "<pthread.h>".}
 
+  proc AquireSysTimeoutAux(L: var TSysLock, timeout: var Ttimespec): cint {.
+    importc: "pthread_mutex_timedlock", header: "<time.h>".}
+
+  proc AquireSysTimeout(L: var TSysLock, msTimeout: int) {.inline.} =
+    var a: Ttimespec
+    a.tv_sec = msTimeout div 1000
+    a.tv_nsec = (msTimeout mod 1000) * 1000
+    var res = AquireSysTimeoutAux(L, a)
+    if res != 0'i32:
+      raise newException(EResourceExhausted, $strerror(res))
+
   {.push stack_trace:off.}
   proc threadProcWrapper[TParam](closure: pointer) {.noconv.} = 
     var c = cast[ptr TThreadProcClosure[TParam]](closure)
@@ -114,121 +134,119 @@ else:
 
 const
   noDeadlocks = false # compileOption("deadlockPrevention")
-  
-include "lib/system/systhread"
 
-when noDeadLocks:
-  type
-    TLock* {.pure, final.} = object ## Standard Nimrod Lock type.
-      key: int       # used for identity and global order!
-      sys: TSysLock
-      next: ptr TLock
-else:
-  type 
-    TLock* = TSysLock    
-    
 type
+  TLock* = TSysLock
   TThread* {.pure, final.}[TParam] = object ## Nimrod thread.
     sys: TSysThread
     c: TThreadProcClosure[TParam]
 
 when nodeadlocks:
-  var 
-    lockList {.threadvar.}: ptr TLock
+  var
     deadlocksPrevented* = 0  ## counts the number of times a 
                              ## deadlock has been prevented
 
-proc InitLock*(L: var TLock) {.inline.} =
-  ## Initializes the lock `L`.
-  when noDeadlocks:
-    InitSysLock(L.sys)
-    L.key = cast[int](addr(L))
-  else:
-    InitSysLock(L)
+proc InitLock*(lock: var TLock) {.inline.} =
+  ## Initializes the lock `lock`.
+  InitSysLock(lock)
+
+proc OrderedLocks(g: PGlobals): bool = 
+  for i in 0 .. g.locksLen-2:
+    if g.locks[i] >= g.locks[i+1]: return false
+  result = true
 
-proc TryAquire*(L: var TLock): bool {.inline.} = 
-  ## Try to aquires the lock `L`. Returns `true` on success.
+proc TryAquire*(lock: var TLock): bool {.inline.} = 
+  ## Tries to aquire the lock `lock`. Returns `true` on success.
   when noDeadlocks:
-    result = TryAquireSys(L.sys)
+    result = TryAquireSys(lock)
+    if not result: return
+    # we have to add it to the ordered list. Oh, and we might fail if
+    # there is no space in the array left ...
+    var g = GetGlobals()
+    if g.locksLen >= len(g.locks):
+      ReleaseSys(lock)
+      raise newException(EResourceExhausted, "cannot aquire additional lock")
+    # find the position to add:
+    var p = addr(lock)
+    var L = g.locksLen-1
+    var i = 0
+    while i <= L:
+      assert g.locks[i] != nil
+      if g.locks[i] < p: inc(i) # in correct order
+      elif g.locks[i] == p: return # thread already holds lock
+      else:
+        # do the crazy stuff here:
+        while L >= i:
+          g.locks[L+1] = g.locks[L]
+          dec L
+        g.locks[i] = p
+        inc(g.locksLen)
+        assert OrderedLocks(g)
+        return
+    # simply add to the end:
+    g.locks[g.locksLen] = p
+    inc(g.locksLen)
+    assert OrderedLocks(g)
   else:
-    result = TryAquireSys(L)
+    result = TryAquireSys(lock)
 
-proc Aquire*(L: var TLock) =
-  ## Aquires the lock `L`.
+proc Aquire*(lock: var TLock) =
+  ## Aquires the lock `lock`.
   when nodeadlocks:
-    # Note: we MUST NOT change the linked list of locks before we have aquired
-    # the proper locks! This is because the pointer to the next lock is part
-    # of the lock itself!
-    assert L.key != 0
-    var p = lockList
-    if p == nil:
-      # simple case: no lock aquired yet:
-      AquireSys(L.sys)
-      locklist = addr(L)
-      L.next = nil
-    else:
-      # check where to put L into the list:
-      var r = p
-      var last: ptr TLock = nil
-      while L.key < r.key: 
-        if r.next == nil: 
-          # best case: L needs to be aquired as last lock, so we can 
-          # skip a good amount of work: 
-          AquireSys(L.sys)
-          r.next = addr(L)
-          L.next = nil
-          return
-        last = r
-        r = r.next
-      # special case: thread already holds L!
-      if L.key == r.key: return
-      
-      # bad case: L needs to be somewhere in between
-      # release all locks after L: 
-      var rollback = r
-      while r != nil:
-        ReleaseSys(r.sys)
-        r = r.next
-      # and aquire them in the correct order again:
-      AquireSys(L.sys)
-      r = rollback
-      while r != nil:
-        assert r.key < L.key
-        AquireSys(r.sys)
-        r = r.next
-      # now that we have all the locks we need, we can insert L 
-      # into our list:
-      if last != nil:
-        L.next = last.next
-        last.next = addr(L)
+    var g = GetGlobals()
+    var p = addr(lock)
+    var L = g.locksLen-1
+    var i = 0
+    while i <= L:
+      assert g.locks[i] != nil
+      if g.locks[i] < p: inc(i) # in correct order
+      elif g.locks[i] == p: return # thread already holds lock
       else:
-        L.next = lockList
-        lockList = addr(L)
-      inc(deadlocksPrevented)
+        # do the crazy stuff here:
+        if g.locksLen >= len(g.locks):
+          raise newException(EResourceExhausted, "cannot aquire additional lock")
+        while L >= i:
+          ReleaseSys(cast[ptr TSysLock](g.locks[L])[])
+          g.locks[L+1] = g.locks[L]
+          dec L
+        # aquire the current lock:
+        AquireSys(lock)
+        g.locks[i] = p
+        inc(g.locksLen)
+        # aquire old locks in proper order again:
+        L = g.locksLen-1
+        inc i
+        while i <= L:
+          AquireSys(cast[ptr TSysLock](g.locks[i])[])
+          inc(i)
+        # DANGER: We can only modify this global var if we gained every lock!
+        # NO! We need an atomic increment. Crap.
+        discard system.atomicInc(deadlocksPrevented, 1)
+        assert OrderedLocks(g)
+        return
+        
+    # simply add to the end:
+    if g.locksLen >= len(g.locks):
+      raise newException(EResourceExhausted, "cannot aquire additional lock")
+    AquireSys(lock)
+    g.locks[g.locksLen] = p
+    inc(g.locksLen)
+    assert OrderedLocks(g)
   else:
-    AquireSys(L)
+    AquireSys(lock)
   
-proc Release*(L: var TLock) =
-  ## Releases the lock `L`.
+proc Release*(lock: var TLock) =
+  ## Releases the lock `lock`.
   when nodeadlocks:
-    assert L.key != 0
-    var p = lockList
-    var last: ptr TLock = nil
-    while true:
-      # if we don't find the lock, die by reading from nil!
-      if p.key == L.key: 
-        if last != nil:
-          last.next = p.next
-        else:
-          assert p == lockList
-          lockList = locklist.next
-        L.next = nil
+    var g = GetGlobals()
+    var p = addr(lock)
+    var L = g.locksLen
+    for i in countdown(L-1, 0):
+      if g.locks[i] == p: 
+        for j in i..L-2: g.locks[j] = g.locks[j+1]
+        dec g.locksLen
         break
-      last = p
-      p = p.next
-    ReleaseSys(L.sys)
-  else:
-    ReleaseSys(L)
+  ReleaseSys(lock)
 
 proc joinThread*[TParam](t: TThread[TParam]) {.inline.} = 
   ## waits for the thread `t` until it has terminated.
@@ -257,7 +275,7 @@ proc createThread*[TParam](t: var TThread[TParam],
     var dummyThreadId: int32
     t.sys = CreateThread(nil, 0'i32, threadProcWrapper[TParam], 
                          addr(t.c), 0'i32, dummyThreadId)
-  else: 
+  else:
     if pthread_create(t.sys, nil, threadProcWrapper[TParam], addr(t.c)) != 0:
       raise newException(EIO, "cannot create thread")
 
@@ -265,12 +283,12 @@ when isMainModule:
   import os
   
   var
-    thr: array [0..1, TThread[tuple[a,b: int]]]
+    thr: array [0..5, TThread[tuple[a, b: int]]]
     L, M, N: TLock
   
   proc doNothing() = nil
   
-  proc threadFunc(interval: tuple[a,b: int]) {.procvar.} = 
+  proc threadFunc(interval: tuple[a, b: int]) {.procvar.} = 
     doNothing()
     for i in interval.a..interval.b: 
       when nodeadlocks:
@@ -302,16 +320,15 @@ when isMainModule:
         else: assert false
       else:
         Aquire(L) # lock stdout
-        Aquire(M)
-        Aquire(N)
         
       echo i
       os.sleep(10)
       when nodeadlocks:
         echo "deadlocks prevented: ", deadlocksPrevented
+      when nodeadlocks:
+        Release(N)
+        Release(M)
       Release(L)
-      Release(M)
-      Release(N)
 
   InitLock(L)
   InitLock(M)
@@ -323,5 +340,7 @@ when isMainModule:
     for i in 0..high(thr):
       joinThread(thr[i])
 
+  GC_disable() 
   main()
+  GC_enable()
 
diff --git a/lib/system.nim b/lib/system.nim
index b8093cdad..9a9e4fb06 100755
--- a/lib/system.nim
+++ b/lib/system.nim
@@ -1461,7 +1461,8 @@ when not defined(EcmaScript) and not defined(NimrodVM):
     # Linux 64bit system. Very strange, but we are at the will of GCC's 
     # optimizer...
     var locals {.volatile.}: pointer
-    setStackBottom(addr(locals))
+    locals = addr(locals)
+    setStackBottom(locals)
 
   var
     strDesc: TNimType
diff --git a/lib/system/alloc.nim b/lib/system/alloc.nim
index c385aa6fe..2280415e1 100755
--- a/lib/system/alloc.nim
+++ b/lib/system/alloc.nim
@@ -525,22 +525,39 @@ proc isAllocatedPtr(a: TAllocator, p: pointer): bool =
 # ---------------------- interface to programs -------------------------------
 
 when not defined(useNimRtl):
-  proc alloc(size: int): pointer =
+  var heapLock: TSysLock
+  InitSysLock(HeapLock)
+
+  proc unlockedAlloc(size: int): pointer {.inline.} =
     result = rawAlloc(allocator, size+sizeof(TFreeCell))
     cast[ptr TFreeCell](result).zeroField = 1 # mark it as used
     assert(not isAllocatedPtr(allocator, result))
     result = cast[pointer](cast[TAddress](result) +% sizeof(TFreeCell))
 
-  proc alloc0(size: int): pointer =
-    result = alloc(size)
+  proc unlockedAlloc0(size: int): pointer {.inline.} =
+    result = unlockedAlloc(size)
     zeroMem(result, size)
 
-  proc dealloc(p: pointer) =
+  proc unlockedDealloc(p: pointer) {.inline.} =
     var x = cast[pointer](cast[TAddress](p) -% sizeof(TFreeCell))
     assert(cast[ptr TFreeCell](x).zeroField == 1)
     rawDealloc(allocator, x)
     assert(not isAllocatedPtr(allocator, x))
 
+  proc alloc(size: int): pointer =
+    when hasThreadSupport: AquireSys(HeapLock)
+    result = unlockedAlloc(size)
+    when hasThreadSupport: ReleaseSys(HeapLock)
+
+  proc alloc0(size: int): pointer =
+    result = alloc(size)
+    zeroMem(result, size)
+
+  proc dealloc(p: pointer) =
+    when hasThreadSupport: AquireSys(HeapLock)
+    unlockedDealloc(p)
+    when hasThreadSupport: ReleaseSys(HeapLock)
+
   proc ptrSize(p: pointer): int =
     var x = cast[pointer](cast[TAddress](p) -% sizeof(TFreeCell))
     result = pageAddr(x).size - sizeof(TFreeCell)
diff --git a/lib/system/cellsets.nim b/lib/system/cellsets.nim
index 0ce83864c..e262d4b77 100755
--- a/lib/system/cellsets.nim
+++ b/lib/system/cellsets.nim
@@ -1,7 +1,7 @@
 #
 #
 #            Nimrod's Runtime Library
-#        (c) Copyright 2009 Andreas Rumpf
+#        (c) Copyright 2011 Andreas Rumpf
 #
 #    See the file "copying.txt", included in this
 #    distribution, for details about the copyright.
@@ -47,9 +47,9 @@ proc contains(s: TCellSeq, c: PCell): bool {.inline.} =
 proc add(s: var TCellSeq, c: PCell) {.inline.} =
   if s.len >= s.cap:
     s.cap = s.cap * 3 div 2
-    var d = cast[PCellArray](alloc(s.cap * sizeof(PCell)))
+    var d = cast[PCellArray](unlockedAlloc(s.cap * sizeof(PCell)))
     copyMem(d, s.d, s.len * sizeof(PCell))
-    dealloc(s.d)
+    unlockedDealloc(s.d)
     s.d = d
     # XXX: realloc?
   s.d[s.len] = c
@@ -58,10 +58,10 @@ proc add(s: var TCellSeq, c: PCell) {.inline.} =
 proc init(s: var TCellSeq, cap: int = 1024) =
   s.len = 0
   s.cap = cap
-  s.d = cast[PCellArray](alloc0(cap * sizeof(PCell)))
+  s.d = cast[PCellArray](unlockedAlloc0(cap * sizeof(PCell)))
 
 proc deinit(s: var TCellSeq) = 
-  dealloc(s.d)
+  unlockedDealloc(s.d)
   s.d = nil
   s.len = 0
   s.cap = 0
@@ -70,7 +70,7 @@ const
   InitCellSetSize = 1024 # must be a power of two!
 
 proc Init(s: var TCellSet) =
-  s.data = cast[PPageDescArray](alloc0(InitCellSetSize * sizeof(PPageDesc)))
+  s.data = cast[PPageDescArray](unlockedAlloc0(InitCellSetSize * sizeof(PPageDesc)))
   s.max = InitCellSetSize-1
   s.counter = 0
   s.head = nil
@@ -79,10 +79,10 @@ proc Deinit(s: var TCellSet) =
   var it = s.head
   while it != nil:
     var n = it.next
-    dealloc(it)
+    unlockedDealloc(it)
     it = n
   s.head = nil # play it safe here
-  dealloc(s.data)
+  unlockedDealloc(s.data)
   s.data = nil
   s.counter = 0
 
@@ -110,11 +110,11 @@ proc CellSetRawInsert(t: TCellSet, data: PPageDescArray, desc: PPageDesc) =
 proc CellSetEnlarge(t: var TCellSet) =
   var oldMax = t.max
   t.max = ((t.max+1)*2)-1
-  var n = cast[PPageDescArray](alloc0((t.max + 1) * sizeof(PPageDesc)))
+  var n = cast[PPageDescArray](unlockedAlloc0((t.max + 1) * sizeof(PPageDesc)))
   for i in 0 .. oldmax:
     if t.data[i] != nil:
       CellSetRawInsert(t, n, t.data[i])
-  dealloc(t.data)
+  unlockedDealloc(t.data)
   t.data = n
 
 proc CellSetPut(t: var TCellSet, key: TAddress): PPageDesc =
@@ -132,7 +132,7 @@ proc CellSetPut(t: var TCellSet, key: TAddress): PPageDesc =
   while t.data[h] != nil: h = nextTry(h, t.max)
   assert(t.data[h] == nil)
   # the new page descriptor goes into result
-  result = cast[PPageDesc](alloc0(sizeof(TPageDesc)))
+  result = cast[PPageDesc](unlockedAlloc0(sizeof(TPageDesc)))
   result.next = t.head
   result.key = key
   t.head = result
diff --git a/lib/system/excpt.nim b/lib/system/excpt.nim
index 01604b2c9..12069ae37 100755
--- a/lib/system/excpt.nim
+++ b/lib/system/excpt.nim
@@ -10,6 +10,9 @@
 # Exception handling code. This is difficult because it has
 # to work if there is no more memory (but it doesn't yet!).
 
+const
+  MaxLocksPerThread = 10
+
 var
   stackTraceNewLine* = "\n" ## undocumented feature; it is replaced by ``<br>``
                             ## for CGI applications
@@ -81,11 +84,9 @@ when hasThreadSupport:
     proc pthread_setspecific(a1: Tpthread_key, a2: pointer): int32 {.
       importc: "pthread_setspecific", header: "<pthread.h>".}
     
-    proc specificDestroy(mem: pointer) {.noconv.} = 
-      #aquireSys(heapLock)
-      #dealloc(mem)
-      #releaseSys(heapLock)
-      #c_free(mem)
+    proc specificDestroy(mem: pointer) {.noconv.} =
+      # we really need a thread-safe 'dealloc' here:
+      dealloc(mem)
 
     proc ThreadVarAlloc(): TThreadVarSlot {.compilerproc, inline.} =
       discard pthread_key_create(addr(result), specificDestroy)
@@ -96,10 +97,12 @@ when hasThreadSupport:
       result = pthread_getspecific(s)
       
   type
-    TGlobals {.final, pure.} = object
+    TGlobals* {.final, pure.} = object
       excHandler: PSafePoint
       currException: ref E_Base
       framePtr: PFrame
+      locksLen*: int
+      locks*: array [0..MaxLocksPerThread-1, pointer]
       buf: string       # cannot be allocated on the stack!
       assertBuf: string # we need a different buffer for
                         # assert, as it raises an exception and
@@ -107,22 +110,16 @@ when hasThreadSupport:
       gAssertionFailed: ref EAssertionFailed
       tempFrames: array [0..127, PFrame] # cannot be allocated on the stack!
       data: float # compiler should add thread local variables here!
-    PGlobals = ptr TGlobals
+    PGlobals* = ptr TGlobals
   
   # it's more efficient to not use a global variable for the thread storage 
   # slot, but to rely on the implementation to assign slot 0 for us... ;-)
-  var globalsSlot = ThreadVarAlloc()
-  #const globalsSlot = TThreadVarSlot(0)
-  #assert checkSlot.int == globalsSlot.int
-
-  proc AtomicAlloc0(size: int): pointer =
-    #AquireSys(heapLock)
-    result = c_malloc(size)
-    zeroMem(result, size)
-    #ReleaseSys(heapLock)
+  var checkSlot = ThreadVarAlloc()
+  const globalsSlot = TThreadVarSlot(0)
+  assert checkSlot.int == globalsSlot.int
 
   proc NewGlobals(): PGlobals = 
-    result = cast[PGlobals](AtomicAlloc0(sizeof(TGlobals)))
+    result = cast[PGlobals](alloc0(sizeof(TGlobals)))
     new(result.gAssertionFailed)
     result.buf = newStringOfCap(2000)
     result.assertBuf = newStringOfCap(2000)
@@ -134,7 +131,7 @@ when hasThreadSupport:
   proc SetThreadLocalStorage*(p: pointer) {.inl.} =
     ThreadVarSetValue(globalsSlot, p)
     
-  proc GetGlobals(): PGlobals {.compilerRtl, inl.} =
+  proc GetGlobals*(): PGlobals {.compilerRtl, inl.} =
     result = cast[PGlobals](ThreadVarGetValue(globalsSlot))
 
   # create for the main thread:
diff --git a/lib/system/gc.nim b/lib/system/gc.nim
index 15d2df70e..ab8f19674 100755
--- a/lib/system/gc.nim
+++ b/lib/system/gc.nim
@@ -61,9 +61,6 @@ type
     decStack: TCellSeq       # cells in the stack that are to decref again
     cycleRoots: TCellSet
     tempStack: TCellSeq      # temporary stack for recursion elimination
-    when hasThreadSupport:
-      cycleRootsLock: TSysLock
-      zctLock: TSysLock
     stat: TGcStat
 
 var
@@ -80,13 +77,11 @@ var
 
 proc aquire(gch: var TGcHeap) {.inline.} = 
   when hasThreadSupport:
-    aquireSys(gch.zctLock)
-    aquireSys(gch.cycleRootsLock)
+    AquireSys(HeapLock)
 
 proc release(gch: var TGcHeap) {.inline.} = 
   when hasThreadSupport:
-    releaseSys(gch.cycleRootsLock)
-    releaseSys(gch.zctLock)
+    releaseSys(HeapLock)
 
 proc addZCT(s: var TCellSeq, c: PCell) {.noinline.} =
   if (c.refcount and rcZct) == 0:
@@ -205,18 +200,18 @@ proc prepareDealloc(cell: PCell) =
 proc rtlAddCycleRoot(c: PCell) {.rtl, inl.} = 
   # we MUST access gch as a global here, because this crosses DLL boundaries!
   when hasThreadSupport:
-    AquireSys(gch.cycleRootsLock)
+    AquireSys(HeapLock)
   incl(gch.cycleRoots, c)
   when hasThreadSupport:  
-    ReleaseSys(gch.cycleRootsLock)
+    ReleaseSys(HeapLock)
 
 proc rtlAddZCT(c: PCell) {.rtl, inl.} =
   # we MUST access gch as a global here, because this crosses DLL boundaries!
   when hasThreadSupport:
-    AquireSys(gch.zctLock)
+    AquireSys(HeapLock)
   addZCT(gch.zct, c)
   when hasThreadSupport:
-    ReleaseSys(gch.zctLock)
+    ReleaseSys(HeapLock)
 
 proc decRef(c: PCell) {.inline.} =
   when stressGC:
@@ -284,11 +279,7 @@ proc initGC() =
     init(gch.tempStack)
     Init(gch.cycleRoots)
     Init(gch.decStack)
-    when hasThreadSupport:
-      InitSysLock(gch.cycleRootsLock)
-      InitSysLock(gch.zctLock)
     new(gOutOfMem) # reserve space for the EOutOfMemory exception here!
-    
 
 proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: TWalkOp) =
   var d = cast[TAddress](dest)
@@ -690,10 +681,11 @@ proc unmarkStackAndRegisters(gch: var TGcHeap) =
   var d = gch.decStack.d
   for i in 0..gch.decStack.len-1:
     assert isAllocatedPtr(allocator, d[i])
-    # decRef(d[i]) inlined: cannot create a cycle
+    # decRef(d[i]) inlined: cannot create a cycle and must not aquire lock
     var c = d[i]
+    # XXX no need for an atomic dec here:
     if atomicDec(c.refcount, rcIncrement) <% rcIncrement:
-      rtlAddZCT(c)
+      addZCT(gch.zct, c)
     assert c.typ != nil
   gch.decStack.len = 0
 
diff --git a/lib/system/mmdisp.nim b/lib/system/mmdisp.nim
index f56bb233f..272a2c626 100755
--- a/lib/system/mmdisp.nim
+++ b/lib/system/mmdisp.nim
@@ -97,6 +97,10 @@ when defined(boehmgc):
   proc dealloc(p: Pointer) =
     boehmDealloc(p)
 
+  proc unlockedAlloc(size: int): pointer {.inline.} = result = alloc(size)
+  proc unlockedAlloc0(size: int): pointer {.inline.} = result = alloc0(size)
+  proc unlockedDealloc(p: pointer) {.inline.} = dealloc(p)
+
   proc initGC() = 
     when defined(macosx): boehmGCinit()
   
@@ -148,21 +152,6 @@ elif defined(nogc):
   
   include "system/alloc"
 
-  when false:
-    proc alloc(size: int): pointer =
-      result = c_malloc(size)
-      if result == nil: raiseOutOfMem()
-    proc alloc0(size: int): pointer =
-      result = alloc(size)
-      zeroMem(result, size)
-    proc realloc(p: Pointer, newsize: int): pointer =
-      result = c_realloc(p, newsize)
-      if result == nil: raiseOutOfMem()
-    proc dealloc(p: Pointer) = c_free(p)
-    proc getOccupiedMem(): int = return -1
-    proc getFreeMem(): int = return -1
-    proc getTotalMem(): int = return -1
-
   proc initGC() = nil
   proc GC_disable() = nil
   proc GC_enable() = nil
diff --git a/lib/system/repr.nim b/lib/system/repr.nim
index 9464ff3d8..395adc2ca 100755
--- a/lib/system/repr.nim
+++ b/lib/system/repr.nim
@@ -116,12 +116,16 @@ type
 
 when not defined(useNimRtl):
   proc initReprClosure(cl: var TReprClosure) =
+    # Important: cellsets does not lock the heap when doing allocations! We
+    # have to do it here ...
+    when hasThreadSupport and defined(heapLock): AquireSys(HeapLock)
     Init(cl.marked)
     cl.recdepth = -1      # default is to display everything!
     cl.indent = 0
 
   proc deinitReprClosure(cl: var TReprClosure) =
     Deinit(cl.marked)
+    when hasThreadSupport and defined(heapLock): ReleaseSys(HeapLock)
 
   proc reprBreak(result: var string, cl: TReprClosure) =
     add result, "\n"
diff --git a/lib/system/systhread.nim b/lib/system/systhread.nim
index c497cc961..c83062942 100755
--- a/lib/system/systhread.nim
+++ b/lib/system/systhread.nim
@@ -15,6 +15,8 @@ when not SystemInclude:
   # ugly hack: this file is then included from core/threads, so we have
   # thread support:
   const hasThreadSupport = true
+  
+  include "lib/system/ansi_c"
 
 when (defined(gcc) or defined(llvm_gcc)) and hasThreadSupport:
   proc sync_add_and_fetch(p: var int, val: int): int {.
@@ -94,7 +96,3 @@ else:
   proc ReleaseSys(L: var TSysLock) {.
     importc: "pthread_mutex_unlock", header: "<pthread.h>".}
 
-when SystemInclude:
-  var heapLock: TSysLock
-  InitSysLock(HeapLock)
-