Diffstat (limited to 'lib')
-rwxr-xr-x  lib/nimbase.h              11
-rwxr-xr-x  lib/pure/streams.nim        8
-rwxr-xr-x  lib/system.nim             20
-rwxr-xr-x  lib/system/cellsets.nim     6
-rwxr-xr-x  lib/system/gc.nim        1034
-rwxr-xr-x  lib/system/hti.nim         43
-rwxr-xr-x  lib/system/mmdisp.nim       6
-rw-r--r--  lib/system/oldgc.nim     1044
-rwxr-xr-x  lib/system/sysstr.nim      15
-rw-r--r--  lib/system/timers.nim       7
-rwxr-xr-x  lib/wrappers/openssl.nim    6
11 files changed, 1883 insertions, 317 deletions
diff --git a/lib/nimbase.h b/lib/nimbase.h
index 7fb70a60c..573ad16f7 100755
--- a/lib/nimbase.h
+++ b/lib/nimbase.h
@@ -436,6 +436,17 @@ struct TFrame {
   NI len;
 };
 
+#define nimfr(proc, file) \
+  volatile TFrame F; \
+  F.procname = proc; F.filename = file; F.line = 0; F.len = 0; pushFrame(&F);
+
+#define nimfrs(proc, file, slots) \
+  volatile struct {TFrame* prev;NCSTRING procname;NI line;NCSTRING filename; NI len; TVarSlot s[slots];} F; \
+  F.procname = proc; F.filename = file; F.line = 0; F.len = slots; pushFrame((TFrame*)&F);
+
+#define nimln(n, file) \
+  F.line = n; F.filename = file;
+
 #define NIM_POSIX_INIT  __attribute__((constructor)) 
 
 #if defined(_MSCVER) && defined(__i386__)
diff --git a/lib/pure/streams.nim b/lib/pure/streams.nim
index 212aab493..232205ebd 100755
--- a/lib/pure/streams.nim
+++ b/lib/pure/streams.nim
@@ -180,8 +180,12 @@ proc readLine*(s: PStream): TaintedString =
     if c == '\c': 
       c = readChar(s)
       break
-    elif c == '\L' or c == '\0': break
-    result.string.add(c)
+    if c == '\b':
+      result.string.setLen(result.len - 1)
+    elif c == '\L' or c == '\0':
+      break
+    else:
+      result.string.add(c)
 
 type
   PStringStream* = ref TStringStream ## a stream that encapsulates a string
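
As a side note, the new rule can be illustrated outside the stream machinery
(standalone sketch, not part of the patch; the guard against an empty buffer
is an addition of the sketch): a backspace drops the last accepted character,
'\L' or '\0' ends the line, and everything else is appended.

proc cookLine(input: string): string =
  ## mimics the character handling of the patched readLine on a raw buffer
  result = ""
  for c in input:
    if c == '\b':
      if result.len > 0: result.setLen(result.len - 1)
    elif c == '\L' or c == '\0':
      break
    else:
      result.add(c)

assert cookLine("abx\bc\L") == "abc"
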
diff --git a/lib/system.nim b/lib/system.nim
index 892f4f8c5..b4c265f62 100755
--- a/lib/system.nim
+++ b/lib/system.nim
@@ -186,9 +186,9 @@ when not defined(niminheritable):
 
 when not defined(EcmaScript) and not defined(NimrodVM):
   type
-    TGenericSeq {.compilerproc, pure, inheritable.} = object
+    TGenericSeq* {.compilerproc, pure, inheritable.} = object
       len, reserved: int
-    PGenericSeq {.exportc.} = ptr TGenericSeq
+    PGenericSeq* {.exportc.} = ptr TGenericSeq
     # len and space without counting the terminating zero:
     NimStringDesc {.compilerproc, final.} = object of TGenericSeq
       data: array[0..100_000_000, char]
@@ -565,7 +565,7 @@ proc abs*(x: int64): int64 {.magic: "AbsI64", noSideEffect.}
   ## checking is turned on).
 
 type
-  IntMax32 = int|int8|int16|int32
+  IntMax32 = bool|int|int8|int16|int32
 
 proc `+%` *(x, y: IntMax32): IntMax32 {.magic: "AddU", noSideEffect.}
 proc `+%` *(x, y: Int64): Int64 {.magic: "AddU", noSideEffect.}
@@ -2486,3 +2486,17 @@ proc compiles*(x: expr): bool {.magic: "Compiles", noSideEffect.} =
 
 when defined(initDebugger):
   initDebugger()
+
+# XXX: make these the default (or implement the NilObject optimization)
+proc safeAdd*[T](x: var seq[T], y: T) {.noSideEffect.} =
+  if x == nil: x = @[y]
+  else: x.add(y)
+
+proc safeAdd*(x: var string, y: char) =
+  if x == nil: x = ""
+  x.add(y)
+
+proc safeAdd*(x: var string, y: string) =
+  if x == nil: x = y
+  else: x.add(y)
+
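
A short usage sketch for the helpers above (hypothetical caller code, not part
of the patch): with plain add, appending to a nil seq or string crashes under
this runtime, whereas the safeAdd variants initialize the destination on first
use.

var words: seq[string]     # nil until first assignment in this runtime
words.safeAdd "alpha"      # becomes @["alpha"] instead of crashing
words.safeAdd "beta"

var buffer: string         # also nil initially
buffer.safeAdd 'x'
buffer.safeAdd "yz"        # buffer == "xyz"
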
diff --git a/lib/system/cellsets.nim b/lib/system/cellsets.nim
index b860ef38c..5de4ca811 100755
--- a/lib/system/cellsets.nim
+++ b/lib/system/cellsets.nim
@@ -10,10 +10,12 @@
 # Efficient set of pointers for the GC (and repr)
 
 type
+  TRefCount = int
+
   TCell {.pure.} = object
-    refcount: int  # the refcount and some flags
+    refcount: TRefCount  # the refcount and some flags
     typ: PNimType
-    when leakDetector:
+    when trackAllocationSource:
       filename: cstring
       line: int
 
diff --git a/lib/system/gc.nim b/lib/system/gc.nim
index ec656e0ef..bc3474adf 100755
--- a/lib/system/gc.nim
+++ b/lib/system/gc.nim
@@ -32,20 +32,73 @@ when defined(memProfiler):
   proc nimProfile(requestedSize: int)
 
 const
-  rcIncrement = 0b1000 # so that lowest 3 bits are not touched
-  # NOTE: Most colors are currently unused
-  rcBlack = 0b000  # cell is colored black; in use or free
-  rcGray = 0b001   # possible member of a cycle
-  rcWhite = 0b010  # member of a garbage cycle
-  rcPurple = 0b011 # possible root of a cycle
-  rcZct = 0b100    # in ZCT
-  rcRed = 0b101    # Candidate cycle undergoing sigma-computation
-  rcOrange = 0b110 # Candidate cycle awaiting epoch boundary
-  rcShift = 3      # shift by rcShift to get the reference counter
-  colorMask = 0b111
+  rcShift = 6 # the reference count is shifted so we can use
+              # the least significant bits for additional flags:
+
+  rcAlive = 0b00000           # object is reachable.
+                              # color *black* in the original paper
+                              
+  rcCycleCandidate = 0b00001  # possible root of a cycle. *purple*
+
+  rcDecRefApplied = 0b00010   # the first dec-ref phase of the
+                              # collector was already applied to this
+                              # object. *gray*
+                              
+  rcMaybeDead = 0b00011       # this object is a candidate for deletion
+                              # during the collect cycles algorithm.
+                              # *white*.
+                              
+  rcReallyDead = 0b00100      # this is proved to be garbage
+  
+  rcRetiredBuffer = 0b00101   # this is a seq or string buffer that
+                              # was replaced by a resize operation.
+                              # see growObj for details
+
+  rcColorMask = TRefCount(0b00111)
+
+  rcZct = 0b01000             # already added to ZCT
+  rcInCycleRoots = 0b10000    # already buffered as cycle candidate
+  rcHasStackRef = 0b100000    # the object had a stack ref in the last
+                              # cycle collection
+
+  rcMarkBit = rcHasStackRef   # this is currently used for leak detection
+                              # when traceGC is on
+
+  rcBufferedAnywhere = rcZct or rcInCycleRoots
+
+  rcIncrement = 1 shl rcShift # don't touch the color bits
+
+const
+  NewObjectsAreCycleRoots = true
+    # the alternative is to use the old strategy of adding cycle roots
+    # in incRef (in the compiler itself, this doesn't change much)
+
+  IncRefRemovesCandidates = false
+    # this is safe only if we can reliably track the fact that the object
+    # has stack references. This could be easily done by adding another bit
+    # to the refcount field and setting it up in unmarkStackAndRegisters.
+    # The bit must also be set for new objects that are not rc1 and it must be
+    # examined in the decref loop in collectCycles.
+    # XXX: not implemented yet as tests didn't show any improvement from this
+   
+  MarkingSkipsAcyclicObjects = false
+    # Acyclic objects can be safely ignored in the mark and scan phases, 
+    # because they cannot contribute to the internal count.
+    # XXX: if we generate specialized `markCyclic` and `markAcyclic`
+    # procs we can further optimize this as there won't be any need for
+    # checks in the code
+  
+  MinimumStackMarking = false
+    # Try to scan only the user stack and ignore the part of the stack
+    # belonging to the GC itself. see setStackTop for further info.
+    # XXX: still has problems in release mode in the compiler itself.
+    # investigate how it affects growObj
+
+  CollectCyclesStats = false
+
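
To make the new refcount layout concrete, here is a standalone sketch (the
decodeRC helper is hypothetical; only the constants mirror the ones above):
the low three bits carry the color, the next three bits carry the ZCT,
cycle-root and stack-ref flags, and the counter proper starts at bit 6.

const
  rcShift          = 6
  rcCycleCandidate = 0b00001
  rcColorMask      = 0b00111
  rcZct            = 0b01000
  rcInCycleRoots   = 0b10000
  rcHasStackRef    = 0b100000
  rcIncrement      = 1 shl rcShift

proc decodeRC(rc: int): tuple[count, color: int, zct, inRoots, onStack: bool] =
  (rc shr rcShift, rc and rcColorMask,
   (rc and rcZct) != 0, (rc and rcInCycleRoots) != 0, (rc and rcHasStackRef) != 0)

# a cell with two heap references, buffered as a cycle candidate:
echo decodeRC((2 * rcIncrement) or rcInCycleRoots or rcCycleCandidate)
# -> (count: 2, color: 1, zct: false, inRoots: true, onStack: false)
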
 type
   TWalkOp = enum
-    waZctDecRef, waPush, waCycleDecRef
+    waPush
 
   TFinalizer {.compilerproc.} = proc (self: pointer) {.nimcall.}
     # A ref type can have a finalizer that is called before the object's
@@ -63,19 +116,28 @@ type
   TGcHeap {.final, pure.} = object # this contains the zero count and
                                    # non-zero count table
     stackBottom: pointer
+    stackTop: pointer
     cycleThreshold: int
     zct: TCellSeq            # the zero count table
     decStack: TCellSeq       # cells in the stack that are to decref again
-    cycleRoots: TCellSet
+    cycleRoots: TCellSeq
     tempStack: TCellSeq      # temporary stack for recursion elimination
+    freeStack: TCellSeq      # objects ready to be freed
     recGcLock: int           # prevent recursion via finalizers; no thread lock
+    cycleRootsTrimIdx: int   # Trimming is a light-weight collection of the 
+                             # cycle roots table that uses a cheap linear scan
+                             # to find only positively dead objects.
+                             # One strategy is to perform it only for new objects
+                             # allocated between the invocations of CollectZCT.
+                             # This index indicates the start of the range of
+                             # such new objects within the table.
     when withRealTime:
       maxPause: TNanos       # max allowed pause in nanoseconds; active if > 0
     region: TMemRegion       # garbage collected region
     stat: TGcStat
 
 var
-  gch {.rtlThreadVar.}: TGcHeap
+  gch* {.rtlThreadVar.}: TGcHeap
 
 when not defined(useNimRtl):
   InstantiateForRegion(gch.region)
@@ -88,16 +150,92 @@ template release(gch: TGcHeap) =
   when hasThreadSupport and hasSharedHeap:
     releaseSys(HeapLock)
 
-proc addZCT(s: var TCellSeq, c: PCell) {.noinline.} =
-  if (c.refcount and rcZct) == 0:
-    c.refcount = c.refcount and not colorMask or rcZct
-    add(s, c)
+template setColor(c: PCell, color) =
+  c.refcount = (c.refcount and not rcColorMask) or color
+
+template color(c: PCell): expr =
+  c.refcount and rcColorMask
+
+template isBitDown(c: PCell, bit): expr =
+  (c.refcount and bit) == 0
+
+template isBitUp(c: PCell, bit): expr =
+  (c.refcount and bit) != 0
+
+template setBit(c: PCell, bit): expr =
+  c.refcount = c.refcount or bit
+
+template isDead(c: Pcell): expr =
+  c.isBitUp(rcReallyDead) # also covers rcRetiredBuffer
+
+template clearBit(c: PCell, bit): expr =
+  c.refcount = c.refcount and (not TRefCount(bit))
+
+when debugGC:
+  var gcCollectionIdx = 0
+
+  proc colorStr(c: PCell): cstring =
+    let color = c.color
+    case color
+    of rcAlive: return "alive"
+    of rcMaybeDead: return "maybedead"
+    of rcCycleCandidate: return "candidate"
+    of rcDecRefApplied: return "marked"
+    of rcRetiredBuffer: return "retired"
+    of rcReallyDead: return "dead"
+    else: return "unknown?"
+  
+  proc inCycleRootsStr(c: PCell): cstring =
+    if c.isBitUp(rcInCycleRoots): result = "cycleroot"
+    else: result = ""
+
+  proc inZctStr(c: PCell): cstring =
+    if c.isBitUp(rcZct): result = "zct"
+    else: result = ""
+
+  proc writeCell*(msg: CString, c: PCell, force = false) =
+    var kind = -1
+    if c.typ != nil: kind = ord(c.typ.kind)
+    when trackAllocationSource:
+      c_fprintf(c_stdout, "[GC %d] %s: %p %d rc=%ld %s %s %s from %s(%ld)\n",
+                gcCollectionIdx,
+                msg, c, kind, c.refcount shr rcShift,
+                c.colorStr, c.inCycleRootsStr, c.inZctStr,
+                c.filename, c.line)
+    else:
+      c_fprintf(c_stdout, "[GC] %s: %p %d rc=%ld\n",
+                msg, c, kind, c.refcount shr rcShift)
+
+proc addZCT(zct: var TCellSeq, c: PCell) {.noinline.} =
+  if c.isBitDown(rcZct):
+    c.setBit rcZct
+    zct.add c
+
+template setStackTop(gch) =
+  # This must be called immediately after we enter the GC code
+  # to minimize the size of the scanned stack. The stack consumed
+  # by the GC procs may amount to 200-400 bytes depending on the
+  # build settings and this contributes to false-positives
+  # in the conservative stack marking
+  when MinimumStackMarking:
+    var stackTop {.volatile.}: pointer
+    gch.stackTop = addr(stackTop)
+
+template addCycleRoot(cycleRoots: var TCellSeq, c: PCell) =
+  if c.color != rcCycleCandidate:
+    c.setColor rcCycleCandidate
+    
+    # the object may be buffered already. for example, consider:
+    # decref; incref; decref
+    if c.isBitDown(rcInCycleRoots):
+      c.setBit rcInCycleRoots
+      cycleRoots.add c
 
 proc cellToUsr(cell: PCell): pointer {.inline.} =
   # convert object (=pointer to refcount) to pointer to userdata
   result = cast[pointer](cast[TAddress](cell)+%TAddress(sizeof(TCell)))
 
-proc usrToCell(usr: pointer): PCell {.inline.} =
+proc usrToCell*(usr: pointer): PCell {.inline.} =
   # convert pointer to userdata to object (=pointer to refcount)
   result = cast[PCell](cast[TAddress](usr)-%TAddress(sizeof(TCell)))
 
@@ -115,22 +253,30 @@ proc internRefcount(p: pointer): int {.exportc: "getRefcount".} =
 when BitsPerPage mod (sizeof(int)*8) != 0:
   {.error: "(BitsPerPage mod BitsPerUnit) should be zero!".}
 
-when debugGC:
-  proc writeCell(msg: CString, c: PCell) =
-    var kind = -1
-    if c.typ != nil: kind = ord(c.typ.kind)
-    when leakDetector:
-      c_fprintf(c_stdout, "[GC] %s: %p %d rc=%ld from %s(%ld)\n",
-                msg, c, kind, c.refcount shr rcShift, c.filename, c.line)
-    else:
-      c_fprintf(c_stdout, "[GC] %s: %p %d rc=%ld\n",
-                msg, c, kind, c.refcount shr rcShift)
+# forward declarations:
+proc collectCT(gch: var TGcHeap)
+proc IsOnStack*(p: pointer): bool {.noinline.}
+proc forAllChildren(cell: PCell, op: TWalkOp)
+proc doOperation(p: pointer, op: TWalkOp)
+proc forAllChildrenAux(dest: Pointer, mt: PNimType, op: TWalkOp)
+# we need the prototype here for debugging purposes
+
+proc prepareDealloc(cell: PCell) =
+  if cell.typ.finalizer != nil:
+    # the finalizer could invoke something that
+    # allocates memory; this could trigger a garbage
+    # collection. Since we are already collecting we
+    # prevent recursive entry here with a lock.
+    # XXX: we should set the cell's children to nil!
+    inc(gch.recGcLock)
+    (cast[TFinalizer](cell.typ.finalizer))(cellToUsr(cell))
+    dec(gch.recGcLock)
 
 when traceGC:
   # traceGC is a special switch to enable extensive debugging
   type
     TCellState = enum
-      csAllocated, csZctFreed, csCycFreed
+      csAllocated, csFreed
   var
     states: array[TCellState, TCellSet]
 
@@ -140,155 +286,197 @@ when traceGC:
       if c in states[csAllocated]:
         writeCell("attempt to alloc an already allocated cell", c)
         sysAssert(false, "traceCell 1")
-      excl(states[csCycFreed], c)
-      excl(states[csZctFreed], c)
-    of csZctFreed:
-      if c in states[csZctFreed]:
-        writeCell("attempt to free zct cell twice", c)
+      excl(states[csFreed], c)
+      # writecell("allocated", c)
+    of csFreed:
+      if c in states[csFreed]:
+        writeCell("attempt to free a cell twice", c)
         sysAssert(false, "traceCell 2")
-      if c in states[csCycFreed]:
-        writeCell("attempt to free with zct, but already freed with cyc", c)
-        sysAssert(false, "traceCell 3")
       if c notin states[csAllocated]:
         writeCell("attempt to free not an allocated cell", c)
-        sysAssert(false, "traceCell 4")
-      excl(states[csAllocated], c)
-    of csCycFreed:
-      if c notin states[csAllocated]:
-        writeCell("attempt to free a not allocated cell", c)
-        sysAssert(false, "traceCell 5")
-      if c in states[csCycFreed]:
-        writeCell("attempt to free cyc cell twice", c)
-        sysAssert(false, "traceCell 6")
-      if c in states[csZctFreed]:
-        writeCell("attempt to free with cyc, but already freed with zct", c)
-        sysAssert(false, "traceCell 7")
+        sysAssert(false, "traceCell 3")
       excl(states[csAllocated], c)
+      # writecell("freed", c)
     incl(states[state], c)
 
-  proc writeLeakage() =
-    var z = 0
-    var y = 0
-    var e = 0
+  proc computeCellWeight(c: PCell): int =
+    var x: TCellSet
+    x.init
+
+    let startLen = gch.tempStack.len
+    c.forAllChildren waPush
+    
+    while startLen != gch.tempStack.len:
+      dec gch.tempStack.len
+      var c = gch.tempStack.d[gch.tempStack.len]
+      if c in states[csFreed]: continue
+      inc result
+      if c notin x:
+        x.incl c
+        c.forAllChildren waPush
+
+  template markChildrenRec(cell) =
+    let startLen = gch.tempStack.len
+    cell.forAllChildren waPush
+    let isMarked = cell.isBitUp(rcMarkBit)
+    while startLen != gch.tempStack.len:
+      dec gch.tempStack.len
+      var c = gch.tempStack.d[gch.tempStack.len]
+      if c in states[csFreed]: continue
+      if c.isBitDown(rcMarkBit):
+        c.setBit rcMarkBit
+        c.forAllChildren waPush
+    if cell.isBitUp(rcMarkBit) and not isMarked:
+      writecell("cyclic cell", cell)
+      cprintf "Weight %d\n", cell.computeCellWeight
+      
+  proc writeLeakage(onlyRoots: bool) =
+    if onlyRoots:
+      for c in elements(states[csAllocated]):
+        if c notin states[csFreed]:
+          markChildrenRec(c)
+    var f = 0
+    var a = 0
     for c in elements(states[csAllocated]):
-      inc(e)
-      if c in states[csZctFreed]: inc(z)
-      elif c in states[csCycFreed]: inc(y)
-      else: writeCell("leak", c)
-    cfprintf(cstdout, "Allocations: %ld; ZCT freed: %ld; CYC freed: %ld\n",
-             e, z, y)
+      inc a
+      if c in states[csFreed]: inc f
+      elif c.isBitDown(rcMarkBit):
+        writeCell("leak", c)
+        cprintf "Weight %d\n", c.computeCellWeight
+    cfprintf(cstdout, "Allocations: %ld; freed: %ld\n", a, f)
 
 template gcTrace(cell, state: expr): stmt {.immediate.} =
+  when logGC: writeCell($state, cell)
   when traceGC: traceCell(cell, state)
 
-# forward declarations:
-proc collectCT(gch: var TGcHeap)
-proc IsOnStack*(p: pointer): bool {.noinline.}
-proc forAllChildren(cell: PCell, op: TWalkOp)
-proc doOperation(p: pointer, op: TWalkOp)
-proc forAllChildrenAux(dest: Pointer, mt: PNimType, op: TWalkOp)
-# we need the prototype here for debugging purposes
-
-when hasThreadSupport and hasSharedHeap:
-  template `--`(x: expr): expr = atomicDec(x, rcIncrement) <% rcIncrement
-  template `++`(x: expr): stmt = discard atomicInc(x, rcIncrement)
-else:
-  template `--`(x: expr): expr = 
-    Dec(x, rcIncrement)
-    x <% rcIncrement
-  template `++`(x: expr): stmt = Inc(x, rcIncrement)
-
-proc prepareDealloc(cell: PCell) =
-  if cell.typ.finalizer != nil:
-    # the finalizer could invoke something that
-    # allocates memory; this could trigger a garbage
-    # collection. Since we are already collecting we
-    # prevend recursive entering here by a lock.
-    # XXX: we should set the cell's children to nil!
-    inc(gch.recGcLock)
-    (cast[TFinalizer](cell.typ.finalizer))(cellToUsr(cell))
-    dec(gch.recGcLock)
+template WithHeapLock(blk: stmt): stmt =
+  when hasThreadSupport and hasSharedHeap: AcquireSys(HeapLock)
+  blk
+  when hasThreadSupport and hasSharedHeap: ReleaseSys(HeapLock)
 
 proc rtlAddCycleRoot(c: PCell) {.rtl, inl.} = 
   # we MUST access gch as a global here, because this crosses DLL boundaries!
-  when hasThreadSupport and hasSharedHeap:
-    AcquireSys(HeapLock)
-  incl(gch.cycleRoots, c)
-  when hasThreadSupport and hasSharedHeap:
-    ReleaseSys(HeapLock)
+  WithHeapLock: addCycleRoot(gch.cycleRoots, c)
 
 proc rtlAddZCT(c: PCell) {.rtl, inl.} =
   # we MUST access gch as a global here, because this crosses DLL boundaries!
-  when hasThreadSupport and hasSharedHeap:
-    AcquireSys(HeapLock)
-  addZCT(gch.zct, c)
-  when hasThreadSupport and hasSharedHeap:
-    ReleaseSys(HeapLock)
+  WithHeapLock: addZCT(gch.zct, c)
+
+type
+  TCyclicMode = enum
+    Cyclic,
+    Acyclic,
+    MaybeCyclic
+
+  TReleaseType = enum
+    AddToZTC
+    FreeImmediately
+
+  THeapType = enum
+    LocalHeap
+    SharedHeap
+
+template `++` (rc: TRefCount, heapType: THeapType): stmt =
+  when heapType == SharedHeap:
+    discard atomicInc(rc, rcIncrement)
+  else:
+    inc rc, rcIncrement
 
-proc decRef(c: PCell) {.inline.} =
+template `--`(rc: TRefCount): expr =
+  dec rc, rcIncrement
+  rc <% rcIncrement
+
+template `--` (rc: TRefCount, heapType: THeapType): expr =
+  (when heapType == SharedHeap: atomicDec(rc, rcIncrement) <% rcIncrement
+   else: --rc)
+
+template doDecRef(cc: PCell,
+                  heapType = LocalHeap,
+                  cycleFlag = MaybeCyclic): stmt =
+  var c = cc
   sysAssert(isAllocatedPtr(gch.region, c), "decRef: interiorPtr")
+  # XXX: move this elsewhere
+
   sysAssert(c.refcount >=% rcIncrement, "decRef")
-  if --c.refcount:
+  if c.refcount--(heapType):
+    # this is the last reference from the heap
+    # add to a zero-count-table that will be matched against stack pointers
     rtlAddZCT(c)
-  elif canBeCycleRoot(c):
-    # unfortunately this is necessary here too, because a cycle might just
-    # have been broken up and we could recycle it.
-    rtlAddCycleRoot(c) 
-
-proc incRef(c: PCell) {.inline.} = 
-  sysAssert(isAllocatedPtr(gch.region, c), "incRef: interiorPtr")
-  ++c.refcount
-  if canBeCycleRoot(c):
-    rtlAddCycleRoot(c)
-
-proc nimGCref(p: pointer) {.compilerProc, inline.} = incRef(usrToCell(p))
-proc nimGCunref(p: pointer) {.compilerProc, inline.} = decRef(usrToCell(p))
+  else:
+    when cycleFlag != Acyclic:
+      if cycleFlag == Cyclic or canBeCycleRoot(c):
+        # a cycle may have been broken
+        rtlAddCycleRoot(c)
+
+template doIncRef(cc: PCell,
+                 heapType = LocalHeap,
+                 cycleFlag = MaybeCyclic): stmt =
+  var c = cc
+  c.refcount++(heapType)
+  when cycleFlag != Acyclic:
+    when NewObjectsAreCycleRoots:
+      if canbeCycleRoot(c):
+        addCycleRoot(gch.cycleRoots, c)
+    elif IncRefRemovesCandidates:
+      c.setColor rcAlive
+  # XXX: this is not really atomic enough!
+  
+proc nimGCref(p: pointer) {.compilerProc, inline.} = doIncRef(usrToCell(p))
+proc nimGCunref(p: pointer) {.compilerProc, inline.} = doDecRef(usrToCell(p))
 
 proc nimGCunrefNoCycle(p: pointer) {.compilerProc, inline.} =
   sysAssert(allocInv(gch.region), "begin nimGCunrefNoCycle")
   var c = usrToCell(p)
   sysAssert(isAllocatedPtr(gch.region, c), "nimGCunrefNoCycle: isAllocatedPtr")
-  if --c.refcount:
+  if c.refcount--(LocalHeap):
     rtlAddZCT(c)
     sysAssert(allocInv(gch.region), "end nimGCunrefNoCycle 2")
   sysAssert(allocInv(gch.region), "end nimGCunrefNoCycle 5")
 
-proc asgnRef(dest: ppointer, src: pointer) {.compilerProc, inline.} =
-  # the code generator calls this proc!
+template doAsgnRef(dest: ppointer, src: pointer,
+                  heapType = LocalHeap, cycleFlag = MaybeCyclic): stmt =
   sysAssert(not isOnStack(dest), "asgnRef")
   # BUGFIX: first incRef then decRef!
-  if src != nil: incRef(usrToCell(src))
-  if dest[] != nil: decRef(usrToCell(dest[]))
+  if src != nil: doIncRef(usrToCell(src), heapType, cycleFlag)
+  if dest[] != nil: doDecRef(usrToCell(dest[]), heapType, cycleFlag)
   dest[] = src
 
+proc asgnRef(dest: ppointer, src: pointer) {.compilerProc, inline.} =
+  # the code generator calls this proc!
+  doAsgnRef(dest, src, LocalHeap, MaybeCyclic)
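
The "first incRef then decRef" comment above is load-bearing: with the
opposite order, assigning a reference to itself through two aliases would
momentarily drop the only heap reference. A toy model of the hazard (Cell,
incRef and decRef here are stand-ins, not the runtime's types):

type Cell = ref object
  rc: int

proc incRef(c: Cell) = inc c.rc
proc decRef(c: Cell) =
  dec c.rc
  if c.rc == 0: echo "cell would be pushed to the ZCT / freed"

proc asgnSketch(dest: var Cell, src: Cell) =
  # safe order: retain the source before releasing the destination,
  # so `x = x` (dest and src aliasing the same cell) never reaches rc == 0
  if src != nil: incRef(src)
  if dest != nil: decRef(dest)
  dest = src

var x = Cell(rc: 1)
asgnSketch(x, x)    # rc stays 1; with decRef first it would dip to 0
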
+
 proc asgnRefNoCycle(dest: ppointer, src: pointer) {.compilerProc, inline.} =
   # the code generator calls this proc if it is known at compile time that no 
   # cycle is possible.
-  if src != nil:
-    var c = usrToCell(src)
-    ++c.refcount
-  if dest[] != nil: 
-    var c = usrToCell(dest[])
-    if --c.refcount:
-      rtlAddZCT(c)
-  dest[] = src
+  doAsgnRef(dest, src, LocalHeap, Acyclic)
 
 proc unsureAsgnRef(dest: ppointer, src: pointer) {.compilerProc.} =
   # unsureAsgnRef updates the reference counters only if dest is not on the
   # stack. It is used by the code generator if it cannot decide whether a
   # reference is in the stack or not (this can happen for var parameters).
   if not IsOnStack(dest):
-    if src != nil: incRef(usrToCell(src))
+    if src != nil: doIncRef(usrToCell(src))
+    # XXX we must detect a shared heap here
+    # better idea may be to just eliminate the need for unsureAsgnRef
+    #
     # XXX finally use assembler for the stack checking instead!
     # the test for '!= nil' is correct, but I got tired of the segfaults
     # resulting from the crappy stack checking:
-    if cast[int](dest[]) >=% PageSize: decRef(usrToCell(dest[]))
+    if cast[int](dest[]) >=% PageSize: doDecRef(usrToCell(dest[]))
   else:
     # can't be an interior pointer if it's a stack location!
-    sysAssert(interiorAllocatedPtr(gch.region, dest)==nil, 
+    sysAssert(interiorAllocatedPtr(gch.region, dest)==nil,
               "stack loc AND interior pointer")
   dest[] = src
 
+when hasThreadSupport and hasSharedHeap:
+  # shared heap version of the above procs
+  proc asgnRefSh(dest: ppointer, src: pointer) {.compilerProc, inline.} =
+    doAsgnRef(dest, src, SharedHeap, MaybeCyclic)
+
+  proc asgnRefNoCycleSh(dest: ppointer, src: pointer) {.compilerProc, inline.} =
+    doAsgnRef(dest, src, SharedHeap, Acyclic)
+
 proc initGC() =
   when not defined(useNimRtl):
     when traceGC:
@@ -303,6 +491,7 @@ proc initGC() =
     # init the rt
     init(gch.zct)
     init(gch.tempStack)
+    init(gch.freeStack)
     Init(gch.cycleRoots)
     Init(gch.decStack)
 
@@ -355,9 +544,10 @@ proc forAllChildren(cell: PCell, op: TWalkOp) =
       var d = cast[TAddress](cellToUsr(cell))
       var s = cast[PGenericSeq](d)
       if s != nil:
+        let baseAddr = d +% GenericSeqSize
         for i in 0..s.len-1:
-          forAllChildrenAux(cast[pointer](d +% i *% cell.typ.base.size +%
-            GenericSeqSize), cell.typ.base, op)
+          forAllChildrenAux(cast[pointer](baseAddr +% i *% cell.typ.base.size),
+                            cell.typ.base, op)
     else: nil
 
 proc addNewObjToZCT(res: PCell, gch: var TGcHeap) {.inline.} =
@@ -378,7 +568,7 @@ proc addNewObjToZCT(res: PCell, gch: var TGcHeap) {.inline.} =
     template replaceZctEntry(i: expr) =
       c = d[i]
       if c.refcount >=% rcIncrement:
-        c.refcount = c.refcount and not colorMask
+        c.clearBit(rcZct)
         d[i] = res
         return
     if L > 8:
@@ -399,84 +589,108 @@ proc addNewObjToZCT(res: PCell, gch: var TGcHeap) {.inline.} =
     for i in countdown(L-1, max(0, L-8)):
       var c = d[i]
       if c.refcount >=% rcIncrement:
-        c.refcount = c.refcount and not colorMask
+        c.clearBit(rcZct)
         d[i] = res
         return
     add(gch.zct, res)
 
-proc rawNewObj(typ: PNimType, size: int, gch: var TGcHeap): pointer =
+proc rawNewObj(typ: PNimType, size: int, gch: var TGcHeap, rc1: bool): pointer =
   # generates a new object and sets its reference counter to 0
   acquire(gch)
+  sysAssert(allocInv(gch.region), "rawNewObj begin")
   sysAssert(typ.kind in {tyRef, tyString, tySequence}, "newObj: 1")
+  
   collectCT(gch)
-  sysAssert(allocInv(gch.region), "rawNewObj begin")
+  sysAssert(allocInv(gch.region), "rawNewObj after collect")
+
   var res = cast[PCell](rawAlloc(gch.region, size + sizeof(TCell)))
+  sysAssert(allocInv(gch.region), "rawNewObj after rawAlloc")
+
   sysAssert((cast[TAddress](res) and (MemAlign-1)) == 0, "newObj: 2")
-  # now it is buffered in the ZCT
+  
   res.typ = typ
-  when leakDetector and not hasThreadSupport:
-    if framePtr != nil and framePtr.prev != nil:
-      res.filename = framePtr.prev.filename
-      res.line = framePtr.prev.line
-  res.refcount = rcZct # refcount is zero, but mark it to be in the ZCT  
+  
+  when trackAllocationSource and not hasThreadSupport:
+    if framePtr != nil and framePtr.prev != nil and framePtr.prev.prev != nil:
+      res.filename = framePtr.prev.prev.filename
+      res.line = framePtr.prev.prev.line
+    else:
+      res.filename = "nofile"
+  
+  if rc1:
+    res.refcount = rcIncrement # refcount is 1
+  else:
+    # its refcount is zero, so add it to the ZCT:
+    res.refcount = rcZct
+    addNewObjToZCT(res, gch)
+
+    if NewObjectsAreCycleRoots and canBeCycleRoot(res):
+      res.setBit(rcInCycleRoots)
+      res.setColor rcCycleCandidate
+      gch.cycleRoots.add res
+    
   sysAssert(isAllocatedPtr(gch.region, res), "newObj: 3")
-  # its refcount is zero, so add it to the ZCT:
-  addNewObjToZCT(res, gch)
+  
   when logGC: writeCell("new cell", res)
   gcTrace(res, csAllocated)
   release(gch)
   result = cellToUsr(res)
+  zeroMem(result, size)
+  when defined(memProfiler): nimProfile(size)
   sysAssert(allocInv(gch.region), "rawNewObj end")
 
 {.pop.}
 
-proc newObj(typ: PNimType, size: int): pointer {.compilerRtl.} =
-  result = rawNewObj(typ, size, gch)
-  zeroMem(result, size)
-  when defined(memProfiler): nimProfile(size)
+proc freeCell(gch: var TGcHeap, c: PCell) =
+  # prepareDealloc(c)
+  gcTrace(c, csFreed)
+
+  when reallyDealloc: rawDealloc(gch.region, c)
+  else:
+    sysAssert(c.typ != nil, "collectCycles")
+    zeroMem(c, sizeof(TCell))
+
+template eraseAt(cells: var TCellSeq, at: int): stmt =
+  cells.d[at] = cells.d[cells.len - 1]
+  dec cells.len
+
+template trimAt(roots: var TCellSeq, at: int): stmt =
+  # This will remove a cycle root candidate during trimming.
+  # a candidate is removed either because it received a refup and
+  # it's no longer a candidate or because it received further refdowns
+  # and now it's dead for sure.
+  let c = roots.d[at]
+  c.clearBit(rcInCycleRoots)
+  roots.eraseAt(at)
+  if c.isBitUp(rcReallyDead) and c.refcount <% rcIncrement:
+    # This case covers both dead objects and retired buffers.
+    # That's why we must also check the refcount (it may be
+    # kept positive by stack references).
+    freeCell(gch, c)
 
+proc newObj(typ: PNimType, size: int): pointer {.compilerRtl.} =
+  setStackTop(gch)
+  result = rawNewObj(typ, size, gch, false)
+  
 proc newSeq(typ: PNimType, len: int): pointer {.compilerRtl.} =
-  # `newObj` already uses locks, so no need for them here.
+  setStackTop(gch)
+  # `rawNewObj` already uses locks, so no need for them here.
   let size = addInt(mulInt(len, typ.base.size), GenericSeqSize)
-  result = newObj(typ, size)
+  result = rawNewObj(typ, size, gch, false)
   cast[PGenericSeq](result).len = len
   cast[PGenericSeq](result).reserved = len
-  when defined(memProfiler): nimProfile(size)
 
 proc newObjRC1(typ: PNimType, size: int): pointer {.compilerRtl.} =
-  # generates a new object and sets its reference counter to 1
-  sysAssert(allocInv(gch.region), "newObjRC1 begin")
-  acquire(gch)
-  sysAssert(typ.kind in {tyRef, tyString, tySequence}, "newObj: 1")
-  collectCT(gch)
-  sysAssert(allocInv(gch.region), "newObjRC1 after collectCT")
-  
-  var res = cast[PCell](rawAlloc(gch.region, size + sizeof(TCell)))
-  sysAssert(allocInv(gch.region), "newObjRC1 after rawAlloc")
-  sysAssert((cast[TAddress](res) and (MemAlign-1)) == 0, "newObj: 2")
-  # now it is buffered in the ZCT
-  res.typ = typ
-  when leakDetector and not hasThreadSupport:
-    if framePtr != nil and framePtr.prev != nil:
-      res.filename = framePtr.prev.filename
-      res.line = framePtr.prev.line
-  res.refcount = rcIncrement # refcount is 1
-  sysAssert(isAllocatedPtr(gch.region, res), "newObj: 3")
-  when logGC: writeCell("new cell", res)
-  gcTrace(res, csAllocated)
-  release(gch)
-  result = cellToUsr(res)
-  zeroMem(result, size)
-  sysAssert(allocInv(gch.region), "newObjRC1 end")
-  when defined(memProfiler): nimProfile(size)
+  setStackTop(gch)
+  result = rawNewObj(typ, size, gch, true)
 
 proc newSeqRC1(typ: PNimType, len: int): pointer {.compilerRtl.} =
+  setStackTop(gch)
   let size = addInt(mulInt(len, typ.base.size), GenericSeqSize)
-  result = newObjRC1(typ, size)
+  result = rawNewObj(typ, size, gch, true)
   cast[PGenericSeq](result).len = len
   cast[PGenericSeq](result).reserved = len
-  when defined(memProfiler): nimProfile(size)
-  
+
 proc growObj(old: pointer, newsize: int, gch: var TGcHeap): pointer =
   acquire(gch)
   collectCT(gch)
@@ -486,43 +700,73 @@ proc growObj(old: pointer, newsize: int, gch: var TGcHeap): pointer =
   sysAssert(allocInv(gch.region), "growObj begin")
 
   var res = cast[PCell](rawAlloc(gch.region, newsize + sizeof(TCell)))
-  var elemSize = 1
-  if ol.typ.kind != tyString: elemSize = ol.typ.base.size
+  var elemSize = if ol.typ.kind != tyString: ol.typ.base.size
+                 else: 1
   
   var oldsize = cast[PGenericSeq](old).len*elemSize + GenericSeqSize
+  
+  # XXX: This should happen outside
+  # call user-defined move code
+  # call user-defined default constructor
   copyMem(res, ol, oldsize + sizeof(TCell))
   zeroMem(cast[pointer](cast[TAddress](res)+% oldsize +% sizeof(TCell)),
           newsize-oldsize)
+
   sysAssert((cast[TAddress](res) and (MemAlign-1)) == 0, "growObj: 3")
   sysAssert(res.refcount shr rcShift <=% 1, "growObj: 4")
-  #if res.refcount <% rcIncrement:
-  #  add(gch.zct, res)
-  #else: # XXX: what to do here?
-  #  decRef(ol)
-  if (ol.refcount and colorMask) == rcZct:
-    var j = gch.zct.len-1
-    var d = gch.zct.d
-    while j >= 0: 
-      if d[j] == ol:
-        d[j] = res
-        break
-      dec(j)
-  if canBeCycleRoot(ol): excl(gch.cycleRoots, ol)
-  when logGC:
-    writeCell("growObj old cell", ol)
-    writeCell("growObj new cell", res)
-  gcTrace(ol, csZctFreed)
-  gcTrace(res, csAllocated)
-  when reallyDealloc: rawDealloc(gch.region, ol)
+  
+  when false:
+    if ol.isBitUp(rcZct):
+      var j = gch.zct.len-1
+      var d = gch.zct.d
+      while j >= 0: 
+        if d[j] == ol:
+          d[j] = res
+          break
+        dec(j)
+    
+    if ol.isBitUp(rcInCycleRoots):
+      for i in 0 .. <gch.cycleRoots.len:
+        if gch.cycleRoots.d[i] == ol:
+          eraseAt(gch.cycleRoots, i)
+
+    freeCell(gch, ol)
+  
   else:
-    sysAssert(ol.typ != nil, "growObj: 5")
-    zeroMem(ol, sizeof(TCell))
+    # the new buffer inherits the GC state of the old one
+    if res.isBitUp(rcZct): gch.zct.add res
+    if res.isBitUp(rcInCycleRoots): gch.cycleRoots.add res
+
+    # Pay attention to what's going on here! We're not releasing the old memory.
+    # This is because at this point there may be an interior pointer pointing
+    # into this buffer somewhere on the stack (due to `var` parameters now,
+    # and `let` and `var:var` stack locations in the future).
+    # We'll release the memory in the next GC cycle. If we release it here,
+    # we cannot guarantee that no memory will be corrupted when only safe
+    # language features are used. Accessing the memory after the seq/string
+    # has been invalidated may still result in logic errors in the user code.
+    # We may improve on that by protecting the page in debug builds or
+    # by providing a warning when we detect a stack pointer into it.
+    let bufferFlags = ol.refcount and rcBufferedAnywhere
+    if bufferFlags == 0:
+      # we need this in order to collect it safely later
+      ol.refcount = rcRetiredBuffer or rcZct
+      gch.zct.add ol
+    else:
+      ol.refcount = rcRetiredBuffer or bufferFlags
+
+    when logGC:
+      writeCell("growObj old cell", ol)
+      writeCell("growObj new cell", res)
+
+  gcTrace(res, csAllocated)
   release(gch)
   result = cellToUsr(res)
   sysAssert(allocInv(gch.region), "growObj end")
   when defined(memProfiler): nimProfile(newsize-oldsize)
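
The retired-buffer logic above exists because user code may still hold an
interior pointer into the old storage. A hypothetical user-level scenario
(not from the patch) that growObj has to survive:

proc appendAndWrite(s: var seq[int], elem: var int) =
  s.add 99      # may trigger growObj: the old buffer is retired, not freed
  elem = 7      # elem may still point into the old buffer on the stack

var data = @[1, 2, 3]
appendAndWrite(data, data[0])   # data[0] is passed as an interior pointer
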
 
 proc growObj(old: pointer, newsize: int): pointer {.rtl.} =
+  setStackTop(gch)
   result = growObj(old, newsize, gch)
 
 {.push profiler:off.}
@@ -533,70 +777,214 @@ proc doOperation(p: pointer, op: TWalkOp) =
   if p == nil: return
   var c: PCell = usrToCell(p)
   sysAssert(c != nil, "doOperation: 1")
-  case op # faster than function pointers because of easy prediction
-  of waZctDecRef:
-    #if not isAllocatedPtr(gch.region, c):
-    #  return
-    #  c_fprintf(c_stdout, "[GC] decref bug: %p", c) 
-    sysAssert(isAllocatedPtr(gch.region, c), "decRef: waZctDecRef")
-    sysAssert(c.refcount >=% rcIncrement, "doOperation 2")
-    c.refcount = c.refcount -% rcIncrement
-    when logGC: writeCell("decref (from doOperation)", c)
-    if c.refcount <% rcIncrement: addZCT(gch.zct, c)
-  of waPush:
-    add(gch.tempStack, c)
-  of waCycleDecRef:
-    sysAssert(c.refcount >=% rcIncrement, "doOperation 3")
-    c.refcount = c.refcount -% rcIncrement
-
+  gch.tempStack.add c
+  
 proc nimGCvisit(d: pointer, op: int) {.compilerRtl.} =
   doOperation(d, TWalkOp(op))
 
+type
+  TRecursionType = enum 
+    FromChildren,
+    FromRoot
+
+proc CollectZCT(gch: var TGcHeap): bool
+
+template pseudoRecursion(typ: TRecursionType, body: stmt): stmt =
+  #
+
+proc trimCycleRoots(gch: var TGcHeap, startIdx = gch.cycleRootsTrimIdx) =
+  var i = startIdx
+  while i < gch.cycleRoots.len:
+    if gch.cycleRoots.d[i].color != rcCycleCandidate:
+      gch.cycleRoots.trimAt i
+    else:
+      inc i
+
+  gch.cycleRootsTrimIdx = gch.cycleRoots.len
+
 # we now use a much simpler and non-recursive algorithm for cycle removal
 proc collectCycles(gch: var TGcHeap) =
-  var tabSize = 0
-  for c in elements(gch.cycleRoots):
-    inc(tabSize)
-    forallChildren(c, waCycleDecRef)
-  if tabSize == 0: return
-  gch.stat.cycleTableSize = max(gch.stat.cycleTableSize, tabSize)
-
-  # restore reference counts (a depth-first traversal is needed):
-  var marker: TCellSet
-  Init(marker)
-  for c in elements(gch.cycleRoots):
-    if c.refcount >=% rcIncrement:
-      if not containsOrIncl(marker, c):
-        gch.tempStack.len = 0
-        forAllChildren(c, waPush)
-        while gch.tempStack.len > 0:
-          dec(gch.tempStack.len)
-          var d = gch.tempStack.d[gch.tempStack.len]
-          d.refcount = d.refcount +% rcIncrement
-          if d in gch.cycleRoots and not containsOrIncl(marker, d):
-            forAllChildren(d, waPush)
-  # remove cycles:
-  for c in elements(gch.cycleRoots):
-    if c.refcount <% rcIncrement:
-      gch.tempStack.len = 0
-      forAllChildren(c, waPush)
-      while gch.tempStack.len > 0:
-        dec(gch.tempStack.len)
-        var d = gch.tempStack.d[gch.tempStack.len]
-        if d.refcount <% rcIncrement:
-          if d notin gch.cycleRoots: # d is leaf of c and not part of cycle
-            addZCT(gch.zct, d)
-            when logGC: writeCell("add to ZCT (from cycle collector)", d)
-      prepareDealloc(c)
-      gcTrace(c, csCycFreed)
-      when logGC: writeCell("cycle collector dealloc cell", c)
-      when reallyDealloc: rawDealloc(gch.region, c)
+  if gch.cycleRoots.len == 0: return
+  gch.stat.cycleTableSize = max(gch.stat.cycleTableSize, gch.cycleRoots.len)
+
+  when CollectCyclesStats:
+    let l0 = gch.cycleRoots.len
+    let tStart = getTicks()
+
+  var
+    decrefs = 0
+    increfs = 0
+    collected = 0
+    maybedeads = 0
+
+  template ignoreObject(c: PCell): expr =
+    # This controls which objects will be ignored in the mark and scan stages
+    (when MarkingSkipsAcyclicObjects: not canbeCycleRoot(c) else: false)
+    # not canbeCycleRoot(c)
+    # false
+    # c.isBitUp(rcHasStackRef)
+
+  template earlyMarkAliveRec(cell) =
+    let startLen = gch.tempStack.len
+    cell.setColor rcAlive
+    cell.forAllChildren waPush
+    
+    while startLen != gch.tempStack.len:
+      dec gch.tempStack.len
+      var c = gch.tempStack.d[gch.tempStack.len]
+      if c.color != rcAlive:
+        c.setColor rcAlive
+        c.forAllChildren waPush
+  
+  template earlyMarkAlive(stackRoots) =
+    # This marks all objects reachable from the stack as alive before any
+    # of the other stages is executed. Such objects cannot be garbage and
+    # they don't need to participate in the recursive decref/incref.
+    for i in 0 .. <stackRoots.len:
+      var c = stackRoots.d[i]
+      # c.setBit rcHasStackRef
+      earlyMarkAliveRec(c)
+
+  earlyMarkAlive(gch.decStack)
+  
+  when CollectCyclesStats:
+    let tAfterEarlyMarkAlive = getTicks()
+
+  template recursiveDecRef(cell) =
+    let startLen = gch.tempStack.len
+    cell.setColor rcDecRefApplied
+    cell.forAllChildren waPush
+    
+    while startLen != gch.tempStack.len:
+      dec gch.tempStack.len
+      var c = gch.tempStack.d[gch.tempStack.len]
+      if ignoreObject(c): continue
+
+      sysAssert(c.refcount >=% rcIncrement, "recursive dec ref")
+      dec c.refcount, rcIncrement
+      inc decrefs
+      if c.color != rcDecRefApplied:
+        c.setColor rcDecRefApplied
+        c.forAllChildren waPush
+ 
+  template markRoots(roots) =
+    var i = 0
+    while i < roots.len:
+      if roots.d[i].color == rcCycleCandidate:
+        recursiveDecRef(roots.d[i])
+        inc i
       else:
-        sysAssert(c.typ != nil, "collectCycles")
-        zeroMem(c, sizeof(TCell))
+        roots.trimAt i
+  
+  markRoots(gch.cycleRoots)
+  
+  when CollectCyclesStats:
+    let tAfterMark = getTicks()
+    c_printf "COLLECT CYCLES %d: %d/%d\n", gcCollectionIdx, gch.cycleRoots.len, l0
+  
+  template recursiveMarkAlive(cell) =
+    let startLen = gch.tempStack.len
+    cell.setColor rcAlive
+    cell.forAllChildren waPush
+    
+    while startLen != gch.tempStack.len:
+      dec gch.tempStack.len
+      var c = gch.tempStack.d[gch.tempStack.len]
+      if ignoreObject(c): continue
+      inc c.refcount, rcIncrement
+      inc increfs
+      
+      if c.color != rcAlive:
+        c.setColor rcAlive
+        c.forAllChildren waPush
+ 
+  template scanRoots(roots) =
+    for i in 0 .. <roots.len:
+      let startLen = gch.tempStack.len
+      gch.tempStack.add roots.d[i]
+      
+      while startLen != gch.tempStack.len:
+        dec gch.tempStack.len
+        var c = gch.tempStack.d[gch.tempStack.len]
+        if ignoreObject(c): continue
+        if c.color == rcDecRefApplied:
+          if c.refcount >=% rcIncrement:
+            recursiveMarkAlive(c)
+          else:
+            # note that this is not necessarily the ultimate
+            # destiny of the object. we may still mark it alive
+            # later if we encounter another node from where it's
+            # reachable.
+            c.setColor rcMaybeDead
+            inc maybedeads
+            c.forAllChildren waPush
+  
+  scanRoots(gch.cycleRoots)
+  
+  when CollectCyclesStats:
+    let tAfterScan = getTicks()
+
+  template collectDead(roots) =
+    for i in 0 .. <roots.len:
+      var c = roots.d[i]
+      c.clearBit(rcInCycleRoots)
+
+      let startLen = gch.tempStack.len
+      gch.tempStack.add c
+      
+      while startLen != gch.tempStack.len:
+        dec gch.tempStack.len
+        var c = gch.tempStack.d[gch.tempStack.len]
+        when MarkingSkipsAcyclicObjects:
+          if not canbeCycleRoot(c):
+            # This is an acyclic object reachable from a dead cyclic object
+            # We must do a normal decref here that may add the acyclic object
+            # to the ZCT
+            doDecRef(c, LocalHeap, Cyclic)
+            continue
+        if c.color == rcMaybeDead and not c.isBitUp(rcInCycleRoots):
+          c.setColor(rcReallyDead)
+          inc collected
+          c.forAllChildren waPush
+          # we need to postpone the actual deallocation in order to allow
+          # the finalizers to run while the data structures are still intact
+          gch.freeStack.add c
+          prepareDealloc(c)
+
+    for i in 0 .. <gch.freeStack.len:
+      freeCell(gch, gch.freeStack.d[i])
+
+  collectDead(gch.cycleRoots)
+  
+  when CollectCyclesStats:
+    let tFinal = getTicks()
+    cprintf "times:\n  early mark alive: %d ms\n  mark: %d ms\n  scan: %d ms\n  collect: %d ms\n  decrefs: %d\n  increfs: %d\n  marked dead: %d\n  collected: %d\n",
+      (tAfterEarlyMarkAlive - tStart)  div 1_000_000,
+      (tAfterMark - tAfterEarlyMarkAlive) div 1_000_000,
+      (tAfterScan - tAfterMark) div 1_000_000,
+      (tFinal - tAfterScan) div 1_000_000,
+      decrefs,
+      increfs,
+      maybedeads,
+      collected
+
   Deinit(gch.cycleRoots)
   Init(gch.cycleRoots)
 
+  Deinit(gch.freeStack)
+  Init(gch.freeStack)
+
+  when MarkingSkipsAcyclicObjects:
+    # Collect the acyclic objects that became unreachable due to collected
+    # cyclic objects. 
+    discard CollectZCT(gch)
+    # CollectZCT may add new cycle candidates and we may decide to loop here
+    # if gch.cycleRoots.len > 0: repeat
+
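
For orientation, the earlyMarkAlive, markRoots, scanRoots and collectDead
templates above follow the classic trial-deletion scheme. A compact recursive
model of the same phases (toy Node type; the real code is iterative via
tempStack and also deals with the ZCT and retired buffers):

type
  Color = enum alive, gray, dead
  Node = ref object
    rc: int
    color: Color
    kids: seq[Node]

proc markGray(n: Node) =        # like recursiveDecRef: drop internal refs
  if n.color == gray: return
  n.color = gray
  for k in n.kids:
    dec k.rc
    markGray(k)

proc markAlive(n: Node) =       # like recursiveMarkAlive: restore the counts
  n.color = alive
  for k in n.kids:
    inc k.rc
    if k.color != alive: markAlive(k)

proc scan(n: Node) =            # like scanRoots: rc > 0 means externally live
  if n.color != gray: return
  if n.rc > 0: markAlive(n)
  else:
    n.color = dead              # collectDead would free these
    for k in n.kids: scan(k)

var a = Node(rc: 1)             # a two-node cycle with no external references
var b = Node(rc: 1, kids: @[a])
a.kids = @[b]
markGray(a); scan(a)
echo a.color, " ", b.color      # both end up dead
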
+var gcDebugging* = false
+
+var seqdbg* : proc (s: PGenericSeq) {.cdecl.}
+
 proc gcMark(gch: var TGcHeap, p: pointer) {.inline.} =
   # the addresses are not as cells on the stack, so turn them to cells:
   sysAssert(allocInv(gch.region), "gcMark begin")
@@ -607,13 +995,26 @@ proc gcMark(gch: var TGcHeap, p: pointer) {.inline.} =
     var objStart = cast[PCell](interiorAllocatedPtr(gch.region, cell))
     if objStart != nil:
       # mark the cell:
-      objStart.refcount = objStart.refcount +% rcIncrement
-      add(gch.decStack, objStart)
+      if objStart.color != rcReallyDead:
+        if gcDebugging:
+          # writeCell("marking ", objStart)
+        else:
+          inc objStart.refcount, rcIncrement
+          gch.decStack.add objStart
+      else:
+        # With incremental clean-up, objects spend some time
+        # in various lists before being deallocated.
+        # We just found a reference on the stack to an object,
+        # which we have previously labeled as unreachable.
+        # This is either a bug in the GC or a pure accidental
+        # coincidence due to the conservative stack marking.
+        when debugGC:
+          # writeCell("marking dead object", objStart)
     when false:
       if isAllocatedPtr(gch.region, cell):
         sysAssert false, "allocated pointer but not interior?"
         # mark the cell:
-        cell.refcount = cell.refcount +% rcIncrement
+        inc cell.refcount, rcIncrement
         add(gch.decStack, cell)
   sysAssert(allocInv(gch.region), "gcMark end")
 
@@ -664,6 +1065,11 @@ proc stackSize(): int {.noinline.} =
   var stackTop {.volatile.}: pointer
   result = abs(cast[int](addr(stackTop)) - cast[int](gch.stackBottom))
 
+var
+  jmpbufSize {.importc: "sizeof(jmp_buf)", nodecl.}: int
+    # a little hack to get the size of a TJmpBuf in the generated C code
+    # in a platform independent way
+
 when defined(sparc): # For SPARC architecture.
   proc isOnStack(p: pointer): bool =
     var stackTop {.volatile.}: pointer
@@ -703,12 +1109,7 @@ elif stackIncreases:
     var b = cast[TAddress](stackTop)
     var x = cast[TAddress](p)
     result = a <=% x and x <=% b
-
-  var
-    jmpbufSize {.importc: "sizeof(jmp_buf)", nodecl.}: int
-      # a little hack to get the size of a TJmpBuf in the generated C code
-      # in a platform independant way
-
+  
   proc markStackAndRegisters(gch: var TGcHeap) {.noinline, cdecl.} =
     var registers: C_JmpBuf
     if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
@@ -739,8 +1140,20 @@ else:
     type PStackSlice = ptr array [0..7, pointer]
     var registers: C_JmpBuf
     if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
+      when MinimumStackMarking:
+        # mark the registers
+        var jmpbufPtr = cast[TAddress](addr(registers))
+        var jmpbufEnd = jmpbufPtr +% jmpbufSize
+      
+        while jmpbufPtr <=% jmpbufEnd:
+          gcMark(gch, cast[ppointer](jmpbufPtr)[])
+          jmpbufPtr = jmpbufPtr +% sizeof(pointer)
+
+        var sp = cast[TAddress](gch.stackTop)
+      else:
+        var sp = cast[TAddress](addr(registers))
+      # mark the user stack
       var max = cast[TAddress](gch.stackBottom)
-      var sp = cast[TAddress](addr(registers))
       # loop unrolled:
       while sp <% max - 8*sizeof(pointer):
         gcMark(gch, cast[PStackSlice](sp)[0])
@@ -761,11 +1174,36 @@ else:
 # end of non-portable code
 # ----------------------------------------------------------------------------
 
+proc releaseCell(gch: var TGcHeap, cell: PCell) =
+  if cell.color != rcReallyDead:
+    prepareDealloc(cell)
+    cell.setColor rcReallyDead
+
+    let l1 = gch.tempStack.len
+    cell.forAllChildren waPush
+    let l2 = gch.tempStack.len
+    for i in l1 .. <l2:
+      var cc = gch.tempStack.d[i]
+      if cc.refcount--(LocalHeap):
+        releaseCell(gch, cc)
+      else:
+        if canbeCycleRoot(cc):
+          addCycleRoot(gch.cycleRoots, cc)
+
+    gch.tempStack.len = l1
+
+  if cell.isBitDown(rcBufferedAnywhere):
+    freeCell(gch, cell)
+  # else:
+  # This object is either buffered in the cycleRoots list and we'll leave
+  # it there to be collected in the next collectCycles or it's pending in
+  # the ZCT:
+  # (e.g. we are now cleaning the 15th object, but this one is 18th in the
+  #  list. Note that this can happen only if we reached this point by the
+  #  recursion).
+  # We can ignore it now as the ZCT cleaner will reach it soon.
+
 proc CollectZCT(gch: var TGcHeap): bool =
-  # Note: Freeing may add child objects to the ZCT! So essentially we do 
-  # deep freeing, which is bad for incremental operation. In order to 
-  # avoid a deep stack, we move objects to keep the ZCT small.
-  # This is performance critical!
   const workPackage = 100
   var L = addr(gch.zct.len)
   
@@ -773,35 +1211,30 @@ proc CollectZCT(gch: var TGcHeap): bool =
     var steps = workPackage
     var t0: TTicks
     if gch.maxPause > 0: t0 = getticks()
+  
   while L[] > 0:
     var c = gch.zct.d[0]
+    sysAssert c.isBitUp(rcZct), "CollectZCT: rcZct missing!"
     sysAssert(isAllocatedPtr(gch.region, c), "CollectZCT: isAllocatedPtr")
-    # remove from ZCT:
-    sysAssert((c.refcount and rcZct) == rcZct, "collectZCT")
     
-    c.refcount = c.refcount and not colorMask
+    # remove from ZCT:    
+    c.clearBit(rcZct)
     gch.zct.d[0] = gch.zct.d[L[] - 1]
     dec(L[])
     when withRealtime: dec steps
-    if c.refcount <% rcIncrement: 
+    if c.refcount <% rcIncrement:
       # It may have a RC > 0, if it is in the hardware stack or
       # it has not been removed yet from the ZCT. This is because
       # ``incref`` does not bother to remove the cell from the ZCT 
       # as this might be too slow.
       # In any case, it should be removed from the ZCT. But not
       # freed. **KEEP THIS IN MIND WHEN MAKING THIS INCREMENTAL!**
-      if canBeCycleRoot(c): excl(gch.cycleRoots, c)
-      when logGC: writeCell("zct dealloc cell", c)
-      gcTrace(c, csZctFreed)
-      # We are about to free the object, call the finalizer BEFORE its
-      # children are deleted as well, because otherwise the finalizer may
-      # access invalid memory. This is done by prepareDealloc():
-      prepareDealloc(c)
-      forAllChildren(c, waZctDecRef)
-      when reallyDealloc: rawDealloc(gch.region, c)
+      if c.color == rcRetiredBuffer:
+        if c.isBitDown(rcInCycleRoots):
+          freeCell(gch, c)
       else:
-        sysAssert(c.typ != nil, "collectZCT 2")
-        zeroMem(c, sizeof(TCell))
+        # if c.color == rcReallyDead: writeCell("ReallyDead in ZCT?", c)
+        releaseCell(gch, c)
     when withRealtime:
       if steps == 0:
         steps = workPackage
@@ -813,22 +1246,40 @@ proc CollectZCT(gch: var TGcHeap): bool =
           if duration >= gch.maxPause - 50_000:
             return false
   result = true
+  gch.trimCycleRoots
+  #deInit(gch.zct)
+  #init(gch.zct)
 
-proc unmarkStackAndRegisters(gch: var TGcHeap) = 
+proc unmarkStackAndRegisters(gch: var TGcHeap) =
   var d = gch.decStack.d
-  for i in 0..gch.decStack.len-1:
+  for i in 0 .. <gch.decStack.len:
     sysAssert isAllocatedPtr(gch.region, d[i]), "unmarkStackAndRegisters"
-    # decRef(d[i]) inlined: cannot create a cycle and must not acquire lock
+    # XXX: just call doDecRef?
     var c = d[i]
+    sysAssert c.typ != nil, "unmarkStackAndRegisters 2"
+    
+    if c.color == rcRetiredBuffer:
+      continue
+
     # XXX no need for an atomic dec here:
-    if --c.refcount:
+    if c.refcount--(LocalHeap):
+      # the object survived only because of a stack reference
+      # it still doesn't have heap references
       addZCT(gch.zct, c)
-    sysAssert c.typ != nil, "unmarkStackAndRegisters 2"
+    
+    if canbeCycleRoot(c):
+      # any cyclic object reachable from the stack can be turned into
+      # a leak if it's orphaned through the stack reference
+      # that's because the write-barrier won't be executed for stack
+      # locations
+      addCycleRoot(gch.cycleRoots, c)
+
   gch.decStack.len = 0
 
 proc collectCTBody(gch: var TGcHeap) =
   when withRealtime:
     let t0 = getticks()
+  when debugGC: inc gcCollectionIdx
   sysAssert(allocInv(gch.region), "collectCT: begin")
   
   gch.stat.maxStackSize = max(gch.stat.maxStackSize, stackSize())
@@ -842,7 +1293,7 @@ proc collectCTBody(gch: var TGcHeap) =
     when cycleGC:
       if getOccupiedMem(gch.region) >= gch.cycleThreshold or alwaysCycleGC:
         collectCycles(gch)
-        discard collectZCT(gch)
+        sysAssert gch.zct.len == 0, "zct is not null after collect cycles"
         inc(gch.stat.cycleCollections)
         gch.cycleThreshold = max(InitialCycleThreshold, getOccupiedMem() *
                                  cycleIncrease)
@@ -909,6 +1360,7 @@ when not defined(useNimRtl):
     # set to the max value to suppress the cycle detector
 
   proc GC_fullCollect() =
+    setStackTop(gch)
     acquire(gch)
     var oldThreshold = gch.cycleThreshold
     gch.cycleThreshold = 0 # forces cycle collection
@@ -928,7 +1380,7 @@ when not defined(useNimRtl):
              "[GC] max cycle table size: " & $gch.stat.cycleTableSize & "\n" &
              "[GC] max stack size: " & $gch.stat.maxStackSize & "\n" &
              "[GC] max pause time [ms]: " & $(gch.stat.maxPause div 1000_000)
-    when traceGC: writeLeakage()
+    when traceGC: writeLeakage(true)
     GC_enable()
 
 {.pop.}
diff --git a/lib/system/hti.nim b/lib/system/hti.nim
index 93dc79e3d..a2d132dbf 100755
--- a/lib/system/hti.nim
+++ b/lib/system/hti.nim
@@ -13,11 +13,19 @@ when defined(NimString):
 else:
   {.pragma: codegenType.}
 
-type # This should be he same as ast.TTypeKind
-     # many enum fields are not used at runtime
+type 
+  # This should be the same as ast.TTypeKind
+  # many enum fields are not used at runtime
   TNimKind = enum
-    tyNone, tyBool, tyChar,
-    tyEmpty, tyArrayConstr, tyNil, tyExpr, tyStmt, tyTypeDesc,
+    tyNone,
+    tyBool,
+    tyChar,
+    tyEmpty,
+    tyArrayConstr,
+    tyNil,
+    tyExpr,
+    tyStmt,
+    tyTypeDesc,
     tyGenericInvokation, # ``T[a, b]`` for types to invoke
     tyGenericBody,       # ``T[a, b, body]`` last parameter is the body
     tyGenericInst,       # ``T[a, b, realInstance]`` instantiated generic type
@@ -30,15 +38,30 @@ type # This should be he same as ast.TTypeKind
     tyTuple,             # WARNING: The compiler uses tyTuple for pure objects!
     tySet,
     tyRange,
-    tyPtr, tyRef,
+    tyPtr,
+    tyRef,
     tyVar,
     tySequence,
     tyProc,
-    tyPointer, tyOpenArray,
-    tyString, tyCString, tyForward,
-    tyInt, tyInt8, tyInt16, tyInt32, tyInt64,
-    tyFloat, tyFloat32, tyFloat64, tyFloat128,
-    tyUInt, tyUInt8, tyUInt16, tyUInt32, tyUInt64,
+    tyPointer,
+    tyOpenArray,
+    tyString,
+    tyCString,
+    tyForward,
+    tyInt,
+    tyInt8,
+    tyInt16,
+    tyInt32,
+    tyInt64,
+    tyFloat,
+    tyFloat32,
+    tyFloat64,
+    tyFloat128,
+    tyUInt,
+    tyUInt8,
+    tyUInt16,
+    tyUInt32,
+    tyUInt64,
     tyBigNum,
 
   TNimNodeKind = enum nkNone, nkSlot, nkList, nkCase
diff --git a/lib/system/mmdisp.nim b/lib/system/mmdisp.nim
index 1abf3fbbf..9bf9c1e67 100755
--- a/lib/system/mmdisp.nim
+++ b/lib/system/mmdisp.nim
@@ -21,6 +21,7 @@ const
   alwaysGC = false # collect after every memory allocation (for debugging)
   leakDetector = false
   overwriteFree = false
+  trackAllocationSource = leakDetector
   
   cycleGC = true # (de)activate the cycle GC
   reallyDealloc = true # for debugging purposes this can be set to false
@@ -306,7 +307,10 @@ else:
   include "system/cellsets"
   when not leakDetector:
     sysAssert(sizeof(TCell) == sizeof(TFreeCell), "sizeof TFreeCell")
-  include "system/gc"
+  when true:
+    include "system/gc"
+  else:
+    include "system/oldgc"
   
 {.pop.}
 
diff --git a/lib/system/oldgc.nim b/lib/system/oldgc.nim
new file mode 100644
index 000000000..f3b90e6bd
--- /dev/null
+++ b/lib/system/oldgc.nim
@@ -0,0 +1,1044 @@
+#
+#
+#            Nimrod's Runtime Library
+#        (c) Copyright 2012 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+#            Garbage Collector
+#
+# The basic algorithm is *Deferred Reference Counting* with cycle detection.
+# This is achieved by combining a Deutsch-Bobrow garbage collector
+# with Christopher's partial mark-sweep garbage collector.
+#
+# Special care has been taken to avoid recursion as far as possible in order
+# to prevent stack overflows when traversing deep data structures. It is
+# well-suited for soft real-time applications (like games).
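
A toy model of the deferred-counting idea described above (illustrative types
only, not the runtime's code): only heap references are counted, so a count
of zero merely parks the object in a zero count table that is reconciled
against the scanned stack at collection time.

type Obj = ref object
  rc: int

var zct: seq[Obj] = @[]

proc heapDecRef(o: Obj) =
  # deferred RC: a zero count only means "no heap references", so the
  # object is buffered in the ZCT instead of being freed right away
  dec o.rc
  if o.rc == 0: zct.add o

proc collectZctSketch(stackRefs: seq[Obj]): int =
  # at collection time the ZCT is matched against the scanned stack;
  # whatever has no stack reference either is truly garbage
  for o in zct:
    if o notin stackRefs: inc result
  zct = @[]

var x = Obj(rc: 1)
var y = Obj(rc: 1)
heapDecRef(x); heapDecRef(y)    # both lose their last heap reference
echo collectZctSketch(@[x])     # 1: y is garbage, x survives via a stack ref
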
+{.push profiler:off.}
+
+const
+  CycleIncrease = 2 # is a multiplicative increase
+  InitialCycleThreshold = 4*1024*1024 # X MB because cycle checking is slow
+  ZctThreshold = 500  # we collect garbage if the ZCT's size
+                      # reaches this threshold
+                      # this seems to be a good value
+  withRealTime = defined(useRealtimeGC)
+
+when withRealTime and not defined(getTicks):
+  include "system/timers"
+when defined(memProfiler):
+  proc nimProfile(requestedSize: int)
+
+include "system/timers"
+
+const
+  rcIncrement = 0b1000 # so that lowest 3 bits are not touched
+  # NOTE: Most colors are currently unused
+  rcBlack = 0b000  # cell is colored black; in use or free
+  rcGray = 0b001   # possible member of a cycle
+  rcWhite = 0b010  # member of a garbage cycle
+  rcPurple = 0b011 # possible root of a cycle
+  rcZct = 0b100    # in ZCT
+  rcRed = 0b101    # Candidate cycle undergoing sigma-computation
+  rcOrange = 0b110 # Candidate cycle awaiting epoch boundary
+  rcShift = 3      # shift by rcShift to get the reference counter
+  colorMask = 0b111
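+
+# Illustration of the refcount layout (a reading aid, not used by the code
+# below): the actual counter lives in the bits above rcShift and the low
+# bits hold the color/ZCT flags, so
+#   count = c.refcount shr rcShift
+#   color = c.refcount and colorMask
+# and one logical reference corresponds to one rcIncrement.
+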
+type
+  TWalkOp = enum
+    waZctDecRef, waPush, waCycleDecRef
+
+  TFinalizer {.compilerproc.} = proc (self: pointer) {.nimcall.}
+    # A ref type can have a finalizer that is called before the object's
+    # storage is freed.
+
+  TGcStat {.final, pure.} = object
+    stackScans: int          # number of performed stack scans (for statistics)
+    cycleCollections: int    # number of performed full collections
+    maxThreshold: int        # max threshold that has been set
+    maxStackSize: int        # max stack size
+    maxStackCells: int       # max stack cells in ``decStack``
+    cycleTableSize: int      # max entries in cycle table  
+    maxPause: int64          # max measured GC pause in nanoseconds
+  
+  TGcHeap {.final, pure.} = object # this contains the zero count and
+                                   # non-zero count table
+    stackBottom: pointer
+    cycleThreshold: int
+    zct: TCellSeq            # the zero count table
+    decStack: TCellSeq       # cells referenced from the stack; they are decref'ed again after the scan
+    cycleRoots: TCellSet
+    tempStack: TCellSeq      # temporary stack for recursion elimination
+    recGcLock: int           # prevent recursion via finalizers; no thread lock
+    when withRealTime:
+      maxPause: TNanos       # max allowed pause in nanoseconds; active if > 0
+    region: TMemRegion       # garbage collected region
+    stat: TGcStat
+
+var
+  gch {.rtlThreadVar.}: TGcHeap
+
+when not defined(useNimRtl):
+  InstantiateForRegion(gch.region)
+
+template acquire(gch: TGcHeap) = 
+  when hasThreadSupport and hasSharedHeap:
+    AcquireSys(HeapLock)
+
+template release(gch: TGcHeap) = 
+  when hasThreadSupport and hasSharedHeap:
+    releaseSys(HeapLock)
+
+proc addZCT(s: var TCellSeq, c: PCell) {.noinline.} =
+  if (c.refcount and rcZct) == 0:
+    c.refcount = c.refcount and not colorMask or rcZct
+    add(s, c)
+
+proc cellToUsr(cell: PCell): pointer {.inline.} =
+  # convert object (=pointer to refcount) to pointer to userdata
+  result = cast[pointer](cast[TAddress](cell)+%TAddress(sizeof(TCell)))
+
+proc usrToCell(usr: pointer): PCell {.inline.} =
+  # convert pointer to userdata to object (=pointer to refcount)
+  result = cast[PCell](cast[TAddress](usr)-%TAddress(sizeof(TCell)))
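+
+# Layout note: each garbage collected object is preceded by its TCell header,
+# so both conversions above simply add or subtract sizeof(TCell); in
+# particular usrToCell(cellToUsr(c)) == c for every cell c.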
+
+proc canbeCycleRoot(c: PCell): bool {.inline.} =
+  result = ntfAcyclic notin c.typ.flags
+
+proc extGetCellType(c: pointer): PNimType {.compilerproc.} =
+  # used for code generation concerning debugging
+  result = usrToCell(c).typ
+
+proc internRefcount(p: pointer): int {.exportc: "getRefcount".} =
+  result = int(usrToCell(p).refcount) shr rcShift
+
+# this has to be zero, otherwise we have to round up UnitsPerPage:
+when BitsPerPage mod (sizeof(int)*8) != 0:
+  {.error: "(BitsPerPage mod BitsPerUnit) should be zero!".}
+
+when debugGC:
+  proc writeCell(msg: CString, c: PCell) =
+    var kind = -1
+    if c.typ != nil: kind = ord(c.typ.kind)
+    when leakDetector:
+      c_fprintf(c_stdout, "[GC] %s: %p %d rc=%ld from %s(%ld)\n",
+                msg, c, kind, c.refcount shr rcShift, c.filename, c.line)
+    else:
+      c_fprintf(c_stdout, "[GC] %s: %p %d rc=%ld\n",
+                msg, c, kind, c.refcount shr rcShift)
+
+when traceGC:
+  # traceGC is a special switch to enable extensive debugging
+  type
+    TCellState = enum
+      csAllocated, csZctFreed, csCycFreed
+  var
+    states: array[TCellState, TCellSet]
+
+  proc traceCell(c: PCell, state: TCellState) =
+    case state
+    of csAllocated:
+      if c in states[csAllocated]:
+        writeCell("attempt to alloc an already allocated cell", c)
+        sysAssert(false, "traceCell 1")
+      excl(states[csCycFreed], c)
+      excl(states[csZctFreed], c)
+    of csZctFreed:
+      if c in states[csZctFreed]:
+        writeCell("attempt to free zct cell twice", c)
+        sysAssert(false, "traceCell 2")
+      if c in states[csCycFreed]:
+        writeCell("attempt to free with zct, but already freed with cyc", c)
+        sysAssert(false, "traceCell 3")
+      if c notin states[csAllocated]:
+        writeCell("attempt to free not an allocated cell", c)
+        sysAssert(false, "traceCell 4")
+      excl(states[csAllocated], c)
+    of csCycFreed:
+      if c notin states[csAllocated]:
+        writeCell("attempt to free a not allocated cell", c)
+        sysAssert(false, "traceCell 5")
+      if c in states[csCycFreed]:
+        writeCell("attempt to free cyc cell twice", c)
+        sysAssert(false, "traceCell 6")
+      if c in states[csZctFreed]:
+        writeCell("attempt to free with cyc, but already freed with zct", c)
+        sysAssert(false, "traceCell 7")
+      excl(states[csAllocated], c)
+    incl(states[state], c)
+
+  proc writeLeakage() =
+    var z = 0
+    var y = 0
+    var e = 0
+    for c in elements(states[csAllocated]):
+      inc(e)
+      if c in states[csZctFreed]: inc(z)
+      elif c in states[csCycFreed]: inc(y)
+      else: writeCell("leak", c)
+    cfprintf(cstdout, "Allocations: %ld; ZCT freed: %ld; CYC freed: %ld\n",
+             e, z, y)
+
+template gcTrace(cell, state: expr): stmt {.immediate.} =
+  when traceGC: traceCell(cell, state)
+
+# forward declarations:
+proc collectCT(gch: var TGcHeap)
+proc IsOnStack*(p: pointer): bool {.noinline.}
+proc forAllChildren(cell: PCell, op: TWalkOp)
+proc doOperation(p: pointer, op: TWalkOp)
+proc forAllChildrenAux(dest: Pointer, mt: PNimType, op: TWalkOp)
+# we need the prototype here for debugging purposes
+
+when hasThreadSupport and hasSharedHeap:
+  template `--`(x: expr): expr = atomicDec(x, rcIncrement) <% rcIncrement
+  template `++`(x: expr): stmt = discard atomicInc(x, rcIncrement)
+else:
+  template `--`(x: expr): expr = 
+    Dec(x, rcIncrement)
+    x <% rcIncrement
+  template `++`(x: expr): stmt = Inc(x, rcIncrement)
+
+proc prepareDealloc(cell: PCell) =
+  if cell.typ.finalizer != nil:
+    # the finalizer could invoke something that
+    # allocates memory; this could trigger a garbage
+    # collection. Since we are already collecting, we
+    # prevent recursive entry here with a lock.
+    # XXX: we should set the cell's children to nil!
+    inc(gch.recGcLock)
+    (cast[TFinalizer](cell.typ.finalizer))(cellToUsr(cell))
+    dec(gch.recGcLock)
+
+proc rtlAddCycleRoot(c: PCell) {.rtl, inl.} = 
+  # we MUST access gch as a global here, because this crosses DLL boundaries!
+  when hasThreadSupport and hasSharedHeap:
+    AcquireSys(HeapLock)
+  incl(gch.cycleRoots, c)
+  when hasThreadSupport and hasSharedHeap:
+    ReleaseSys(HeapLock)
+
+proc rtlAddZCT(c: PCell) {.rtl, inl.} =
+  # we MUST access gch as a global here, because this crosses DLL boundaries!
+  when hasThreadSupport and hasSharedHeap:
+    AcquireSys(HeapLock)
+  addZCT(gch.zct, c)
+  when hasThreadSupport and hasSharedHeap:
+    ReleaseSys(HeapLock)
+
+proc decRef(c: PCell) {.inline.} =
+  sysAssert(isAllocatedPtr(gch.region, c), "decRef: interiorPtr")
+  sysAssert(c.refcount >=% rcIncrement, "decRef")
+  if --c.refcount:
+    rtlAddZCT(c)
+  elif canBeCycleRoot(c):
+    # unfortunately this is necessary here too, because a cycle might just
+    # have been broken up and we could recycle it.
+    rtlAddCycleRoot(c) 
+
+proc incRef(c: PCell) {.inline.} = 
+  sysAssert(isAllocatedPtr(gch.region, c), "incRef: interiorPtr")
+  ++c.refcount
+  if canBeCycleRoot(c):
+    rtlAddCycleRoot(c)
+
+proc nimGCref(p: pointer) {.compilerProc, inline.} = incRef(usrToCell(p))
+proc nimGCunref(p: pointer) {.compilerProc, inline.} = decRef(usrToCell(p))
+
+proc nimGCunrefNoCycle(p: pointer) {.compilerProc, inline.} =
+  sysAssert(allocInv(gch.region), "begin nimGCunrefNoCycle")
+  var c = usrToCell(p)
+  sysAssert(isAllocatedPtr(gch.region, c), "nimGCunrefNoCycle: isAllocatedPtr")
+  if --c.refcount:
+    rtlAddZCT(c)
+    sysAssert(allocInv(gch.region), "end nimGCunrefNoCycle 2")
+  sysAssert(allocInv(gch.region), "end nimGCunrefNoCycle 5")
+
+proc asgnRef(dest: ppointer, src: pointer) {.compilerProc, inline.} =
+  # the code generator calls this proc!
+  sysAssert(not isOnStack(dest), "asgnRef")
+  # BUGFIX: first incRef then decRef!
+  if src != nil: incRef(usrToCell(src))
+  if dest[] != nil: decRef(usrToCell(dest[]))
+  dest[] = src
+
+proc asgnRefNoCycle(dest: ppointer, src: pointer) {.compilerProc, inline.} =
+  # the code generator calls this proc if it is known at compile time that no 
+  # cycle is possible.
+  if src != nil:
+    var c = usrToCell(src)
+    ++c.refcount
+  if dest[] != nil: 
+    var c = usrToCell(dest[])
+    if --c.refcount:
+      rtlAddZCT(c)
+  dest[] = src
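+
+# Illustrative only; the calls above are emitted by the compiler, not written
+# by hand. Conceptually an assignment `x.field = y` on the heap becomes
+# `asgnRef(addr x.field, y)`, or `asgnRefNoCycle(...)` when the compiler can
+# prove that no cycle through this field is possible.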
+
+proc unsureAsgnRef(dest: ppointer, src: pointer) {.compilerProc.} =
+  # unsureAsgnRef updates the reference counters only if dest is not on the
+  # stack. It is used by the code generator if it cannot decide whether a
+  # reference is on the stack or not (this can happen for var parameters).
+  if not IsOnStack(dest):
+    if src != nil: incRef(usrToCell(src))
+    # XXX finally use assembler for the stack checking instead!
+    # the test for '!= nil' is correct, but I got tired of the segfaults
+    # resulting from the crappy stack checking:
+    if cast[int](dest[]) >=% PageSize: decRef(usrToCell(dest[]))
+  else:
+    # can't be an interior pointer if it's a stack location!
+    sysAssert(interiorAllocatedPtr(gch.region, dest)==nil, 
+              "stack loc AND interior pointer")
+  dest[] = src
+
+proc initGC() =
+  when not defined(useNimRtl):
+    when traceGC:
+      for i in low(TCellState)..high(TCellState): Init(states[i])
+    gch.cycleThreshold = InitialCycleThreshold
+    gch.stat.stackScans = 0
+    gch.stat.cycleCollections = 0
+    gch.stat.maxThreshold = 0
+    gch.stat.maxStackSize = 0
+    gch.stat.maxStackCells = 0
+    gch.stat.cycleTableSize = 0
+    # init the rt
+    init(gch.zct)
+    init(gch.tempStack)
+    Init(gch.cycleRoots)
+    Init(gch.decStack)
+
+proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: TWalkOp) =
+  var d = cast[TAddress](dest)
+  case n.kind
+  of nkSlot: forAllChildrenAux(cast[pointer](d +% n.offset), n.typ, op)
+  of nkList:
+    for i in 0..n.len-1:
+      # inlined for speed
+      if n.sons[i].kind == nkSlot:
+        if n.sons[i].typ.kind in {tyRef, tyString, tySequence}:
+          doOperation(cast[ppointer](d +% n.sons[i].offset)[], op)
+        else:
+          forAllChildrenAux(cast[pointer](d +% n.sons[i].offset), 
+                            n.sons[i].typ, op)
+      else:
+        forAllSlotsAux(dest, n.sons[i], op)
+  of nkCase:
+    var m = selectBranch(dest, n)
+    if m != nil: forAllSlotsAux(dest, m, op)
+  of nkNone: sysAssert(false, "forAllSlotsAux")
+
+proc forAllChildrenAux(dest: Pointer, mt: PNimType, op: TWalkOp) =
+  var d = cast[TAddress](dest)
+  if dest == nil: return # nothing to do
+  if ntfNoRefs notin mt.flags:
+    case mt.Kind
+    of tyRef, tyString, tySequence: # leaf:
+      doOperation(cast[ppointer](d)[], op)
+    of tyObject, tyTuple:
+      forAllSlotsAux(dest, mt.node, op)
+    of tyArray, tyArrayConstr, tyOpenArray:
+      for i in 0..(mt.size div mt.base.size)-1:
+        forAllChildrenAux(cast[pointer](d +% i *% mt.base.size), mt.base, op)
+    else: nil
+
+proc forAllChildren(cell: PCell, op: TWalkOp) =
+  sysAssert(cell != nil, "forAllChildren: 1")
+  sysAssert(cell.typ != nil, "forAllChildren: 2")
+  sysAssert cell.typ.kind in {tyRef, tySequence, tyString}, "forAllChildren: 3"
+  let marker = cell.typ.marker
+  if marker != nil:
+    marker(cellToUsr(cell), op.int)
+  else:
+    case cell.typ.Kind
+    of tyRef: # common case
+      forAllChildrenAux(cellToUsr(cell), cell.typ.base, op)
+    of tySequence:
+      var d = cast[TAddress](cellToUsr(cell))
+      var s = cast[PGenericSeq](d)
+      if s != nil:
+        let baseAddr = d +% GenericSeqSize
+        for i in 0..s.len-1:
+          forAllChildrenAux(cast[pointer](baseAddr +% i *% cell.typ.base.size),
+                            cell.typ.base, op)
+    else: nil
+
+proc addNewObjToZCT(res: PCell, gch: var TGcHeap) {.inline.} =
+  # we check the last 8 entries (cache line) for a slot that could be reused.
+  # In 63% of all cases we succeed here! But we have to optimize the heck
+  # out of this small linear search so that ``newObj`` is not slowed down.
+  # 
+  # Slots to try          cache hit
+  # 1                     32%
+  # 4                     59%
+  # 8                     63%
+  # 16                    66%
+  # all slots             68%
+  var L = gch.zct.len
+  var d = gch.zct.d
+  when true:
+    # loop unrolled for performance:
+    template replaceZctEntry(i: expr) =
+      c = d[i]
+      if c.refcount >=% rcIncrement:
+        c.refcount = c.refcount and not colorMask
+        d[i] = res
+        return
+    if L > 8:
+      var c: PCell
+      replaceZctEntry(L-1)
+      replaceZctEntry(L-2)
+      replaceZctEntry(L-3)
+      replaceZctEntry(L-4)
+      replaceZctEntry(L-5)
+      replaceZctEntry(L-6)
+      replaceZctEntry(L-7)
+      replaceZctEntry(L-8)
+      add(gch.zct, res)
+    else:
+      d[L] = res
+      inc(gch.zct.len)
+  else:
+    for i in countdown(L-1, max(0, L-8)):
+      var c = d[i]
+      if c.refcount >=% rcIncrement:
+        c.refcount = c.refcount and not colorMask
+        d[i] = res
+        return
+    add(gch.zct, res)
+
+proc rawNewObj(typ: PNimType, size: int, gch: var TGcHeap): pointer =
+  # generates a new object and sets its reference counter to 0
+  acquire(gch)
+  sysAssert(typ.kind in {tyRef, tyString, tySequence}, "newObj: 1")
+  collectCT(gch)
+  sysAssert(allocInv(gch.region), "rawNewObj begin")
+  var res = cast[PCell](rawAlloc(gch.region, size + sizeof(TCell)))
+  sysAssert((cast[TAddress](res) and (MemAlign-1)) == 0, "newObj: 2")
+  # now it is buffered in the ZCT
+  res.typ = typ
+  when trackAllocationSource and not hasThreadSupport:
+    if framePtr != nil and framePtr.prev != nil and framePtr.prev.prev != nil:
+      res.filename = framePtr.prev.prev.filename
+      res.line = framePtr.prev.prev.line
+    else:
+      res.filename = "nofile"
+  res.refcount = rcZct # refcount is zero, but mark it to be in the ZCT  
+  sysAssert(isAllocatedPtr(gch.region, res), "newObj: 3")
+  # its refcount is zero, so add it to the ZCT:
+  addNewObjToZCT(res, gch)
+  when logGC: writeCell("new cell", res)
+  gcTrace(res, csAllocated)
+  release(gch)
+  result = cellToUsr(res)
+  sysAssert(allocInv(gch.region), "rawNewObj end")
+
+{.pop.}
+
+proc newObj(typ: PNimType, size: int): pointer {.compilerRtl.} =
+  result = rawNewObj(typ, size, gch)
+  zeroMem(result, size)
+  when defined(memProfiler): nimProfile(size)
+
+proc newSeq(typ: PNimType, len: int): pointer {.compilerRtl.} =
+  # `newObj` already uses locks, so no need for them here.
+  let size = addInt(mulInt(len, typ.base.size), GenericSeqSize)
+  result = newObj(typ, size)
+  cast[PGenericSeq](result).len = len
+  cast[PGenericSeq](result).reserved = len
+  when defined(memProfiler): nimProfile(size)
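+
+# For example (hypothetical numbers): a seq[int] of len 4 on a 64-bit target
+# requests 4*8 + GenericSeqSize bytes via newObj, and rawNewObj adds another
+# sizeof(TCell) for the cell header.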
+
+proc newObjRC1(typ: PNimType, size: int): pointer {.compilerRtl.} =
+  # generates a new object and sets its reference counter to 1
+  sysAssert(allocInv(gch.region), "newObjRC1 begin")
+  acquire(gch)
+  sysAssert(typ.kind in {tyRef, tyString, tySequence}, "newObj: 1")
+  collectCT(gch)
+  sysAssert(allocInv(gch.region), "newObjRC1 after collectCT")
+  
+  var res = cast[PCell](rawAlloc(gch.region, size + sizeof(TCell)))
+  sysAssert(allocInv(gch.region), "newObjRC1 after rawAlloc")
+  sysAssert((cast[TAddress](res) and (MemAlign-1)) == 0, "newObj: 2")
+  # now it is buffered in the ZCT
+  res.typ = typ
+  when trackAllocationSource and not hasThreadSupport:
+    if framePtr != nil and framePtr.prev != nil and framePtr.prev.prev != nil:
+      res.filename = framePtr.prev.prev.filename
+      res.line = framePtr.prev.prev.line
+    else:
+      res.filename = "nofile"
+  res.refcount = rcIncrement # refcount is 1
+  sysAssert(isAllocatedPtr(gch.region, res), "newObj: 3")
+  when logGC: writeCell("new cell", res)
+  gcTrace(res, csAllocated)
+  release(gch)
+  result = cellToUsr(res)
+  zeroMem(result, size)
+  sysAssert(allocInv(gch.region), "newObjRC1 end")
+  when defined(memProfiler): nimProfile(size)
+
+proc newSeqRC1(typ: PNimType, len: int): pointer {.compilerRtl.} =
+  let size = addInt(mulInt(len, typ.base.size), GenericSeqSize)
+  result = newObjRC1(typ, size)
+  cast[PGenericSeq](result).len = len
+  cast[PGenericSeq](result).reserved = len
+  when defined(memProfiler): nimProfile(size)
+  
+proc growObj(old: pointer, newsize: int, gch: var TGcHeap): pointer =
+  acquire(gch)
+  collectCT(gch)
+  var ol = usrToCell(old)
+  sysAssert(ol.typ != nil, "growObj: 1")
+  sysAssert(ol.typ.kind in {tyString, tySequence}, "growObj: 2")
+  sysAssert(allocInv(gch.region), "growObj begin")
+
+  var res = cast[PCell](rawAlloc(gch.region, newsize + sizeof(TCell)))
+  var elemSize = 1
+  if ol.typ.kind != tyString: elemSize = ol.typ.base.size
+  
+  var oldsize = cast[PGenericSeq](old).len*elemSize + GenericSeqSize
+  copyMem(res, ol, oldsize + sizeof(TCell))
+  zeroMem(cast[pointer](cast[TAddress](res)+% oldsize +% sizeof(TCell)),
+          newsize-oldsize)
+  sysAssert((cast[TAddress](res) and (MemAlign-1)) == 0, "growObj: 3")
+  sysAssert(res.refcount shr rcShift <=% 1, "growObj: 4")
+  #if res.refcount <% rcIncrement:
+  #  add(gch.zct, res)
+  #else: # XXX: what to do here?
+  #  decRef(ol)
+  if (ol.refcount and colorMask) == rcZct:
+    var j = gch.zct.len-1
+    var d = gch.zct.d
+    while j >= 0: 
+      if d[j] == ol:
+        d[j] = res
+        break
+      dec(j)
+  if canBeCycleRoot(ol): excl(gch.cycleRoots, ol)
+  when logGC:
+    writeCell("growObj old cell", ol)
+    writeCell("growObj new cell", res)
+  gcTrace(ol, csZctFreed)
+  gcTrace(res, csAllocated)
+  when reallyDealloc: rawDealloc(gch.region, ol)
+  else:
+    sysAssert(ol.typ != nil, "growObj: 5")
+    zeroMem(ol, sizeof(TCell))
+  release(gch)
+  result = cellToUsr(res)
+  sysAssert(allocInv(gch.region), "growObj end")
+  when defined(memProfiler): nimProfile(newsize-oldsize)
+
+proc growObj(old: pointer, newsize: int): pointer {.rtl.} =
+  result = growObj(old, newsize, gch)
+
+{.push profiler:off.}
+
+# ---------------- cycle collector -------------------------------------------
+
+var
+  decrefs = 0
+  increfs = 0
+  marked = 0
+  collected = 0
+
+proc doOperation(p: pointer, op: TWalkOp) =
+  if p == nil: return
+  var c: PCell = usrToCell(p)
+  sysAssert(c != nil, "doOperation: 1")
+  case op # faster than function pointers because of easy prediction
+  of waZctDecRef:
+    #if not isAllocatedPtr(gch.region, c):
+    #  return
+    #  c_fprintf(c_stdout, "[GC] decref bug: %p", c) 
+    sysAssert(isAllocatedPtr(gch.region, c), "decRef: waZctDecRef")
+    sysAssert(c.refcount >=% rcIncrement, "doOperation 2")
+    c.refcount = c.refcount -% rcIncrement
+    when logGC: writeCell("decref (from doOperation)", c)
+    if c.refcount <% rcIncrement: addZCT(gch.zct, c)
+  of waPush:
+    add(gch.tempStack, c)
+  of waCycleDecRef:
+    sysAssert(c.refcount >=% rcIncrement, "doOperation 3")
+    c.refcount = c.refcount -% rcIncrement
+    inc decrefs
+
+proc nimGCvisit(d: pointer, op: int) {.compilerRtl.} =
+  doOperation(d, TWalkOp(op))
+  
+# we now use a much simpler and non-recursive algorithm for cycle removal
+proc collectCycles(gch: var TGcHeap) =
+  var tabSize = 0
+  let tStart = getTicks()
+  decrefs = 0
+  increfs = 0
+  marked = 0
+  collected = 0
+
+  # XXX: acyclic cutoff (specialized marker procs)
+  # short trim cycle roots
+  # long trim with threshold
+  # don't add new objects to both zct and cycleRoots?
+  # leak detector with hash in rawNew / free
+  #
+  for c in elements(gch.cycleRoots):
+    inc(tabSize)
+    forallChildren(c, waCycleDecRef)
+  if tabSize == 0: return
+  gch.stat.cycleTableSize = max(gch.stat.cycleTableSize, tabSize)
+
+  c_printf "COLLECT CYCLES: %d\n", tabSize
+  let tAfterMark = getTicks()
+
+  # restore reference counts (a depth-first traversal is needed):
+  var marker: TCellSet
+  Init(marker)
+  for c in elements(gch.cycleRoots):
+    if c.refcount >=% rcIncrement:
+      inc marked
+      if not containsOrIncl(marker, c):
+        gch.tempStack.len = 0
+        forAllChildren(c, waPush)
+        while gch.tempStack.len > 0:
+          dec(gch.tempStack.len)
+          var d = gch.tempStack.d[gch.tempStack.len]
+          d.refcount = d.refcount +% rcIncrement
+          inc increfs
+          if d in gch.cycleRoots and not containsOrIncl(marker, d):
+            forAllChildren(d, waPush)
+  
+  let tAfterScan = getTicks()
+
+  # remove cycles:
+  for c in elements(gch.cycleRoots):
+    if c.refcount <% rcIncrement:
+      inc collected
+      gch.tempStack.len = 0
+      forAllChildren(c, waPush)
+      while gch.tempStack.len > 0:
+        dec(gch.tempStack.len)
+        var d = gch.tempStack.d[gch.tempStack.len]
+        if d.refcount <% rcIncrement:
+          if d notin gch.cycleRoots: # d is leaf of c and not part of cycle
+            addZCT(gch.zct, d)
+            when logGC: writeCell("add to ZCT (from cycle collector)", d)
+      prepareDealloc(c)
+      gcTrace(c, csCycFreed)
+      when logGC: writeCell("cycle collector dealloc cell", c)
+      when reallyDealloc: rawDealloc(gch.region, c)
+      else:
+        sysAssert(c.typ != nil, "collectCycles")
+        zeroMem(c, sizeof(TCell))
+
+  let tFinal = getTicks()
+
+  cprintf "times:\n  mark: %d ms\n  scan: %d ms\n  collect: %d ms\n  decrefs: %d\n  increfs: %d\n  marked: %d\n  collected: %d\n",
+    (tAfterMark - tStart) div 1_000_000,
+    (tAfterScan - tAfterMark) div 1_000_000,
+    (tFinal - tAfterScan) div 1_000_000,
+    decrefs,
+    increfs,
+    marked,
+    collected
+
+  Deinit(gch.cycleRoots)
+  Init(gch.cycleRoots)
+
+var gcDebugging* = false
+var vis*: proc (a: pointer, b: PNimType)
+
+proc debugNode(n: ptr TNimNode) =
+  c_fprintf(c_stdout, "node %s\n", n.name)
+  for i in 0..n.len-1:
+    debugNode(n.sons[i])
+
+proc debugTyp(x: PNimType) =
+  c_fprintf(c_stdout, "type %d\n", x.kind)
+  if x.node != nil:
+    debugNode(x.node)
+
+var seqdbg* : proc (s: PGenericSeq) {.cdecl.}
+
+type
+  TCyclicMode = enum
+    Cyclic,
+    Acyclic,
+    MaybeCyclic
+
+  TReleaseType = enum
+    AddToZTC
+    FreeImmediately
+
+  THeapType = enum
+    LocalHeap
+    SharedHeap
+
+template `++` (rc: TRefCount, heapType: THeapType): stmt =
+  when heapType == SharedHeap:
+    discard atomicInc(rc, rcIncrement)
+  else:
+    inc rc, rcIncrement
+
+template `--`(rc: TRefCount): expr =
+  dec rc, rcIncrement
+  rc <% rcIncrement
+
+template `--` (rc: TRefCount, heapType: THeapType): expr =
+  (when heapType == SharedHeap: atomicDec(rc, rcIncrement) <% rcIncrement
+   else: --rc)
+
+template doDecRef(cc: PCell,
+                  heapType = LocalHeap,
+                  cycleFlag = MaybeCyclic): stmt =
+  var c = cc
+  sysAssert(isAllocatedPtr(gch.region, c), "decRef: interiorPtr")
+  # XXX: move this elsewhere
+
+  sysAssert(c.refcount >=% rcIncrement, "decRef")
+  if c.refcount--(heapType):
+    # this is the last reference from the heap
+    # add to a zero-count-table that will be matched against stack pointers
+    rtlAddZCT(c)
+    # writeCell("decref to 0", c)
+  else:
+    when cycleFlag != Acyclic:
+      if cycleFlag == Cyclic or canBeCycleRoot(c):
+        # a cycle may have been broken
+        rtlAddCycleRoot(c)
+
+proc gcMark(gch: var TGcHeap, p: pointer) {.inline.} =
+  # the stack holds addresses of user data, not of cells, so convert them first:
+  sysAssert(allocInv(gch.region), "gcMark begin")
+  var cell = usrToCell(p)
+  var c = cast[TAddress](cell)
+  if c >% PageSize:
+    # fast check: does it look like a cell?
+    var objStart = cast[PCell](interiorAllocatedPtr(gch.region, cell))
+    if objStart != nil:
+      # mark the cell:
+      if not gcDebugging:
+        objStart.refcount = objStart.refcount +% rcIncrement
+        add(gch.decStack, objStart)
+    when false:
+      if isAllocatedPtr(gch.region, cell):
+        sysAssert false, "allocated pointer but not interior?"
+        # mark the cell:
+        cell.refcount = cell.refcount +% rcIncrement
+        add(gch.decStack, cell)
+  sysAssert(allocInv(gch.region), "gcMark end")
+
+proc markThreadStacks(gch: var TGcHeap) = 
+  when hasThreadSupport and hasSharedHeap:
+    {.error: "not fully implemented".}
+    var it = threadList
+    while it != nil:
+      # mark registers: 
+      for i in 0 .. high(it.registers): gcMark(gch, it.registers[i])
+      var sp = cast[TAddress](it.stackBottom)
+      var max = cast[TAddress](it.stackTop)
+      # XXX stack direction?
+      # XXX unroll this loop:
+      while sp <=% max:
+        gcMark(gch, cast[ppointer](sp)[])
+        sp = sp +% sizeof(pointer)
+      it = it.next
+
+# ----------------- stack management --------------------------------------
+#  inspired from Smart Eiffel
+
+when defined(sparc):
+  const stackIncreases = false
+elif defined(hppa) or defined(hp9000) or defined(hp9000s300) or
+     defined(hp9000s700) or defined(hp9000s800) or defined(hp9000s820):
+  const stackIncreases = true
+else:
+  const stackIncreases = false
+
+when not defined(useNimRtl):
+  {.push stack_trace: off.}
+  proc setStackBottom(theStackBottom: pointer) =
+    #c_fprintf(c_stdout, "stack bottom: %p;\n", theStackBottom)
+    # the first init must be the one that defines the stack bottom:
+    if gch.stackBottom == nil: gch.stackBottom = theStackBottom
+    else:
+      var a = cast[TAddress](theStackBottom) # and not PageMask - PageSize*2
+      var b = cast[TAddress](gch.stackBottom)
+      #c_fprintf(c_stdout, "old: %p new: %p;\n",gch.stackBottom,theStackBottom)
+      when stackIncreases:
+        gch.stackBottom = cast[pointer](min(a, b))
+      else:
+        gch.stackBottom = cast[pointer](max(a, b))
+  {.pop.}
+
+proc stackSize(): int {.noinline.} =
+  var stackTop {.volatile.}: pointer
+  result = abs(cast[int](addr(stackTop)) - cast[int](gch.stackBottom))
+
+when defined(sparc): # For SPARC architecture.
+  proc isOnStack(p: pointer): bool =
+    var stackTop {.volatile.}: pointer
+    stackTop = addr(stackTop)
+    var b = cast[TAddress](gch.stackBottom)
+    var a = cast[TAddress](stackTop)
+    var x = cast[TAddress](p)
+    result = a <=% x and x <=% b
+
+  proc markStackAndRegisters(gch: var TGcHeap) {.noinline, cdecl.} =
+    when defined(sparcv9):
+      asm  """"flushw \n" """
+    else:
+      asm  """"ta      0x3   ! ST_FLUSH_WINDOWS\n" """
+
+    var
+      max = gch.stackBottom
+      sp: PPointer
+      stackTop: array[0..1, pointer]
+    sp = addr(stackTop[0])
+    # Addresses decrease as the stack grows.
+    while sp <= max:
+      gcMark(gch, sp[])
+      sp = cast[ppointer](cast[TAddress](sp) +% sizeof(pointer))
+
+elif defined(ELATE):
+  {.error: "stack marking code is to be written for this architecture".}
+
+elif stackIncreases:
+  # ---------------------------------------------------------------------------
+  # Generic code for architectures where addresses increase as the stack grows.
+  # ---------------------------------------------------------------------------
+  proc isOnStack(p: pointer): bool =
+    var stackTop {.volatile.}: pointer
+    stackTop = addr(stackTop)
+    var a = cast[TAddress](gch.stackBottom)
+    var b = cast[TAddress](stackTop)
+    var x = cast[TAddress](p)
+    result = a <=% x and x <=% b
+
+  var
+    jmpbufSize {.importc: "sizeof(jmp_buf)", nodecl.}: int
+      # a little hack to get the size of a TJmpBuf in the generated C code
+      # in a platform-independent way
+
+  proc markStackAndRegisters(gch: var TGcHeap) {.noinline, cdecl.} =
+    var registers: C_JmpBuf
+    if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
+      var max = cast[TAddress](gch.stackBottom)
+      var sp = cast[TAddress](addr(registers)) +% jmpbufSize -% sizeof(pointer)
+      # sp will traverse the JMP_BUF as well (jmp_buf size is added,
+      # otherwise sp would be below the registers structure).
+      while sp >=% max:
+        gcMark(gch, cast[ppointer](sp)[])
+        sp = sp -% sizeof(pointer)
+
+else:
+  # ---------------------------------------------------------------------------
+  # Generic code for architectures where addresses decrease as the stack grows.
+  # ---------------------------------------------------------------------------
+  proc isOnStack(p: pointer): bool =
+    var stackTop {.volatile.}: pointer
+    stackTop = addr(stackTop)
+    var b = cast[TAddress](gch.stackBottom)
+    var a = cast[TAddress](stackTop)
+    var x = cast[TAddress](p)
+    result = a <=% x and x <=% b
+
+  proc markStackAndRegisters(gch: var TGcHeap) {.noinline, cdecl.} =
+    # We use a jmp_buf buffer that is in the C stack.
+    # Used to traverse the stack and registers assuming
+    # that 'setjmp' will save registers in the C stack.
+    type PStackSlice = ptr array [0..7, pointer]
+    var registers: C_JmpBuf
+    if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
+      var max = cast[TAddress](gch.stackBottom)
+      var sp = cast[TAddress](addr(registers))
+      # loop unrolled:
+      while sp <% max - 8*sizeof(pointer):
+        gcMark(gch, cast[PStackSlice](sp)[0])
+        gcMark(gch, cast[PStackSlice](sp)[1])
+        gcMark(gch, cast[PStackSlice](sp)[2])
+        gcMark(gch, cast[PStackSlice](sp)[3])
+        gcMark(gch, cast[PStackSlice](sp)[4])
+        gcMark(gch, cast[PStackSlice](sp)[5])
+        gcMark(gch, cast[PStackSlice](sp)[6])
+        gcMark(gch, cast[PStackSlice](sp)[7])
+        sp = sp +% sizeof(pointer)*8
+      # last few entries:
+      while sp <=% max:
+        gcMark(gch, cast[ppointer](sp)[])
+        sp = sp +% sizeof(pointer)
+
+# ----------------------------------------------------------------------------
+# end of non-portable code
+# ----------------------------------------------------------------------------
+
+proc CollectZCT(gch: var TGcHeap): bool =
+  # Note: Freeing may add child objects to the ZCT! So essentially we do 
+  # deep freeing, which is bad for incremental operation. In order to 
+  # avoid a deep stack, we move objects to keep the ZCT small.
+  # This is performance critical!
+  const workPackage = 100
+  var L = addr(gch.zct.len)
+  
+  when withRealtime:
+    var steps = workPackage
+    var t0: TTicks
+    if gch.maxPause > 0: t0 = getticks()
+  while L[] > 0:
+    var c = gch.zct.d[0]
+    sysAssert(isAllocatedPtr(gch.region, c), "CollectZCT: isAllocatedPtr")
+    # remove from ZCT:
+    sysAssert((c.refcount and rcZct) == rcZct, "collectZCT")
+    
+    c.refcount = c.refcount and not colorMask
+    gch.zct.d[0] = gch.zct.d[L[] - 1]
+    dec(L[])
+    when withRealtime: dec steps
+    if c.refcount <% rcIncrement: 
+      # It may have an RC > 0 if it is referenced from the hardware stack or
+      # has not been removed from the ZCT yet. This is because
+      # ``incref`` does not bother to remove the cell from the ZCT
+      # as this might be too slow.
+      # In any case, it should be removed from the ZCT. But not
+      # freed. **KEEP THIS IN MIND WHEN MAKING THIS INCREMENTAL!**
+      if canBeCycleRoot(c): excl(gch.cycleRoots, c)
+      when logGC: writeCell("zct dealloc cell", c)
+      gcTrace(c, csZctFreed)
+      # We are about to free the object, call the finalizer BEFORE its
+      # children are deleted as well, because otherwise the finalizer may
+      # access invalid memory. This is done by prepareDealloc():
+      prepareDealloc(c)
+      forAllChildren(c, waZctDecRef)
+      when reallyDealloc: rawDealloc(gch.region, c)
+      else:
+        sysAssert(c.typ != nil, "collectZCT 2")
+        zeroMem(c, sizeof(TCell))
+    when withRealtime:
+      if steps == 0:
+        steps = workPackage
+        if gch.maxPause > 0:
+          let duration = getticks() - t0
+          # the GC's timing is not accurate and some cleanup actions
+          # (stack unmarking) still follow, so subtract a short amount of
+          # time in order to miss deadlines less often:
+          if duration >= gch.maxPause - 50_000:
+            return false
+  result = true
+
+proc unmarkStackAndRegisters(gch: var TGcHeap) = 
+  var d = gch.decStack.d
+  for i in 0..gch.decStack.len-1:
+    sysAssert isAllocatedPtr(gch.region, d[i]), "unmarkStackAndRegisters"
+    # decRef(d[i]) inlined: cannot create a cycle and must not acquire lock
+    var c = d[i]
+    # XXX no need for an atomic dec here:
+    if --c.refcount:
+      addZCT(gch.zct, c)
+    sysAssert c.typ != nil, "unmarkStackAndRegisters 2"
+  gch.decStack.len = 0
+
+proc collectCTBody(gch: var TGcHeap) =
+  when withRealtime:
+    let t0 = getticks()
+  sysAssert(allocInv(gch.region), "collectCT: begin")
+  
+  gch.stat.maxStackSize = max(gch.stat.maxStackSize, stackSize())
+  sysAssert(gch.decStack.len == 0, "collectCT")
+  prepareForInteriorPointerChecking(gch.region)
+  markStackAndRegisters(gch)
+  markThreadStacks(gch)
+  gch.stat.maxStackCells = max(gch.stat.maxStackCells, gch.decStack.len)
+  inc(gch.stat.stackScans)
+  if collectZCT(gch):
+    when cycleGC:
+      if getOccupiedMem(gch.region) >= gch.cycleThreshold or alwaysCycleGC:
+        collectCycles(gch)
+        discard collectZCT(gch)
+        inc(gch.stat.cycleCollections)
+        gch.cycleThreshold = max(InitialCycleThreshold, getOccupiedMem() *
+                                 cycleIncrease)
+        gch.stat.maxThreshold = max(gch.stat.maxThreshold, gch.cycleThreshold)
+  unmarkStackAndRegisters(gch)
+  sysAssert(allocInv(gch.region), "collectCT: end")
+  
+  when withRealtime:
+    let duration = getticks() - t0
+    gch.stat.maxPause = max(gch.stat.maxPause, duration)
+    when defined(reportMissedDeadlines):
+      if gch.maxPause > 0 and duration > gch.maxPause:
+        c_fprintf(c_stdout, "[GC] missed deadline: %ld\n", duration)
+
+proc collectCT(gch: var TGcHeap) =
+  if (gch.zct.len >= ZctThreshold or (cycleGC and
+      getOccupiedMem(gch.region)>=gch.cycleThreshold) or alwaysGC) and 
+      gch.recGcLock == 0:
+    collectCTBody(gch)
+
+when withRealtime:
+  proc toNano(x: int): TNanos {.inline.} =
+    result = x * 1000
+
+  proc GC_setMaxPause*(MaxPauseInUs: int) =
+    gch.maxPause = MaxPauseInUs.toNano
+
+  proc GC_step(gch: var TGcHeap, us: int, strongAdvice: bool) =
+    acquire(gch)
+    gch.maxPause = us.toNano
+    if (gch.zct.len >= ZctThreshold or (cycleGC and
+        getOccupiedMem(gch.region)>=gch.cycleThreshold) or alwaysGC) or 
+        strongAdvice:
+      collectCTBody(gch)
+    release(gch)
+
+  proc GC_step*(us: int, strongAdvice = false) = GC_step(gch, us, strongAdvice)
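+
+  # Usage sketch (requires compiling with -d:useRealtimeGC; `frame` is a
+  # hypothetical per-frame proc of the application):
+  #   GC_setMaxPause(2_000)   # allow at most ~2 ms per forced collection
+  #   while true:
+  #     frame()
+  #     GC_step(2_000)        # give the GC up to 2000 microseconds now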
+
+when not defined(useNimRtl):
+  proc GC_disable() = 
+    when hasThreadSupport and hasSharedHeap:
+      discard atomicInc(gch.recGcLock, 1)
+    else:
+      inc(gch.recGcLock)
+  proc GC_enable() =
+    if gch.recGcLock > 0: 
+      when hasThreadSupport and hasSharedHeap:
+        discard atomicDec(gch.recGcLock, 1)
+      else:
+        dec(gch.recGcLock)
+
+  proc GC_setStrategy(strategy: TGC_Strategy) =
+    case strategy
+    of gcThroughput: nil
+    of gcResponsiveness: nil
+    of gcOptimizeSpace: nil
+    of gcOptimizeTime: nil
+
+  proc GC_enableMarkAndSweep() =
+    gch.cycleThreshold = InitialCycleThreshold
+
+  proc GC_disableMarkAndSweep() =
+    gch.cycleThreshold = high(gch.cycleThreshold)-1
+    # set to the max value to suppress the cycle detector
+
+  proc GC_fullCollect() =
+    acquire(gch)
+    var oldThreshold = gch.cycleThreshold
+    gch.cycleThreshold = 0 # forces cycle collection
+    collectCT(gch)
+    gch.cycleThreshold = oldThreshold
+    release(gch)
+
+  proc GC_getStatistics(): string =
+    GC_disable()
+    result = "[GC] total memory: " & $(getTotalMem()) & "\n" &
+             "[GC] occupied memory: " & $(getOccupiedMem()) & "\n" &
+             "[GC] stack scans: " & $gch.stat.stackScans & "\n" &
+             "[GC] stack cells: " & $gch.stat.maxStackCells & "\n" &
+             "[GC] cycle collections: " & $gch.stat.cycleCollections & "\n" &
+             "[GC] max threshold: " & $gch.stat.maxThreshold & "\n" &
+             "[GC] zct capacity: " & $gch.zct.cap & "\n" &
+             "[GC] max cycle table size: " & $gch.stat.cycleTableSize & "\n" &
+             "[GC] max stack size: " & $gch.stat.maxStackSize & "\n" &
+             "[GC] max pause time [ms]: " & $(gch.stat.maxPause div 1000_000)
+    when traceGC: writeLeakage()
+    GC_enable()
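+
+  # e.g. `echo GC_getStatistics()` at program exit prints the counters
+  # accumulated in gch.stat above.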
+
+{.pop.}
diff --git a/lib/system/sysstr.nim b/lib/system/sysstr.nim
index 5d2113439..55223d6c6 100755
--- a/lib/system/sysstr.nim
+++ b/lib/system/sysstr.nim
@@ -204,10 +204,21 @@ proc setLengthSeq(seq: PGenericSeq, elemSize, newLen: int): PGenericSeq {.
     # we need to decref here, otherwise the GC leaks!
     when not defined(boehmGC) and not defined(nogc):
       for i in newLen..result.len-1:
+        let len0 = gch.tempStack.len
         forAllChildrenAux(cast[pointer](cast[TAddress](result) +%
                           GenericSeqSize +% (i*%elemSize)),
-                          extGetCellType(result).base, waZctDecRef)
-    # and set the memory to nil:
+                          extGetCellType(result).base, waPush)
+        let len1 = gch.tempStack.len
+        for i in len0 .. <len1:
+          doDecRef(gch.tempStack.d[i], LocalHeap, MaybeCyclic)
+        gch.tempStack.len = len0
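+        # the children are gathered with waPush first and then decref'ed one
+        # by one; doDecRef(..., MaybeCyclic) can also register potential
+        # cycle roots, which the previous direct waZctDecRef walk did not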
+
+    # XXX: zeroing out the memory can still result in crashes if a wiped-out
+    # cell is aliased by another pointer (i.e. a proc parameter or a let
+    # variable). This is a tough problem, because even if we don't zeroMem
+    # here, in the presence of user-defined destructors the user will expect
+    # the cell to be "destroyed", thus creating the same problem. We can
+    # destroy the cell in the finalizer of the sequence, but this makes
+    # destruction non-deterministic.
     zeroMem(cast[pointer](cast[TAddress](result) +% GenericSeqSize +%
            (newLen*%elemSize)), (result.len-%newLen) *% elemSize)
   result.len = newLen
diff --git a/lib/system/timers.nim b/lib/system/timers.nim
index 0166c1e3f..fa1a13a5f 100644
--- a/lib/system/timers.nim
+++ b/lib/system/timers.nim
@@ -44,10 +44,11 @@ elif defined(macosx):
 
   proc getTicks(): TTicks {.inline.} =
     result = TTicks(mach_absolute_time())
-
+  
+  var timeBaseInfo: TMachTimebaseInfoData
+  mach_timebase_info(timeBaseInfo)
+    
   proc `-`(a, b: TTicks): TNanos =
-    var timeBaseInfo: TMachTimebaseInfoData
-    mach_timebase_info(timeBaseInfo)
     result = (a.int64 - b.int64)  * timeBaseInfo.numer div timeBaseInfo.denom
 
 elif defined(posixRealtime):
diff --git a/lib/wrappers/openssl.nim b/lib/wrappers/openssl.nim
index f692db13a..438774a15 100755
--- a/lib/wrappers/openssl.nim
+++ b/lib/wrappers/openssl.nim
@@ -49,9 +49,9 @@ else:
   const
     versions = "(|.1.0.0|.0.9.9|.0.9.8|.0.9.7|.0.9.6|.0.9.5|.0.9.4)"
   when defined(macosx):
-    const 
-      DLLSSLName = "libssl.dylib" & versions
-      DLLUtilName = "libcrypto.dylib" & versions
+    const
+      DLLSSLName = "libssl" & versions & ".dylib"
+      DLLUtilName = "libcrypto" & versions & ".dylib"
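+      # the version suffix is now inserted before the extension, so the
+      # loader presumably tries "libssl.dylib", "libssl.1.0.0.dylib", etc.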
   else:
     const 
       DLLSSLName = "libssl.so" & versions