summary refs log tree commit diff stats
path: root/lib/system/oldgc.nim
diff options
context:
space:
mode:
Diffstat (limited to 'lib/system/oldgc.nim')
-rw-r--r--lib/system/oldgc.nim1044
1 files changed, 1044 insertions, 0 deletions
diff --git a/lib/system/oldgc.nim b/lib/system/oldgc.nim
new file mode 100644
index 000000000..f3b90e6bd
--- /dev/null
+++ b/lib/system/oldgc.nim
@@ -0,0 +1,1044 @@
+#
+#
+#            Nimrod's Runtime Library
+#        (c) Copyright 2012 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+#            Garbage Collector
+#
+# The basic algorithm is *Deferred Reference Counting* with cycle detection.
+# This is achieved by combining a Deutsch-Bobrow garbage collector
+# together with Christopher's partial mark-sweep garbage collector.
+#
+# Special care has been taken to avoid recursion as far as possible to avoid
+# stack overflows when traversing deep datastructures. It is well-suited
+# for soft real time applications (like games).
+{.push profiler:off.}
+
+const
+  # Tuning constants of the collector.
+  CycleIncrease = 2 # is a multiplicative increase
+  InitialCycleThreshold = 4*1024*1024 # X MB because cycle checking is slow
+                                      # (the expression evaluates to 4 MB)
+  ZctThreshold = 500  # we collect garbage if the ZCT's size
+                      # reaches this threshold
+                      # this seems to be a good value
+  withRealTime = defined(useRealtimeGC)
+    # true when the program is compiled with the `useRealtimeGC` symbol
+    # defined; it gates the `maxPause` field of `TGcHeap`
+
+when not defined(getTicks):
+  # The tick counter is needed in every configuration: `collectCycles` times
+  # its phases with `getTicks`, and the realtime mode needs it as well.
+  # Include the timers module exactly once; a conditional include followed
+  # by an unconditional one pulls the same declarations in twice whenever
+  # `useRealtimeGC` is defined.
+  include "system/timers"
+when defined(memProfiler):
+  # Forward declaration only; the body is not defined in this part of the
+  # file.
+  proc nimProfile(requestedSize: int)
+
+const
+  # Layout of a cell's `refcount` word: the lowest `rcShift` bits hold the
+  # colour (see `colorMask`), the remaining bits hold the reference count.
+  # Adding `rcIncrement` therefore bumps the count by one without touching
+  # the colour bits.
+  rcIncrement = 0b1000 # so that lowest 3 bits are not touched
+  # NOTE: Most colors are currently unused
+  rcBlack = 0b000  # cell is colored black; in use or free
+  rcGray = 0b001   # possible member of a cycle
+  rcWhite = 0b010  # member of a garbage cycle
+  rcPurple = 0b011 # possible root of a cycle
+  rcZct = 0b100    # in ZCT
+  rcRed = 0b101    # Candidate cycle undergoing sigma-computation
+  rcOrange = 0b110 # Candidate cycle awaiting epoch boundary
+  rcShift = 3      # shift by rcShift to get the reference counter
+  colorMask = 0b111 # mask selecting the colour bits of `refcount`
+type
+  # Operation applied to every child reference while walking a cell
+  # (dispatched in `doOperation`):
+  #   waZctDecRef   - decrement the child's count; add it to the ZCT when the
+  #                   count drops below one reference
+  #   waPush        - push the child onto `gch.tempStack`
+  #   waCycleDecRef - decrement the child's count (cycle collector pass)
+  TWalkOp = enum
+    waZctDecRef, waPush, waCycleDecRef
+
+  TFinalizer {.compilerproc.} = proc (self: pointer) {.nimcall.}
+    # A ref type can have a finalizer that is called before the object's
+    # storage is freed.
+
+  # Statistics gathered by the collector.
+  TGcStat {.final, pure.} = object
+    stackScans: int          # number of performed stack scans (for statistics)
+    cycleCollections: int    # number of performed full collections
+    maxThreshold: int        # max threshold that has been set
+    maxStackSize: int        # max stack size
+    maxStackCells: int       # max stack cells in ``decStack``
+    cycleTableSize: int      # max entries in cycle table  
+    maxPause: int64          # max measured GC pause in nanoseconds
+  
+  TGcHeap {.final, pure.} = object # this contains the zero count and
+                                   # non-zero count table
+    stackBottom: pointer     # recorded by `setStackBottom`
+    cycleThreshold: int      # initialised to `InitialCycleThreshold`
+    zct: TCellSeq            # the zero count table
+    decStack: TCellSeq       # cells in the stack that are to decref again
+    cycleRoots: TCellSet     # candidate roots examined by `collectCycles`
+    tempStack: TCellSeq      # temporary stack for recursion elimination
+    recGcLock: int           # prevent recursion via finalizers; no thread lock
+    when withRealTime:
+      maxPause: TNanos       # max allowed pause in nanoseconds; active if > 0
+    region: TMemRegion       # garbage collected region
+    stat: TGcStat
+
+var
+  # The collector state used by every proc in this file.
+  gch {.rtlThreadVar.}: TGcHeap
+
+when not defined(useNimRtl):
+  # NOTE(review): presumably instantiates the allocator procs for the GC's
+  # memory region; `InstantiateForRegion` is defined outside this file.
+  InstantiateForRegion(gch.region)
+
+template acquire(gch: TGcHeap) =
+  # Takes the heap lock; expands to nothing unless thread support with a
+  # shared heap is compiled in.
+  when hasThreadSupport:
+    when hasSharedHeap:
+      AcquireSys(HeapLock)
+
+template release(gch: TGcHeap) =
+  # Gives the heap lock back; expands to nothing unless thread support with
+  # a shared heap is compiled in.
+  when hasThreadSupport:
+    when hasSharedHeap:
+      releaseSys(HeapLock)
+
+proc addZCT(s: var TCellSeq, c: PCell) {.noinline.} =
+  # Appends `c` to `s` and colours it `rcZct`. Cells whose `rcZct` bit is
+  # already set are left alone so that they are not added a second time.
+  if (c.refcount and rcZct) != 0: return
+  let countBits = c.refcount and not colorMask
+  c.refcount = countBits or rcZct
+  add(s, c)
+
+proc cellToUsr(cell: PCell): pointer {.inline.} =
+  # The user data starts directly behind the cell header, so step over
+  # `sizeof(TCell)` bytes.
+  let headerAddr = cast[TAddress](cell)
+  result = cast[pointer](headerAddr +% TAddress(sizeof(TCell)))
+
+proc usrToCell(usr: pointer): PCell {.inline.} =
+  # Inverse of `cellToUsr`: step back over the cell header that precedes
+  # the user data.
+  let dataAddr = cast[TAddress](usr)
+  result = cast[PCell](dataAddr -% TAddress(sizeof(TCell)))
+
+proc canbeCycleRoot(c: PCell): bool {.inline.} =
+  # A cell may be the root of a cycle unless its type is flagged acyclic.
+  let flags = c.typ.flags
+  result = not (ntfAcyclic in flags)
+
+proc extGetCellType(c: pointer): PNimType {.compilerproc.} =
+  # used for code generation concerning debugging:
+  # returns the runtime type stored in the header of the cell owning `c`
+  let cell = usrToCell(c)
+  result = cell.typ
+
+proc internRefcount(p: pointer): int {.exportc: "getRefcount".} =
+  # Returns the reference count of the cell owning `p`, with the colour
+  # bits shifted out.
+  let raw = int(usrToCell(p).refcount)
+  result = raw shr rcShift
+
+# This remainder has to equal zero, otherwise we would have to round up
+# UnitsPerPage:
+when BitsPerPage mod (sizeof(int)*8) != 0:
+  {.error: "(BitsPerPage mod BitsPerUnit) should be zero!".}
+
+when debugGC:
+  proc writeCell(msg: CString, c: PCell) =
+    # Prints `msg` followed by the cell's address, the ordinal of its type
+    # kind (-1 when the cell has no type) and its reference count. With
+    # `leakDetector` the recorded file name and line are printed as well.
+    let rc = c.refcount shr rcShift
+    var typeKind = -1
+    if c.typ != nil:
+      typeKind = ord(c.typ.kind)
+    when leakDetector:
+      c_fprintf(c_stdout, "[GC] %s: %p %d rc=%ld from %s(%ld)\n",
+                msg, c, typeKind, rc, c.filename, c.line)
+    else:
+      c_fprintf(c_stdout, "[GC] %s: %p %d rc=%ld\n",
+                msg, c, typeKind, rc)
+
+when traceGC:
+  # traceGC is a special switch to enable extensive debugging
+  type
+    TCellState = enum
+      csAllocated, csZctFreed, csCycFreed
+  var
+    states: array[TCellState, TCellSet]
+
+  proc traceCell(c: PCell, state: TCellState) =
+    # Records that `c` enters `state` and reports every inconsistent
+    # transition (double allocation, double free, free of an unknown cell).
+    # Each check is independent, so several reports can be issued for one
+    # call.
+    template complain(msg, tag: expr) =
+      writeCell(msg, c)
+      sysAssert(false, tag)
+
+    if state == csAllocated:
+      if c in states[csAllocated]:
+        complain("attempt to alloc an already allocated cell", "traceCell 1")
+      excl(states[csCycFreed], c)
+      excl(states[csZctFreed], c)
+    elif state == csZctFreed:
+      if c in states[csZctFreed]:
+        complain("attempt to free zct cell twice", "traceCell 2")
+      if c in states[csCycFreed]:
+        complain("attempt to free with zct, but already freed with cyc",
+                 "traceCell 3")
+      if c notin states[csAllocated]:
+        complain("attempt to free not an allocated cell", "traceCell 4")
+      excl(states[csAllocated], c)
+    else:
+      # state == csCycFreed
+      if c notin states[csAllocated]:
+        complain("attempt to free a not allocated cell", "traceCell 5")
+      if c in states[csCycFreed]:
+        complain("attempt to free cyc cell twice", "traceCell 6")
+      if c in states[csZctFreed]:
+        complain("attempt to free with cyc, but already freed with zct",
+                 "traceCell 7")
+      excl(states[csAllocated], c)
+    incl(states[state], c)
+
+  proc writeLeakage() =
+    # Walks all cells still registered as allocated, prints those that are
+    # in neither freed set as leaks and finishes with a summary line.
+    var total = 0
+    var zctFreed = 0
+    var cycFreed = 0
+    for c in elements(states[csAllocated]):
+      inc(total)
+      if c in states[csZctFreed]:
+        inc(zctFreed)
+      elif c in states[csCycFreed]:
+        inc(cycFreed)
+      else:
+        writeCell("leak", c)
+    cfprintf(cstdout, "Allocations: %ld; ZCT freed: %ld; CYC freed: %ld\n",
+             total, zctFreed, cycFreed)
+
+template gcTrace(cell, state: expr): stmt {.immediate.} =
+  # Forwards to `traceCell` in traceGC builds; expands to nothing otherwise.
+  when traceGC:
+    traceCell(cell, state)
+
+# forward declarations:
+# These procs are used before their bodies appear further down in the file.
+proc collectCT(gch: var TGcHeap)
+proc IsOnStack*(p: pointer): bool {.noinline.}
+proc forAllChildren(cell: PCell, op: TWalkOp)
+proc doOperation(p: pointer, op: TWalkOp)
+proc forAllChildrenAux(dest: Pointer, mt: PNimType, op: TWalkOp)
+# we need the prototype here for debugging purposes
+
+# `--x` lowers the counter by one reference (`rcIncrement`) and yields true
+# when the result is below `rcIncrement`, i.e. no reference is left.
+# `++x` raises the counter by one reference.
+# With threads and a shared heap the atomic primitives are used.
+# NOTE(review): the atomic variant assumes `atomicDec` returns the
+# decremented value - confirm.
+when hasThreadSupport and hasSharedHeap:
+  template `--`(x: expr): expr = atomicDec(x, rcIncrement) <% rcIncrement
+  template `++`(x: expr): stmt = discard atomicInc(x, rcIncrement)
+else:
+  template `--`(x: expr): expr = 
+    Dec(x, rcIncrement)
+    x <% rcIncrement
+  template `++`(x: expr): stmt = Inc(x, rcIncrement)
+
+proc prepareDealloc(cell: PCell) =
+  # Runs the finalizer of the cell's type, if it has one.
+  let fin = cell.typ.finalizer
+  if fin == nil: return
+  # The finalizer could invoke something that allocates memory, which could
+  # trigger a garbage collection. Since we are already collecting, we
+  # prevent re-entering the collector by raising `recGcLock` for the
+  # duration of the call.
+  # XXX: we should set the cell's children to nil!
+  inc(gch.recGcLock)
+  (cast[TFinalizer](fin))(cellToUsr(cell))
+  dec(gch.recGcLock)
+
+proc rtlAddCycleRoot(c: PCell) {.rtl, inl.} =
+  # Registers `c` as a cycle root candidate, holding the heap lock while
+  # doing so when threads share the heap.
+  # we MUST access gch as a global here, because this crosses DLL boundaries!
+  when hasThreadSupport and hasSharedHeap:
+    AcquireSys(HeapLock)
+    incl(gch.cycleRoots, c)
+    ReleaseSys(HeapLock)
+  else:
+    incl(gch.cycleRoots, c)
+
+proc rtlAddZCT(c: PCell) {.rtl, inl.} =
+  # Puts `c` into the zero count table, holding the heap lock while doing
+  # so when threads share the heap.
+  # we MUST access gch as a global here, because this crosses DLL boundaries!
+  when hasThreadSupport and hasSharedHeap:
+    AcquireSys(HeapLock)
+    addZCT(gch.zct, c)
+    ReleaseSys(HeapLock)
+  else:
+    addZCT(gch.zct, c)
+
+proc decRef(c: PCell) {.inline.} =
+  # Drops one reference to `c`. A cell left without references goes into
+  # the ZCT; any other cell that may be part of a cycle is registered as a
+  # cycle root.
+  sysAssert(isAllocatedPtr(gch.region, c), "decRef: interiorPtr")
+  sysAssert(c.refcount >=% rcIncrement, "decRef")
+  if --c.refcount:
+    rtlAddZCT(c)
+  else:
+    # unfortunately this is necessary here too, because a cycle might just
+    # have been broken up and we could recycle it.
+    if canBeCycleRoot(c):
+      rtlAddCycleRoot(c)
+
+proc incRef(c: PCell) {.inline.} =
+  # Adds one reference to `c` and registers it as a cycle root candidate
+  # unless its type is acyclic.
+  sysAssert(isAllocatedPtr(gch.region, c), "incRef: interiorPtr")
+  ++c.refcount
+  if not canBeCycleRoot(c): return
+  rtlAddCycleRoot(c)
+
+proc nimGCref(p: pointer) {.compilerProc, inline.} =
+  # Adds a reference to the cell that owns the user pointer `p`.
+  let cell = usrToCell(p)
+  incRef(cell)
+proc nimGCunref(p: pointer) {.compilerProc, inline.} =
+  # Drops a reference to the cell that owns the user pointer `p`.
+  let cell = usrToCell(p)
+  decRef(cell)
+
+proc nimGCunrefNoCycle(p: pointer) {.compilerProc, inline.} =
+  # Like `nimGCunref`, but never registers the cell as a cycle root: it is
+  # only moved to the ZCT when its last reference is gone.
+  sysAssert(allocInv(gch.region), "begin nimGCunrefNoCycle")
+  let cell = usrToCell(p)
+  sysAssert(isAllocatedPtr(gch.region, cell),
+            "nimGCunrefNoCycle: isAllocatedPtr")
+  if --cell.refcount:
+    rtlAddZCT(cell)
+    sysAssert(allocInv(gch.region), "end nimGCunrefNoCycle 2")
+  sysAssert(allocInv(gch.region), "end nimGCunrefNoCycle 5")
+
+proc asgnRef(dest: ppointer, src: pointer) {.compilerProc, inline.} =
+  # the code generator calls this proc!
+  # Stores `src` into the heap location `dest` and keeps both reference
+  # counts up to date.
+  sysAssert(not isOnStack(dest), "asgnRef")
+  # BUGFIX: first incRef then decRef!
+  if src != nil:
+    incRef(usrToCell(src))
+  let previous = dest[]
+  if previous != nil:
+    decRef(usrToCell(previous))
+  dest[] = src
+
+proc asgnRefNoCycle(dest: ppointer, src: pointer) {.compilerProc, inline.} =
+  # the code generator calls this proc if it is known at compile time that no
+  # cycle is possible. Neither cell is registered as a cycle root.
+  if src != nil:
+    let incoming = usrToCell(src)
+    ++incoming.refcount
+  let previous = dest[]
+  if previous != nil:
+    let outgoing = usrToCell(previous)
+    if --outgoing.refcount:
+      rtlAddZCT(outgoing)
+  dest[] = src
+
+proc unsureAsgnRef(dest: ppointer, src: pointer) {.compilerProc.} =
+  # unsureAsgnRef updates the reference counters only if dest is not on the
+  # stack. It is used by the code generator if it cannot decide whether a
+  # reference is in the stack or not (this can happen for var parameters).
+  if IsOnStack(dest):
+    # can't be an interior pointer if it's a stack location!
+    sysAssert(interiorAllocatedPtr(gch.region, dest)==nil,
+              "stack loc AND interior pointer")
+  else:
+    if src != nil:
+      incRef(usrToCell(src))
+    # XXX finally use assembler for the stack checking instead!
+    # the test for '!= nil' is correct, but I got tired of the segfaults
+    # resulting from the crappy stack checking:
+    if cast[int](dest[]) >=% PageSize:
+      decRef(usrToCell(dest[]))
+  dest[] = src
+
+proc initGC() =
+  # Resets the statistics and initialises the collector's tables. Does
+  # nothing when the runtime lives in the nimrtl library.
+  when not defined(useNimRtl):
+    when traceGC:
+      for st in low(TCellState)..high(TCellState):
+        Init(states[st])
+    gch.cycleThreshold = InitialCycleThreshold
+    # statistics:
+    gch.stat.stackScans = 0
+    gch.stat.cycleCollections = 0
+    gch.stat.maxThreshold = 0
+    gch.stat.maxStackSize = 0
+    gch.stat.maxStackCells = 0
+    gch.stat.cycleTableSize = 0
+    # init the rt
+    init(gch.zct)
+    init(gch.tempStack)
+    Init(gch.cycleRoots)
+    Init(gch.decStack)
+
+proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: TWalkOp) =
+  # Applies `op` to every reference stored in the object at `dest`, guided
+  # by the field description `n`.
+  let base = cast[TAddress](dest)
+  case n.kind
+  of nkNone:
+    sysAssert(false, "forAllSlotsAux")
+  of nkSlot:
+    forAllChildrenAux(cast[pointer](base +% n.offset), n.typ, op)
+  of nkCase:
+    # only the currently active branch of a variant holds live fields
+    let branch = selectBranch(dest, n)
+    if branch != nil:
+      forAllSlotsAux(dest, branch, op)
+  of nkList:
+    for i in 0..n.len-1:
+      let son = n.sons[i]
+      if son.kind != nkSlot:
+        forAllSlotsAux(dest, son, op)
+      else:
+        # inlined for speed
+        let slot = cast[pointer](base +% son.offset)
+        if son.typ.kind in {tyRef, tyString, tySequence}:
+          doOperation(cast[ppointer](slot)[], op)
+        else:
+          forAllChildrenAux(slot, son.typ, op)
+
+proc forAllChildrenAux(dest: Pointer, mt: PNimType, op: TWalkOp) =
+  # Applies `op` to every reference reachable inside the value of type `mt`
+  # stored at `dest`.
+  if dest == nil: return # nothing to do
+  if ntfNoRefs in mt.flags: return # type contains no references at all
+  let base = cast[TAddress](dest)
+  case mt.Kind
+  of tyRef, tyString, tySequence: # leaf:
+    doOperation(cast[ppointer](base)[], op)
+  of tyObject, tyTuple:
+    forAllSlotsAux(dest, mt.node, op)
+  of tyArray, tyArrayConstr, tyOpenArray:
+    let elemType = mt.base
+    let count = mt.size div elemType.size
+    for i in 0..count-1:
+      forAllChildrenAux(cast[pointer](base +% i *% elemType.size),
+                        elemType, op)
+  else: nil
+
+proc forAllChildren(cell: PCell, op: TWalkOp) =
+  # Applies `op` to every reference held by `cell`. A marker proc attached
+  # to the type takes precedence over the generic type-driven traversal.
+  sysAssert(cell != nil, "forAllChildren: 1")
+  sysAssert(cell.typ != nil, "forAllChildren: 2")
+  sysAssert cell.typ.kind in {tyRef, tySequence, tyString}, "forAllChildren: 3"
+  let marker = cell.typ.marker
+  if marker != nil:
+    marker(cellToUsr(cell), op.int)
+    return
+  case cell.typ.Kind
+  of tyRef: # common case
+    forAllChildrenAux(cellToUsr(cell), cell.typ.base, op)
+  of tySequence:
+    let data = cast[TAddress](cellToUsr(cell))
+    let s = cast[PGenericSeq](data)
+    if s != nil:
+      let elemType = cell.typ.base
+      # the elements follow the sequence header
+      let first = data +% GenericSeqSize
+      for i in 0..s.len-1:
+        forAllChildrenAux(cast[pointer](first +% i *% elemType.size),
+                          elemType, op)
+  else: nil
+
+proc addNewObjToZCT(res: PCell, gch: var TGcHeap) {.inline.} =
+  # Registers the freshly allocated cell `res` in the ZCT, preferably by
+  # overwriting one of the last entries that has meanwhile gained a
+  # reference (and therefore no longer belongs in the table).
+  #
+  # we check the last 8 entries (cache line) for a slot that could be reused.
+  # In 63% of all cases we succeed here! But we have to optimize the heck
+  # out of this small linear search so that ``newObj`` is not slowed down.
+  # 
+  # Slots to try          cache hit
+  # 1                     32%
+  # 4                     59%
+  # 8                     63%
+  # 16                    66%
+  # all slots             68%
+  var L = gch.zct.len
+  var d = gch.zct.d
+  when true:
+    # loop unrolled for performance:
+    template replaceZctEntry(i: expr) =
+      # If entry `i` has a non-zero count, clear its colour bits (removing
+      # the ZCT mark), reuse its slot for `res` and leave the proc.
+      c = d[i]
+      if c.refcount >=% rcIncrement:
+        c.refcount = c.refcount and not colorMask
+        d[i] = res
+        return
+    if L > 8:
+      var c: PCell
+      replaceZctEntry(L-1)
+      replaceZctEntry(L-2)
+      replaceZctEntry(L-3)
+      replaceZctEntry(L-4)
+      replaceZctEntry(L-5)
+      replaceZctEntry(L-6)
+      replaceZctEntry(L-7)
+      replaceZctEntry(L-8)
+      # no reusable slot among the last eight entries: append
+      add(gch.zct, res)
+    else:
+      # NOTE(review): writes slot `L` directly without a capacity check;
+      # presumably the table always has room for more than 8 entries -
+      # confirm against `TCellSeq` initialisation.
+      d[L] = res
+      inc(gch.zct.len)
+  else:
+    # non-unrolled reference version of the search above (compiled out)
+    for i in countdown(L-1, max(0, L-8)):
+      var c = d[i]
+      if c.refcount >=% rcIncrement:
+        c.refcount = c.refcount and not colorMask
+        d[i] = res
+        return
+    add(gch.zct, res)
+
+proc rawNewObj(typ: PNimType, size: int, gch: var TGcHeap): pointer =
+  # generates a new object and sets its reference counter to 0
+  # `size` is the size of the user data; the cell header is added on top.
+  # The returned pointer addresses the user data, which is NOT zeroed here.
+  acquire(gch)
+  sysAssert(typ.kind in {tyRef, tyString, tySequence}, "newObj: 1")
+  # give the collector a chance to run before the heap grows
+  collectCT(gch)
+  sysAssert(allocInv(gch.region), "rawNewObj begin")
+  var res = cast[PCell](rawAlloc(gch.region, size + sizeof(TCell)))
+  sysAssert((cast[TAddress](res) and (MemAlign-1)) == 0, "newObj: 2")
+  # now it is buffered in the ZCT
+  res.typ = typ
+  when trackAllocationSource and not hasThreadSupport:
+    # remember where the allocation came from (two frames up the stack)
+    if framePtr != nil and framePtr.prev != nil and framePtr.prev.prev != nil:
+      res.filename = framePtr.prev.prev.filename
+      res.line = framePtr.prev.prev.line
+    else:
+      res.filename = "nofile"
+  res.refcount = rcZct # refcount is zero, but mark it to be in the ZCT  
+  sysAssert(isAllocatedPtr(gch.region, res), "newObj: 3")
+  # its refcount is zero, so add it to the ZCT:
+  addNewObjToZCT(res, gch)
+  when logGC: writeCell("new cell", res)
+  gcTrace(res, csAllocated)
+  release(gch)
+  result = cellToUsr(res)
+  sysAssert(allocInv(gch.region), "rawNewObj end")
+
+{.pop.}
+
+proc newObj(typ: PNimType, size: int): pointer {.compilerRtl.} =
+  # Allocates a zero-initialised object of `size` bytes with a reference
+  # count of 0 and reports the allocation to the memory profiler.
+  let mem = rawNewObj(typ, size, gch)
+  zeroMem(mem, size)
+  when defined(memProfiler):
+    nimProfile(size)
+  result = mem
+
+proc newSeq(typ: PNimType, len: int): pointer {.compilerRtl.} =
+  # Allocates a zeroed sequence with room for `len` elements and a reference
+  # count of 0; both `len` and `reserved` are set to `len`.
+  # The size computation uses the overflow-checked `mulInt`/`addInt`.
+  # `newObj` already uses locks, so no need for them here.
+  let size = addInt(mulInt(len, typ.base.size), GenericSeqSize)
+  result = newObj(typ, size)
+  cast[PGenericSeq](result).len = len
+  cast[PGenericSeq](result).reserved = len
+  # `newObj` has already reported this allocation to the memory profiler;
+  # reporting it here again would count every sequence twice.
+
+proc newObjRC1(typ: PNimType, size: int): pointer {.compilerRtl.} =
+  # generates a new object and sets its reference counter to 1
+  # Unlike `rawNewObj` the cell is not entered into the ZCT, and the user
+  # data is zeroed before it is returned.
+  sysAssert(allocInv(gch.region), "newObjRC1 begin")
+  acquire(gch)
+  sysAssert(typ.kind in {tyRef, tyString, tySequence}, "newObj: 1")
+  # give the collector a chance to run before the heap grows
+  collectCT(gch)
+  sysAssert(allocInv(gch.region), "newObjRC1 after collectCT")
+  
+  var res = cast[PCell](rawAlloc(gch.region, size + sizeof(TCell)))
+  sysAssert(allocInv(gch.region), "newObjRC1 after rawAlloc")
+  sysAssert((cast[TAddress](res) and (MemAlign-1)) == 0, "newObj: 2")
+  # now it is buffered in the ZCT
+  # NOTE(review): the comment above looks copied from `rawNewObj`; this proc
+  # never adds the cell to the ZCT.
+  res.typ = typ
+  when trackAllocationSource and not hasThreadSupport:
+    # remember where the allocation came from (two frames up the stack)
+    if framePtr != nil and framePtr.prev != nil and framePtr.prev.prev != nil:
+      res.filename = framePtr.prev.prev.filename
+      res.line = framePtr.prev.prev.line
+    else:
+      res.filename = "nofile"
+  res.refcount = rcIncrement # refcount is 1
+  sysAssert(isAllocatedPtr(gch.region, res), "newObj: 3")
+  when logGC: writeCell("new cell", res)
+  gcTrace(res, csAllocated)
+  release(gch)
+  result = cellToUsr(res)
+  zeroMem(result, size)
+  sysAssert(allocInv(gch.region), "newObjRC1 end")
+  when defined(memProfiler): nimProfile(size)
+
+proc newSeqRC1(typ: PNimType, len: int): pointer {.compilerRtl.} =
+  # Allocates a zeroed sequence with room for `len` elements and a reference
+  # count of 1; both `len` and `reserved` are set to `len`.
+  # The size computation uses the overflow-checked `mulInt`/`addInt`.
+  let size = addInt(mulInt(len, typ.base.size), GenericSeqSize)
+  result = newObjRC1(typ, size)
+  cast[PGenericSeq](result).len = len
+  cast[PGenericSeq](result).reserved = len
+  # `newObjRC1` has already reported this allocation to the memory profiler;
+  # reporting it here again would count every sequence twice.
+  
+proc growObj(old: pointer, newsize: int, gch: var TGcHeap): pointer =
+  # Reallocates the string/sequence `old` into a new cell of `newsize` bytes
+  # of user data. The cell header and the old contents are copied, the
+  # additional space is zeroed and the old cell is released. Returns the
+  # user pointer of the new cell.
+  acquire(gch)
+  collectCT(gch)
+  var ol = usrToCell(old)
+  sysAssert(ol.typ != nil, "growObj: 1")
+  sysAssert(ol.typ.kind in {tyString, tySequence}, "growObj: 2")
+  sysAssert(allocInv(gch.region), "growObj begin")
+
+  var res = cast[PCell](rawAlloc(gch.region, newsize + sizeof(TCell)))
+  # strings have byte-sized elements; sequences use their base type's size
+  var elemSize = 1
+  if ol.typ.kind != tyString: elemSize = ol.typ.base.size
+  
+  var oldsize = cast[PGenericSeq](old).len*elemSize + GenericSeqSize
+  # copy header + payload, then clear the newly gained tail
+  copyMem(res, ol, oldsize + sizeof(TCell))
+  zeroMem(cast[pointer](cast[TAddress](res)+% oldsize +% sizeof(TCell)),
+          newsize-oldsize)
+  sysAssert((cast[TAddress](res) and (MemAlign-1)) == 0, "growObj: 3")
+  sysAssert(res.refcount shr rcShift <=% 1, "growObj: 4")
+  #if res.refcount <% rcIncrement:
+  #  add(gch.zct, res)
+  #else: # XXX: what to do here?
+  #  decRef(ol)
+  if (ol.refcount and colorMask) == rcZct:
+    # the old cell is listed in the ZCT: redirect its entry to the new cell
+    # (searching from the end of the table)
+    var j = gch.zct.len-1
+    var d = gch.zct.d
+    while j >= 0: 
+      if d[j] == ol:
+        d[j] = res
+        break
+      dec(j)
+  # the old cell is about to disappear, so it must not stay a cycle root
+  if canBeCycleRoot(ol): excl(gch.cycleRoots, ol)
+  when logGC:
+    writeCell("growObj old cell", ol)
+    writeCell("growObj new cell", res)
+  gcTrace(ol, csZctFreed)
+  gcTrace(res, csAllocated)
+  when reallyDealloc: rawDealloc(gch.region, ol)
+  else:
+    sysAssert(ol.typ != nil, "growObj: 5")
+    zeroMem(ol, sizeof(TCell))
+  release(gch)
+  result = cellToUsr(res)
+  sysAssert(allocInv(gch.region), "growObj end")
+  # only the additional bytes are reported to the memory profiler
+  when defined(memProfiler): nimProfile(newsize-oldsize)
+
+proc growObj(old: pointer, newsize: int): pointer {.rtl.} =
+  # Runtime entry point: grows `old` inside the global GC heap `gch`.
+  return growObj(old, newsize, gch)
+
+{.push profiler:off.}
+
+# ---------------- cycle collector -------------------------------------------
+
+var
+  # Counters reset at the start of `collectCycles` and printed at its end.
+  decrefs = 0    # decrements performed via `waCycleDecRef`
+  increfs = 0    # increments performed while restoring reference counts
+  marked = 0     # cycle roots that still had a non-zero count
+  collected = 0  # cycle roots that were freed
+
+proc doOperation(p: pointer, op: TWalkOp) =
+  # Applies the walk operation `op` to the cell that owns the user pointer
+  # `p`; nil pointers are ignored.
+  if p == nil: return
+  let c = usrToCell(p)
+  sysAssert(c != nil, "doOperation: 1")
+  case op # faster than function pointers because of easy prediction
+  of waPush:
+    add(gch.tempStack, c)
+  of waCycleDecRef:
+    sysAssert(c.refcount >=% rcIncrement, "doOperation 3")
+    c.refcount = c.refcount -% rcIncrement
+    inc decrefs
+  of waZctDecRef:
+    #if not isAllocatedPtr(gch.region, c):
+    #  return
+    #  c_fprintf(c_stdout, "[GC] decref bug: %p", c)
+    sysAssert(isAllocatedPtr(gch.region, c), "decRef: waZctDecRef")
+    sysAssert(c.refcount >=% rcIncrement, "doOperation 2")
+    c.refcount = c.refcount -% rcIncrement
+    when logGC: writeCell("decref (from doOperation)", c)
+    # the last reference is gone: queue the cell for freeing
+    if c.refcount <% rcIncrement:
+      addZCT(gch.zct, c)
+
+proc nimGCvisit(d: pointer, op: int) {.compilerRtl.} =
+  # Entry point for marker procs: `op` is the ordinal of a `TWalkOp`.
+  let walkOp = TWalkOp(op)
+  doOperation(d, walkOp)
+  
+# we now use a much simpler and non-recursive algorithm for cycle removal
+proc collectCycles(gch: var TGcHeap) =
+  # Frees garbage cycles among the cells registered in `gch.cycleRoots`.
+  # Three passes over the root set:
+  #   1. subtract the references each root holds on its children
+  #      (`waCycleDecRef`);
+  #   2. starting at roots that still have a non-zero count, re-add the
+  #      references along everything reachable from them;
+  #   3. free the roots whose count is still zero; their zero-count children
+  #      that are not roots themselves go to the ZCT.
+  # Finally the root set is emptied. Timing and counters are printed
+  # unconditionally via `c_printf`.
+  var tabSize = 0
+  let tStart = getTicks()
+  decrefs = 0
+  increfs = 0
+  marked = 0
+  collected = 0
+
+  # XXX: acyclic cutoff (specialized marker procs)
+  # short trim cycle roots
+  # long trim with threshold
+  # don't add new objects to both ztc and cycleroots?
+  # leak detector with hash in rawNew / free
+  #
+  # pass 1: remove the internal references
+  for c in elements(gch.cycleRoots):
+    inc(tabSize)
+    forallChildren(c, waCycleDecRef)
+  if tabSize == 0: return
+  gch.stat.cycleTableSize = max(gch.stat.cycleTableSize, tabSize)
+
+  c_printf "COLLECT CYCLES: %d\n", tabSize
+  let tAfterMark = getTicks()
+
+  # restore reference counts (a depth-first traversal is needed):
+  var marker: TCellSet
+  Init(marker)
+  for c in elements(gch.cycleRoots):
+    if c.refcount >=% rcIncrement:
+      # `c` is still referenced from outside the candidate set
+      inc marked
+      if not containsOrIncl(marker, c):
+        gch.tempStack.len = 0
+        forAllChildren(c, waPush)
+        while gch.tempStack.len > 0:
+          dec(gch.tempStack.len)
+          var d = gch.tempStack.d[gch.tempStack.len]
+          d.refcount = d.refcount +% rcIncrement
+          inc increfs
+          # descend only into roots that have not been visited yet
+          if d in gch.cycleRoots and not containsOrIncl(marker, d):
+            forAllChildren(d, waPush)
+  
+  let tAfterScan = getTicks()
+
+  # remove cycles:
+  for c in elements(gch.cycleRoots):
+    if c.refcount <% rcIncrement:
+      inc collected
+      gch.tempStack.len = 0
+      forAllChildren(c, waPush)
+      while gch.tempStack.len > 0:
+        dec(gch.tempStack.len)
+        var d = gch.tempStack.d[gch.tempStack.len]
+        if d.refcount <% rcIncrement:
+          if d notin gch.cycleRoots: # d is leaf of c and not part of cycle
+            addZCT(gch.zct, d)
+            when logGC: writeCell("add to ZCT (from cycle collector)", d)
+      # run the finalizer (if any) before the storage is released
+      prepareDealloc(c)
+      gcTrace(c, csCycFreed)
+      when logGC: writeCell("cycle collector dealloc cell", c)
+      when reallyDealloc: rawDealloc(gch.region, c)
+      else:
+        sysAssert(c.typ != nil, "collectCycles")
+        zeroMem(c, sizeof(TCell))
+
+  let tFinal = getTicks()
+
+  # tick differences are divided by 1_000_000 and labelled "ms"
+  # NOTE(review): this assumes `getTicks` counts nanoseconds - confirm.
+  cprintf "times:\n  mark: %d ms\n  scan: %d ms\n  collect: %d ms\n  decrefs: %d\n  increfs: %d\n  marked: %d\n  collected: %d\n",
+    (tAfterMark - tStart) div 1_000_000,
+    (tAfterScan - tAfterMark) div 1_000_000,
+    (tFinal - tAfterScan) div 1_000_000,
+    decrefs,
+    increfs,
+    marked,
+    collected
+
+  # start over with an empty root set
+  Deinit(gch.cycleRoots)
+  Init(gch.cycleRoots)
+
+# While `gcDebugging` is true, `gcMark` neither bumps reference counts nor
+# records cells in `decStack`.
+var gcDebugging* = false
+# Exported debugging hook; it is not called in the part of the file shown
+# here.
+var vis*: proc (a: pointer, b: PNimType)
+
+proc debugNode(n: ptr TNimNode) =
+  # Prints the name of `n` and, recursively, the names of all its sons.
+  c_fprintf(c_stdout, "node %s\n", n.name)
+  var i = 0
+  while i < n.len:
+    debugNode(n.sons[i])
+    inc(i)
+
+proc debugTyp(x: PNimType) =
+  # Prints the kind of `x` followed by its node tree, if it has one.
+  c_fprintf(c_stdout, "type %d\n", x.kind)
+  let root = x.node
+  if root == nil: return
+  debugNode(root)
+
+# Exported debugging hook for sequences; it is not called in the part of
+# the file shown here.
+var seqdbg* : proc (s: PGenericSeq) {.cdecl.}
+
+type
+  # Compile-time knowledge about whether a cell can take part in a cycle;
+  # consumed by `doDecRef`.
+  TCyclicMode = enum
+    Cyclic,
+    Acyclic,
+    MaybeCyclic
+
+  # NOTE(review): not referenced in the part of the file shown here.
+  TReleaseType = enum
+    AddToZTC
+    FreeImmediately
+
+  # Selects between plain and atomic reference count updates in the
+  # `++`/`--` templates that take a heap type.
+  THeapType = enum
+    LocalHeap
+    SharedHeap
+
+# Adds one reference to `rc`; atomically when `heapType` is `SharedHeap`.
+template `++` (rc: TRefCount, heapType: THeapType): stmt =
+  when heapType == SharedHeap:
+    discard atomicInc(rc, rcIncrement)
+  else:
+    inc rc, rcIncrement
+
+# Removes one reference from `rc` and yields true when none is left.
+template `--`(rc: TRefCount): expr =
+  dec rc, rcIncrement
+  rc <% rcIncrement
+
+# Like the unary `--` above, but atomic when `heapType` is `SharedHeap`.
+# NOTE(review): assumes `atomicDec` returns the decremented value - confirm.
+template `--` (rc: TRefCount, heapType: THeapType): expr =
+  (when heapType == SharedHeap: atomicDec(rc, rcIncrement) <% rcIncrement
+   else: --rc)
+
+# Parameterised variant of `decRef`: `heapType` selects plain or atomic
+# counting, `cycleFlag` states what is known about the cell's cyclicity.
+template doDecRef(cc: PCell,
+                  heapType = LocalHeap,
+                  cycleFlag = MaybeCyclic): stmt =
+  var c = cc
+  sysAssert(isAllocatedPtr(gch.region, c), "decRef: interiorPtr")
+  # XXX: move this elsewhere
+
+  sysAssert(c.refcount >=% rcIncrement, "decRef")
+  # NOTE(review): `c.refcount--(heapType)` is presumably meant to invoke the
+  # two-argument `--` template - confirm that it parses as intended.
+  if c.refcount--(heapType):
+    # this is the last reference from the heap
+    # add to a zero-count-table that will be matched against stack pointers
+    rtlAddZCT(c)
+    # writeCell("decref to 0", c)
+  else:
+    # cells known to be acyclic never become cycle roots
+    when cycleFlag != Acyclic:
+      if cycleFlag == Cyclic or canBeCycleRoot(c):
+        # a cycle may have been broken
+        rtlAddCycleRoot(c)
+
+proc gcMark(gch: var TGcHeap, p: pointer) {.inline.} =
+  # Treats `p` as a potential reference found on the stack. If it points
+  # into an allocated cell, that cell gets an extra reference and is
+  # recorded in `decStack`; nothing is changed while `gcDebugging` is on.
+  sysAssert(allocInv(gch.region), "gcMark begin")
+  # the addresses are not as cells on the stack, so turn them to cells:
+  let candidate = usrToCell(p)
+  if cast[TAddress](candidate) >% PageSize:
+    # fast check: does it look like a cell?
+    let owner = cast[PCell](interiorAllocatedPtr(gch.region, candidate))
+    if owner != nil and not gcDebugging:
+      # mark the cell:
+      owner.refcount = owner.refcount +% rcIncrement
+      add(gch.decStack, owner)
+    when false:
+      if isAllocatedPtr(gch.region, candidate):
+        sysAssert false, "allocated pointer but not interior?"
+        # mark the cell:
+        candidate.refcount = candidate.refcount +% rcIncrement
+        add(gch.decStack, candidate)
+  sysAssert(allocInv(gch.region), "gcMark end")
+
+proc markThreadStacks(gch: var TGcHeap) = 
+  # Marks the registers and stacks of all threads in `threadList`.
+  # Compiling this for a shared heap deliberately fails (see the `error`
+  # pragma); in every other configuration the proc is empty.
+  when hasThreadSupport and hasSharedHeap:
+    {.error: "not fully implemented".}
+    var it = threadList
+    while it != nil:
+      # mark registers: 
+      for i in 0 .. high(it.registers): gcMark(gch, it.registers[i])
+      var sp = cast[TAddress](it.stackBottom)
+      var max = cast[TAddress](it.stackTop)
+      # XXX stack direction?
+      # XXX unroll this loop:
+      while sp <=% max:
+        gcMark(gch, cast[ppointer](sp)[])
+        sp = sp +% sizeof(pointer)
+      it = it.next
+
+# ----------------- stack management --------------------------------------
+#  inspired from Smart Eiffel
+
+# True only for targets identified by one of the hppa/hp9000* symbols;
+# SPARC and every other target use a stack whose addresses decrease as it
+# grows.
+const stackIncreases = not defined(sparc) and
+  (defined(hppa) or defined(hp9000) or defined(hp9000s300) or
+   defined(hp9000s700) or defined(hp9000s800) or defined(hp9000s820))
+
+when not defined(useNimRtl):
+  {.push stack_trace: off.}
+  proc setStackBottom(theStackBottom: pointer) =
+    # Records the stack bottom. After the first call the value is only ever
+    # moved further away from the stack top.
+    #c_fprintf(c_stdout, "stack bottom: %p;\n", theStackBottom)
+    # the first init must be the one that defines the stack bottom:
+    if gch.stackBottom == nil:
+      gch.stackBottom = theStackBottom
+      return
+    let candidate = cast[TAddress](theStackBottom)
+    let current = cast[TAddress](gch.stackBottom)
+    #c_fprintf(c_stdout, "old: %p new: %p;\n",gch.stackBottom,theStackBottom)
+    when stackIncreases:
+      gch.stackBottom = cast[pointer](min(candidate, current))
+    else:
+      gch.stackBottom = cast[pointer](max(candidate, current))
+  {.pop.}
+
+proc stackSize(): int {.noinline.} =
+  # Distance in bytes between a local variable of this proc and the
+  # recorded stack bottom.
+  var stackTop {.volatile.}: pointer
+  let here = cast[int](addr(stackTop))
+  let bottom = cast[int](gch.stackBottom)
+  result = abs(here - bottom)
+
+# Each branch below provides `isOnStack` (does `p` lie between the current
+# stack top and the recorded stack bottom?) and `markStackAndRegisters`
+# (feed every pointer-sized stack slot to `gcMark`).
+when defined(sparc): # For SPARC architecture.
+  proc isOnStack(p: pointer): bool =
+    # the address of a local variable serves as the current stack top
+    var stackTop {.volatile.}: pointer
+    stackTop = addr(stackTop)
+    var b = cast[TAddress](gch.stackBottom)
+    var a = cast[TAddress](stackTop)
+    var x = cast[TAddress](p)
+    result = a <=% x and x <=% b
+
+  proc markStackAndRegisters(gch: var TGcHeap) {.noinline, cdecl.} =
+    # flush the register windows first (see the instruction mnemonics below)
+    when defined(sparcv9):
+      asm  """"flushw \n" """
+    else:
+      asm  """"ta      0x3   ! ST_FLUSH_WINDOWS\n" """
+
+    var
+      max = gch.stackBottom
+      sp: PPointer
+      stackTop: array[0..1, pointer]
+    sp = addr(stackTop[0])
+    # Addresses decrease as the stack grows.
+    while sp <= max:
+      gcMark(gch, sp[])
+      sp = cast[ppointer](cast[TAddress](sp) +% sizeof(pointer))
+
+elif defined(ELATE):
+  {.error: "stack marking code is to be written for this architecture".}
+
+elif stackIncreases:
+  # ---------------------------------------------------------------------------
+  # Generic code for architectures where addresses increase as the stack grows.
+  # ---------------------------------------------------------------------------
+  proc isOnStack(p: pointer): bool =
+    # here the stack bottom is the lower bound and the stack top the upper
+    var stackTop {.volatile.}: pointer
+    stackTop = addr(stackTop)
+    var a = cast[TAddress](gch.stackBottom)
+    var b = cast[TAddress](stackTop)
+    var x = cast[TAddress](p)
+    result = a <=% x and x <=% b
+
+  var
+    jmpbufSize {.importc: "sizeof(jmp_buf)", nodecl.}: int
+      # a little hack to get the size of a TJmpBuf in the generated C code
+      # in a platform independent way
+
+  proc markStackAndRegisters(gch: var TGcHeap) {.noinline, cdecl.} =
+    # scans downwards from the end of the jmp_buf to the stack bottom
+    var registers: C_JmpBuf
+    if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
+      var max = cast[TAddress](gch.stackBottom)
+      var sp = cast[TAddress](addr(registers)) +% jmpbufSize -% sizeof(pointer)
+      # sp will traverse the JMP_BUF as well (jmp_buf size is added,
+      # otherwise sp would be below the registers structure).
+      while sp >=% max:
+        gcMark(gch, cast[ppointer](sp)[])
+        sp = sp -% sizeof(pointer)
+
+else:
+  # ---------------------------------------------------------------------------
+  # Generic code for architectures where addresses decrease as the stack grows.
+  # ---------------------------------------------------------------------------
+  proc isOnStack(p: pointer): bool =
+    # the address of a local variable serves as the current stack top
+    var stackTop {.volatile.}: pointer
+    stackTop = addr(stackTop)
+    var b = cast[TAddress](gch.stackBottom)
+    var a = cast[TAddress](stackTop)
+    var x = cast[TAddress](p)
+    result = a <=% x and x <=% b
+
+  proc markStackAndRegisters(gch: var TGcHeap) {.noinline, cdecl.} =
+    # We use a jmp_buf buffer that is in the C stack.
+    # Used to traverse the stack and registers assuming
+    # that 'setjmp' will save registers in the C stack.
+    type PStackSlice = ptr array [0..7, pointer]
+    var registers: C_JmpBuf
+    if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
+      var max = cast[TAddress](gch.stackBottom)
+      var sp = cast[TAddress](addr(registers))
+      # loop unrolled:
+      # eight slots per iteration while at least eight remain
+      while sp <% max - 8*sizeof(pointer):
+        gcMark(gch, cast[PStackSlice](sp)[0])
+        gcMark(gch, cast[PStackSlice](sp)[1])
+        gcMark(gch, cast[PStackSlice](sp)[2])
+        gcMark(gch, cast[PStackSlice](sp)[3])
+        gcMark(gch, cast[PStackSlice](sp)[4])
+        gcMark(gch, cast[PStackSlice](sp)[5])
+        gcMark(gch, cast[PStackSlice](sp)[6])
+        gcMark(gch, cast[PStackSlice](sp)[7])
+        sp = sp +% sizeof(pointer)*8
+      # last few entries:
+      while sp <=% max:
+        gcMark(gch, cast[ppointer](sp)[])
+        sp = sp +% sizeof(pointer)
+
+# ----------------------------------------------------------------------------
+# end of non-portable code
+# ----------------------------------------------------------------------------
+
+proc CollectZCT(gch: var TGcHeap): bool =
+  # Drains the zero count table (ZCT). Returns true once the table is empty.
+  # With `withRealtime` and a positive gch.maxPause it may instead return
+  # false early when the time budget is (nearly) used up.
+  #
+  # Note: Freeing may add child objects to the ZCT! So essentially we do 
+  # deep freeing, which is bad for incremental operation. In order to 
+  # avoid a deep stack, we move objects to keep the ZCT small.
+  # This is performance critical!
+  const workPackage = 100
+  # `L` aliases the table length so that entries appended while cells are
+  # being freed are seen by the loop condition.
+  var L = addr(gch.zct.len)
+  
+  when withRealtime:
+    # the clock is only consulted once every `workPackage` processed cells
+    var steps = workPackage
+    var t0: TTicks
+    if gch.maxPause > 0: t0 = getticks()
+  while L[] > 0:
+    var c = gch.zct.d[0]
+    sysAssert(isAllocatedPtr(gch.region, c), "CollectZCT: isAllocatedPtr")
+    # remove from ZCT:
+    sysAssert((c.refcount and rcZct) == rcZct, "collectZCT")
+    
+    # clear the color bits, then fill slot 0 with the last entry and shrink
+    c.refcount = c.refcount and not colorMask
+    gch.zct.d[0] = gch.zct.d[L[] - 1]
+    dec(L[])
+    when withRealtime: dec steps
+    # only cells whose reference count is below one increment are freed
+    if c.refcount <% rcIncrement: 
+      # It may have a RC > 0, if it is in the hardware stack or
+      # it has not been removed yet from the ZCT. This is because
+      # ``incref`` does not bother to remove the cell from the ZCT 
+      # as this might be too slow.
+      # In any case, it should be removed from the ZCT. But not
+      # freed. **KEEP THIS IN MIND WHEN MAKING THIS INCREMENTAL!**
+      if canBeCycleRoot(c): excl(gch.cycleRoots, c)
+      when logGC: writeCell("zct dealloc cell", c)
+      gcTrace(c, csZctFreed)
+      # We are about to free the object, call the finalizer BEFORE its
+      # children are deleted as well, because otherwise the finalizer may
+      # access invalid memory. This is done by prepareDealloc():
+      prepareDealloc(c)
+      forAllChildren(c, waZctDecRef)
+      when reallyDealloc: rawDealloc(gch.region, c)
+      else:
+        sysAssert(c.typ != nil, "collectZCT 2")
+        zeroMem(c, sizeof(TCell))
+    when withRealtime:
+      if steps == 0:
+        # start the next work package and check the deadline
+        steps = workPackage
+        if gch.maxPause > 0:
+          let duration = getticks() - t0
+          # the GC's measuring is not accurate and needs some cleanup actions
+          # (stack unmarking), so subtract some short amount of time in
+          # order to miss deadlines less often:
+          if duration >= gch.maxPause - 50_000:
+            return false
+  result = true
+
+proc unmarkStackAndRegisters(gch: var TGcHeap) = 
+  # Walks gch.decStack, decrements the reference count of every recorded
+  # cell and empties the stack afterwards. A cell for which `--` reports
+  # true is appended to the ZCT.
+  var cells = gch.decStack.d
+  let count = gch.decStack.len
+  var idx = 0
+  while idx < count:
+    sysAssert isAllocatedPtr(gch.region, cells[idx]), "unmarkStackAndRegisters"
+    # decRef inlined: cannot create a cycle and must not acquire lock
+    var cell = cells[idx]
+    # XXX no need for an atomic dec here:
+    if --cell.refcount:
+      addZCT(gch.zct, cell)
+    sysAssert cell.typ != nil, "unmarkStackAndRegisters 2"
+    inc(idx)
+  gch.decStack.len = 0
+
+proc collectCTBody(gch: var TGcHeap) =
+  # Performs one collection: marks the stack(s), drains the ZCT, optionally
+  # runs the cycle collector, and finally unmarks the stack again.
+  # With `withRealtime` the elapsed ticks are recorded in gch.stat.maxPause.
+  when withRealtime:
+    let t0 = getticks()
+  sysAssert(allocInv(gch.region), "collectCT: begin")
+  
+  gch.stat.maxStackSize = max(gch.stat.maxStackSize, stackSize())
+  # the decrement stack must be empty before marking starts
+  sysAssert(gch.decStack.len == 0, "collectCT")
+  prepareForInteriorPointerChecking(gch.region)
+  markStackAndRegisters(gch)
+  markThreadStacks(gch)
+  gch.stat.maxStackCells = max(gch.stat.maxStackCells, gch.decStack.len)
+  inc(gch.stat.stackScans)
+  # cycle collection is only considered when collectZCT ran to completion
+  # (it returns false when it stopped early)
+  if collectZCT(gch):
+    when cycleGC:
+      if getOccupiedMem(gch.region) >= gch.cycleThreshold or alwaysCycleGC:
+        collectCycles(gch)
+        # drain the ZCT once more after the cycle collection; the result is
+        # deliberately ignored here
+        discard collectZCT(gch)
+        inc(gch.stat.cycleCollections)
+        # next threshold: occupied memory times the increase factor, but
+        # never below the initial threshold
+        gch.cycleThreshold = max(InitialCycleThreshold, getOccupiedMem() *
+                                 cycleIncrease)
+        gch.stat.maxThreshold = max(gch.stat.maxThreshold, gch.cycleThreshold)
+  # runs regardless of whether collectZCT finished
+  unmarkStackAndRegisters(gch)
+  sysAssert(allocInv(gch.region), "collectCT: end")
+  
+  when withRealtime:
+    let duration = getticks() - t0
+    gch.stat.maxPause = max(gch.stat.maxPause, duration)
+    when defined(reportMissedDeadlines):
+      # only reported when a pause limit is set and was exceeded
+      if gch.maxPause > 0 and duration > gch.maxPause:
+        c_fprintf(c_stdout, "[GC] missed deadline: %ld\n", duration)
+
+proc collectCT(gch: var TGcHeap) =
+  # Starts a collection when one of the triggers fires (ZCT at or above its
+  # threshold, occupied memory at or above the cycle threshold with cycleGC,
+  # or alwaysGC) and the lock counter gch.recGcLock is zero.
+  if gch.zct.len >= ZctThreshold or (cycleGC and
+      getOccupiedMem(gch.region)>=gch.cycleThreshold) or alwaysGC:
+    if gch.recGcLock == 0:
+      collectCTBody(gch)
+
+when withRealtime:
+  proc toNano(x: int): TNanos {.inline.} =
+    # Converts a duration given in microseconds into nanoseconds.
+    # The value is widened to TNanos *before* the multiplication: `x * 1000`
+    # on its own is evaluated in `int` and can overflow on targets where
+    # `int` is narrower than TNanos (presumably 64 bit -- confirm against
+    # system/timers). Where both have the same width this changes nothing.
+    result = TNanos(x) * 1000
+
+  proc GC_setMaxPause*(MaxPauseInUs: int) =
+    # Stores the requested pause limit in gch.maxPause, converted by toNano.
+    gch.maxPause = toNano(MaxPauseInUs)
+
+  proc GC_step(gch: var TGcHeap, us: int, strongAdvice: bool) =
+    # Sets the pause limit to `us` (converted by toNano) and runs
+    # collectCTBody if one of the usual triggers fires or `strongAdvice`
+    # is set. The heap is acquired for the duration of the call.
+    # NOTE(review): unlike collectCT, gch.recGcLock is not consulted here.
+    acquire(gch)
+    gch.maxPause = toNano(us)
+    if gch.zct.len >= ZctThreshold or (cycleGC and
+        getOccupiedMem(gch.region)>=gch.cycleThreshold) or alwaysGC or
+        strongAdvice:
+      collectCTBody(gch)
+    release(gch)
+
+  proc GC_step*(us: int, strongAdvice = false) =
+    # Exported variant: forwards to the heap-taking overload, passing `gch`.
+    GC_step(gch, us, strongAdvice)
+
+when not defined(useNimRtl):
+  proc GC_disable() = 
+    # Increments the lock counter gch.recGcLock; the increment is atomic
+    # when both hasThreadSupport and hasSharedHeap are set.
+    when not (hasThreadSupport and hasSharedHeap):
+      inc(gch.recGcLock)
+    else:
+      discard atomicInc(gch.recGcLock, 1)
+  proc GC_enable() =
+    # Decrements the lock counter gch.recGcLock, but only while it is
+    # positive; the decrement is atomic when both hasThreadSupport and
+    # hasSharedHeap are set.
+    if gch.recGcLock <= 0: return
+    when not (hasThreadSupport and hasSharedHeap):
+      dec(gch.recGcLock)
+    else:
+      discard atomicDec(gch.recGcLock, 1)
+
+  proc GC_setStrategy(strategy: TGC_Strategy) =
+    # Every strategy is accepted and currently has no effect.
+    case strategy
+    of gcThroughput, gcResponsiveness,
+       gcOptimizeSpace, gcOptimizeTime:
+      nil
+
+  proc GC_enableMarkAndSweep() =
+    # Resets the cycle threshold to its initial value. Any larger threshold
+    # reached earlier is discarded.
+    gch.cycleThreshold = InitialCycleThreshold
+
+  proc GC_disableMarkAndSweep() =
+    # Raises the cycle threshold to one below the largest value its type
+    # can hold.
+    gch.cycleThreshold = high(gch.cycleThreshold)-1
+    # set to the max value to suppress the cycle detector
+
+  proc GC_fullCollect() =
+    # Runs collectCT with the cycle threshold temporarily set to zero and
+    # restores the previous threshold afterwards. The heap is acquired for
+    # the duration of the call.
+    acquire(gch)
+    let savedThreshold = gch.cycleThreshold
+    # a threshold of zero forces cycle collection
+    gch.cycleThreshold = 0
+    collectCT(gch)
+    gch.cycleThreshold = savedThreshold
+    release(gch)
+
+  proc GC_getStatistics(): string =
+    # Returns a multi-line, human readable report built from the counters in
+    # gch.stat plus the current total/occupied memory and ZCT capacity.
+    # The report is assembled between GC_disable() and GC_enable().
+    GC_disable()
+    result = "[GC] total memory: " & $(getTotalMem()) & "\n" &
+             "[GC] occupied memory: " & $(getOccupiedMem()) & "\n" &
+             "[GC] stack scans: " & $gch.stat.stackScans & "\n" &
+             "[GC] stack cells: " & $gch.stat.maxStackCells & "\n" &
+             "[GC] cycle collections: " & $gch.stat.cycleCollections & "\n" &
+             "[GC] max threshold: " & $gch.stat.maxThreshold & "\n" &
+             "[GC] zct capacity: " & $gch.zct.cap & "\n" &
+             "[GC] max cycle table size: " & $gch.stat.cycleTableSize & "\n" &
+             "[GC] max stack size: " & $gch.stat.maxStackSize & "\n" &
+             "[GC] max pause time [ms]: " & $(gch.stat.maxPause div 1000_000)
+    # NOTE(review): the division above assumes gch.stat.maxPause is kept in
+    # nanoseconds -- confirm against the tick unit of system/timers.
+    when traceGC: writeLeakage()
+    GC_enable()
+
+{.pop.}