author     Araq <rumpf_a@web.de>    2015-11-29 14:55:12 +0100
committer  Araq <rumpf_a@web.de>    2015-12-01 00:53:30 +0100
commit     a1739455d39507bb27c3a125d97ff9870037f772 (patch)
tree       57ec4404edf051ae03af798f0a49432850fdd026
parent     cc6d9f69b4813f08809a223e967e4c00c8874144 (diff)
download   Nim-a1739455d39507bb27c3a125d97ff9870037f772.tar.gz

first version of the new hard realtime GC

-rw-r--r--  lib/system/gc2.nim  1483
1 file changed, 478 insertions(+), 1005 deletions(-)
diff --git a/lib/system/gc2.nim b/lib/system/gc2.nim
index 4ca0d144f..78901b1dc 100644
--- a/lib/system/gc2.nim
+++ b/lib/system/gc2.nim
@@ -1,7 +1,7 @@
 #
 #
 #            Nim's Runtime Library
-#        (c) Copyright 2012 Andreas Rumpf
+#        (c) Copyright 2015 Andreas Rumpf
 #
 #    See the file "copying.txt", included in this
 #    distribution, for details about the copyright.
@@ -9,13 +9,13 @@
 
 #            Garbage Collector
 #
-# The basic algorithm is *Deferrent Reference Counting* with cycle detection.
-# This is achieved by combining a Deutsch-Bobrow garbage collector
-# together with Christoper's partial mark-sweep garbage collector.
-#
-# Special care has been taken to avoid recursion as far as possible to avoid
-# stack overflows when traversing deep datastructures. It is well-suited
-# for soft real time applications (like games).
+# The basic algorithm is *Deferred Reference Counting* with an incremental mark
+# and sweep GC to free cycles. It is hard realtime in the sense that, as long
+# as you play by its rules, no deadline is ever missed.
+
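For readers new to the scheme: deferred RC counts only heap references; stack
references are found later by a conservative scan, and a cell whose heap count
drops to zero goes into a zero count table (ZCT) rather than being freed on
the spot. A minimal self-contained sketch of that idea (hypothetical types,
not the barriers defined further down in this file):

# Illustrative only: deferred reference counting with a ZCT.
type
  CellSketch = ref object
    rc: int                  # counts heap references only
  Zct = seq[CellSketch]      # rc == 0, but possibly still stack-reachable

proc writeBarrier(dest: var CellSketch, src: CellSketch, zct: var Zct) =
  # heap-slot assignment: the only place the counters change
  if src != nil: inc src.rc
  if dest != nil:
    dec dest.rc
    if dest.rc == 0: zct.add dest    # defer the free/keep decision
  dest = src

proc collectZctSketch(zct: var Zct, stackRoots: seq[CellSketch]) =
  # only cells that are in the ZCT *and* not on the stack are garbage
  for c in zct:
    if c.rc == 0 and c notin stackRoots:
      discard                        # free(c) in the real collector
  zct.setLen 0
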
+when defined(nimCoroutines):
+  import arch
+
 {.push profiler:off.}
 
 const
@@ -29,82 +29,30 @@ const
 when withRealTime and not declared(getTicks):
   include "system/timers"
 when defined(memProfiler):
-  proc nimProfile(requestedSize: int)
+  proc nimProfile(requestedSize: int) {.benign.}
 
 const
-  rcShift = 6 # the reference count is shifted so we can use
-              # the least significat bits for additinal flags:
-
-  rcAlive = 0b00000           # object is reachable.
-                              # color *black* in the original paper
-
-  rcCycleCandidate = 0b00001  # possible root of a cycle. *purple*
-
-  rcDecRefApplied = 0b00010   # the first dec-ref phase of the
-                              # collector was already applied to this
-                              # object. *gray*
-
-  rcMaybeDead = 0b00011       # this object is a candidate for deletion
-                              # during the collect cycles algorithm.
-                              # *white*.
-
-  rcReallyDead = 0b00100      # this is proved to be garbage
-
-  rcRetiredBuffer = 0b00101   # this is a seq or string buffer that
-                              # was replaced by a resize operation.
-                              # see growObj for details
-
-  rcColorMask = RefCount(0b00111)
-
-  rcZct = 0b01000             # already added to ZCT
-  rcInCycleRoots = 0b10000    # already buffered as cycle candidate
-  rcHasStackRef = 0b100000    # the object had a stack ref in the last
-                              # cycle collection
-
-  rcMarkBit = rcHasStackRef   # this is currently used for leak detection
-                              # when traceGC is on
-
-  rcBufferedAnywhere = rcZct or rcInCycleRoots
-
-  rcIncrement = 1 shl rcShift # don't touch the color bits
-
-const
-  NewObjectsAreCycleRoots = true
-    # the alternative is to use the old strategy of adding cycle roots
-    # in incRef (in the compiler itself, this doesn't change much)
-
-  IncRefRemovesCandidates = false
-    # this is safe only if we can reliably track the fact that the object
-    # has stack references. This could be easily done by adding another bit
-    # to the refcount field and setting it up in unmarkStackAndRegisters.
-    # The bit must also be set for new objects that are not rc1 and it must be
-    # examined in the decref loop in collectCycles.
-    # XXX: not implemented yet as tests didn't show any improvement from this
-
-  MarkingSkipsAcyclicObjects = true
-    # Acyclic objects can be safely ignored in the mark and scan phases,
-    # because they cannot contribute to the internal count.
-    # XXX: if we generate specialized `markCyclic` and `markAcyclic`
-    # procs we can further optimize this as there won't be need for any
-    # checks in the code
-
-  MinimumStackMarking = false
-    # Try to scan only the user stack and ignore the part of the stack
-    # belonging to the GC itself. see setStackTop for further info.
-    # XXX: still has problems in release mode in the compiler itself.
-    # investigate how it affects growObj
-
-  CollectCyclesStats = false
-
+  rcIncrement = 0b1000 # so that lowest 3 bits are not touched
+  rcWhite = 0b000  # cell is colored white
+  rcGrey = 0b001   # traditional color for incremental mark&sweep
+  rcBlack = 0b010  # marked as alive; direct children also have been processed
+  rcUnused = 0b011
+  ZctFlag = 0b100  # in ZCT
+  rcShift = 3      # shift by rcShift to get the reference counter
+  colorMask = 0b011
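
The refcount word therefore packs three fields: the count itself above
rcShift, the mark color in the low two bits, and the ZCT membership flag in
bit 2. A small standalone sketch of decoding such a word (same values as the
const section above):

# Standalone sketch: decoding the packed refcount word.
const
  rcShift   = 3
  colorMask = 0b011
  ZctFlag   = 0b100

proc decode(refcount: int): tuple[rc, color: int, inZct: bool] =
  (refcount shr rcShift,            # the actual reference count
   refcount and colorMask,          # white/grey/black for mark&sweep
   (refcount and ZctFlag) != 0)     # already buffered in the ZCT?

# a cell with RC 2, colored grey (0b001), not in the ZCT:
let d = decode(2 shl rcShift or 0b001)
assert d.rc == 2 and d.color == 0b001 and not d.inZct
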
 type
   WalkOp = enum
-    waPush
+    waMarkGlobal,    # part of the backup mark&sweep
+    waMarkGrey,
+    waZctDecRef #, waDebug
 
-  Finalizer {.compilerproc.} = proc (self: pointer) {.nimcall.}
+  Phase {.pure.} = enum
+    None, Marking, Sweeping
+  Finalizer {.compilerproc.} = proc (self: pointer) {.nimcall, benign.}
     # A ref type can have a finalizer that is called before the object's
     # storage is freed.
 
-  GcStat {.final, pure.} = object
+  GcStat = object
     stackScans: int          # number of performed stack scans (for statistics)
     cycleCollections: int    # number of performed full collections
     maxThreshold: int        # max threshold that has been set
@@ -113,134 +61,76 @@ type
     cycleTableSize: int      # max entries in cycle table
     maxPause: int64          # max measured GC pause in nanoseconds
 
-  GcHeap {.final, pure.} = object # this contains the zero count and
-                                   # non-zero count table
+  GcStack = object
+    prev: ptr GcStack
+    next: ptr GcStack
+    starts: pointer
+    pos: pointer
+    maxStackSize: int
+
+  GcHeap = object # this contains the zero count and
+                  # non-zero count table
+    stack: ptr GcStack
     stackBottom: pointer
-    stackTop: pointer
+    phase: Phase
     cycleThreshold: int
+    when useCellIds:
+      idGenerator: int
     zct: CellSeq             # the zero count table
     decStack: CellSeq        # cells in the stack that are to decref again
-    cycleRoots: CellSeq
-    tempStack: CellSeq       # temporary stack for recursion elimination
-    freeStack: CellSeq       # objects ready to be freed
+    greyStack: CellSeq
     recGcLock: int           # prevent recursion via finalizers; no thread lock
-    cycleRootsTrimIdx: int   # Trimming is a light-weight collection of the
-                             # cycle roots table that uses a cheap linear scan
-                             # to find only possitively dead objects.
-                             # One strategy is to perform it only for new objects
-                             # allocated between the invocations of collectZCT.
-                             # This index indicates the start of the range of
-                             # such new objects within the table.
     when withRealTime:
       maxPause: Nanos        # max allowed pause in nanoseconds; active if > 0
     region: MemRegion        # garbage collected region
     stat: GcStat
-{.deprecated: [TWalkOp: WalkOp, TFinalizer: Finalizer, TGcStat: GcStat,
-              TGcHeap: GcHeap].}
+    additionalRoots: CellSeq # dummy roots for GC_ref/unref
+
 var
-  gch* {.rtlThreadVar.}: GcHeap
+  gch {.rtlThreadVar.}: GcHeap
 
 when not defined(useNimRtl):
   instantiateForRegion(gch.region)
 
-template acquire(gch: GcHeap) =
-  when hasThreadSupport and hasSharedHeap:
-    AcquireSys(HeapLock)
-
-template release(gch: GcHeap) =
-  when hasThreadSupport and hasSharedHeap:
-    releaseSys(HeapLock)
-
-template setColor(c: PCell, color) =
-  c.refcount = (c.refcount and not rcColorMask) or color
-
-template color(c: PCell): expr =
-  c.refcount and rcColorMask
-
-template isBitDown(c: PCell, bit): expr =
-  (c.refcount and bit) == 0
-
-template isBitUp(c: PCell, bit): expr =
-  (c.refcount and bit) != 0
-
-template setBit(c: PCell, bit): expr =
-  c.refcount = c.refcount or bit
-
-template isDead(c: Pcell): expr =
-  c.isBitUp(rcReallyDead) # also covers rcRetiredBuffer
-
-template clearBit(c: PCell, bit): expr =
-  c.refcount = c.refcount and (not RefCount(bit))
-
-when debugGC:
-  var gcCollectionIdx = 0
-
-  proc colorStr(c: PCell): cstring =
-    let color = c.color
-    case color
-    of rcAlive: return "alive"
-    of rcMaybeDead: return "maybedead"
-    of rcCycleCandidate: return "candidate"
-    of rcDecRefApplied: return "marked"
-    of rcRetiredBuffer: return "retired"
-    of rcReallyDead: return "dead"
-    else: return "unknown?"
-
-  proc inCycleRootsStr(c: PCell): cstring =
-    if c.isBitUp(rcInCycleRoots): result = "cycleroot"
-    else: result = ""
-
-  proc inZctStr(c: PCell): cstring =
-    if c.isBitUp(rcZct): result = "zct"
-    else: result = ""
-
-  proc writeCell*(msg: CString, c: PCell, force = false) =
-    var kind = -1
-    if c.typ != nil: kind = ord(c.typ.kind)
-    when trackAllocationSource:
-      c_fprintf(c_stdout, "[GC %d] %s: %p %d rc=%ld %s %s %s from %s(%ld)\n",
-                gcCollectionIdx,
-                msg, c, kind, c.refcount shr rcShift,
-                c.colorStr, c.inCycleRootsStr, c.inZctStr,
-                c.filename, c.line)
-    else:
-      c_fprintf(c_stdout, "[GC] %s: %p %d rc=%ld\n",
-                msg, c, kind, c.refcount shr rcShift)
-
-proc addZCT(zct: var CellSeq, c: PCell) {.noinline.} =
-  if c.isBitDown(rcZct):
-    c.setBit rcZct
-    zct.add c
-
-template setStackTop(gch) =
-  # This must be called immediately after we enter the GC code
-  # to minimize the size of the scanned stack. The stack consumed
-  # by the GC procs may amount to 200-400 bytes depending on the
-  # build settings and this contributes to false-positives
-  # in the conservative stack marking
-  when MinimumStackMarking:
-    var stackTop {.volatile.}: pointer
-    gch.stackTop = addr(stackTop)
-
-template addCycleRoot(cycleRoots: var CellSeq, c: PCell) =
-  if c.color != rcCycleCandidate:
-    c.setColor rcCycleCandidate
-
-    # the object may be buffered already. for example, consider:
-    # decref; incref; decref
-    if c.isBitDown(rcInCycleRoots):
-      c.setBit rcInCycleRoots
-      cycleRoots.add c
+proc initGC() =
+  when not defined(useNimRtl):
+    when traceGC:
+      for i in low(CellState)..high(CellState): init(states[i])
+    gch.cycleThreshold = InitialCycleThreshold
+    gch.stat.stackScans = 0
+    gch.stat.cycleCollections = 0
+    gch.stat.maxThreshold = 0
+    gch.stat.maxStackSize = 0
+    gch.stat.maxStackCells = 0
+    gch.stat.cycleTableSize = 0
+    # init the rt
+    init(gch.zct)
+    init(gch.decStack)
+    init(gch.additionalRoots)
+    init(gch.greyStack)
+
+template gcAssert(cond: bool, msg: string) =
+  when defined(useGcAssert):
+    if not cond:
+      echo "[GCASSERT] ", msg
+      GC_disable()
+      writeStackTrace()
+      quit 1
+
+proc addZCT(s: var CellSeq, c: PCell) {.noinline.} =
+  if (c.refcount and ZctFlag) == 0:
+    c.refcount = c.refcount or ZctFlag
+    add(s, c)
 
 proc cellToUsr(cell: PCell): pointer {.inline.} =
   # convert object (=pointer to refcount) to pointer to userdata
   result = cast[pointer](cast[ByteAddress](cell)+%ByteAddress(sizeof(Cell)))
 
-proc usrToCell*(usr: pointer): PCell {.inline.} =
+proc usrToCell(usr: pointer): PCell {.inline.} =
   # convert pointer to userdata to object (=pointer to refcount)
   result = cast[PCell](cast[ByteAddress](usr)-%ByteAddress(sizeof(Cell)))
 
-proc canbeCycleRoot(c: PCell): bool {.inline.} =
+proc canBeCycleRoot(c: PCell): bool {.inline.} =
   result = ntfAcyclic notin c.typ.flags
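
Every managed object is preceded in memory by its Cell header, so the two
conversions above are a single fixed pointer offset. A standalone sketch of
the same arithmetic (hypothetical header type):

# Header<->userdata pointer arithmetic, as in cellToUsr/usrToCell.
type
  Header = object
    refcount: int
    typId: int

proc hdrToUsr(h: ptr Header): pointer =
  cast[pointer](cast[int](h) + sizeof(Header))

proc usrToHdr(usr: pointer): ptr Header =
  cast[ptr Header](cast[int](usr) - sizeof(Header))

var mem = alloc0(sizeof(Header) + 64)   # header + 64 bytes of user data
let usr = hdrToUsr(cast[ptr Header](mem))
assert usrToHdr(usr) == cast[ptr Header](mem)
dealloc mem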
 
 proc extGetCellType(c: pointer): PNimType {.compilerproc.} =
@@ -254,14 +144,43 @@ proc internRefcount(p: pointer): int {.exportc: "getRefcount".} =
 when BitsPerPage mod (sizeof(int)*8) != 0:
   {.error: "(BitsPerPage mod BitsPerUnit) should be zero!".}
 
+template color(c): expr = c.refCount and colorMask
+template setColor(c, col) =
+  when col == rcWhite:
+    c.refcount = c.refcount and not colorMask
+  else:
+    c.refcount = c.refcount and not colorMask or col
+
+proc writeCell(msg: cstring, c: PCell) =
+  var kind = -1
+  if c.typ != nil: kind = ord(c.typ.kind)
+  when leakDetector:
+    c_fprintf(c_stdout, "[GC] %s: %p %d rc=%ld from %s(%ld)\n",
+              msg, c, kind, c.refcount shr rcShift, c.filename, c.line)
+  else:
+    c_fprintf(c_stdout, "[GC] %s: %p %d rc=%ld; color=%ld\n",
+              msg, c, kind, c.refcount shr rcShift, c.color)
+
+template gcTrace(cell, state: expr): stmt {.immediate.} =
+  when traceGC: traceCell(cell, state)
+
 # forward declarations:
-proc collectCT(gch: var GcHeap)
-proc isOnStack*(p: pointer): bool {.noinline.}
-proc forAllChildren(cell: PCell, op: WalkOp)
-proc doOperation(p: pointer, op: WalkOp)
-proc forAllChildrenAux(dest: pointer, mt: PNimType, op: WalkOp)
+proc collectCT(gch: var GcHeap) {.benign.}
+proc isOnStack(p: pointer): bool {.noinline, benign.}
+proc forAllChildren(cell: PCell, op: WalkOp) {.benign.}
+proc doOperation(p: pointer, op: WalkOp) {.benign.}
+proc forAllChildrenAux(dest: pointer, mt: PNimType, op: WalkOp) {.benign.}
 # we need the prototype here for debugging purposes
 
+when hasThreadSupport and hasSharedHeap:
+  template `--`(x: expr): expr = atomicDec(x, rcIncrement) <% rcIncrement
+  template `++`(x: expr): stmt = discard atomicInc(x, rcIncrement)
+else:
+  template `--`(x: expr): expr =
+    dec(x, rcIncrement)
+    x <% rcIncrement
+  template `++`(x: expr): stmt = inc(x, rcIncrement)
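
Note that `--` both decrements and tests: it subtracts one rcIncrement and
reports, via the unsigned comparison `<%`, whether the count stored above
rcShift reached zero (the flag bits below rcShift cannot produce a false
positive). A standalone sketch of the idiom:

# The decrement-and-test idiom behind `--`, with the same constants.
const
  rcShift = 3
  rcIncrement = 1 shl rcShift

proc decAndTest(rc: var int): bool =
  dec(rc, rcIncrement)
  rc <% rcIncrement      # unsigned "<": true once only flag bits remain

var refcount = 2 * rcIncrement   # RC == 2, no flag bits set
assert not decAndTest(refcount)  # 2 -> 1: still referenced
assert decAndTest(refcount)      # 1 -> 0: candidate for the ZCT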
+
 proc prepareDealloc(cell: PCell) =
   if cell.typ.finalizer != nil:
     # the finalizer could invoke something that
@@ -273,246 +192,127 @@ proc prepareDealloc(cell: PCell) =
     (cast[Finalizer](cell.typ.finalizer))(cellToUsr(cell))
     dec(gch.recGcLock)
 
-when traceGC:
-  # traceGC is a special switch to enable extensive debugging
-  type
-    CellState = enum
-      csAllocated, csFreed
-  {.deprecated: [TCellState: CellState].}
-  var
-    states: array[CellState, CellSet]
-
-  proc traceCell(c: PCell, state: CellState) =
-    case state
-    of csAllocated:
-      if c in states[csAllocated]:
-        writeCell("attempt to alloc an already allocated cell", c)
-        sysAssert(false, "traceCell 1")
-      excl(states[csFreed], c)
-      # writecell("allocated", c)
-    of csFreed:
-      if c in states[csFreed]:
-        writeCell("attempt to free a cell twice", c)
-        sysAssert(false, "traceCell 2")
-      if c notin states[csAllocated]:
-        writeCell("attempt to free not an allocated cell", c)
-        sysAssert(false, "traceCell 3")
-      excl(states[csAllocated], c)
-      # writecell("freed", c)
-    incl(states[state], c)
-
-  proc computeCellWeight(c: PCell): int =
-    var x: CellSet
-    x.init
-
-    let startLen = gch.tempStack.len
-    c.forAllChildren waPush
-
-    while startLen != gch.tempStack.len:
-      dec gch.tempStack.len
-      var c = gch.tempStack.d[gch.tempStack.len]
-      if c in states[csFreed]: continue
-      inc result
-      if c notin x:
-        x.incl c
-        c.forAllChildren waPush
-
-  template markChildrenRec(cell) =
-    let startLen = gch.tempStack.len
-    cell.forAllChildren waPush
-    let isMarked = cell.isBitUp(rcMarkBit)
-    while startLen != gch.tempStack.len:
-      dec gch.tempStack.len
-      var c = gch.tempStack.d[gch.tempStack.len]
-      if c in states[csFreed]: continue
-      if c.isBitDown(rcMarkBit):
-        c.setBit rcMarkBit
-        c.forAllChildren waPush
-    if c.isBitUp(rcMarkBit) and not isMarked:
-      writecell("cyclic cell", cell)
-      cprintf "Weight %d\n", cell.computeCellWeight
-
-  proc writeLeakage(onlyRoots: bool) =
-    if onlyRoots:
-      for c in elements(states[csAllocated]):
-        if c notin states[csFreed]:
-          markChildrenRec(c)
-    var f = 0
-    var a = 0
-    for c in elements(states[csAllocated]):
-      inc a
-      if c in states[csFreed]: inc f
-      elif c.isBitDown(rcMarkBit):
-        writeCell("leak", c)
-        cprintf "Weight %d\n", c.computeCellWeight
-    cfprintf(cstdout, "Allocations: %ld; freed: %ld\n", a, f)
-
-template gcTrace(cell, state: expr): stmt {.immediate.} =
-  when logGC: writeCell($state, cell)
-  when traceGC: traceCell(cell, state)
-
-template WithHeapLock(blk: stmt): stmt =
-  when hasThreadSupport and hasSharedHeap: AcquireSys(HeapLock)
-  blk
-  when hasThreadSupport and hasSharedHeap: ReleaseSys(HeapLock)
-
 proc rtlAddCycleRoot(c: PCell) {.rtl, inl.} =
   # we MUST access gch as a global here, because this crosses DLL boundaries!
-  WithHeapLock: addCycleRoot(gch.cycleRoots, c)
+  discard
 
 proc rtlAddZCT(c: PCell) {.rtl, inl.} =
   # we MUST access gch as a global here, because this crosses DLL boundaries!
-  WithHeapLock: addZCT(gch.zct, c)
+  addZCT(gch.zct, c)
 
-type
-  CyclicMode = enum
-    Cyclic,
-    Acyclic,
-    MaybeCyclic
-
-  ReleaseType = enum
-    AddToZTC
-    FreeImmediately
-
-  HeapType = enum
-    LocalHeap
-    SharedHeap
-{.deprecated: [TCyclicMode: CyclicMode, TReleaseType: ReleaseType,
-              THeapType: HeapType].}
-
-template `++` (rc: RefCount, heapType: HeapType): stmt =
-  when heapType == SharedHeap:
-    discard atomicInc(rc, rcIncrement)
-  else:
-    inc rc, rcIncrement
-
-template `--`(rc: RefCount): expr =
-  dec rc, rcIncrement
-  rc <% rcIncrement
-
-template `--` (rc: RefCount, heapType: HeapType): expr =
-  (when heapType == SharedHeap: atomicDec(rc, rcIncrement) <% rcIncrement else: --rc)
-
-template doDecRef(cc: PCell,
-                  heapType = LocalHeap,
-                  cycleFlag = MaybeCyclic): stmt =
-  var c = cc
-  sysAssert(isAllocatedPtr(gch.region, c), "decRef: interiorPtr")
-  # XXX: move this elesewhere
-
-  sysAssert(c.refcount >=% rcIncrement, "decRef")
-  if c.refcount--(heapType):
-    # this is the last reference from the heap
-    # add to a zero-count-table that will be matched against stack pointers
+proc decRef(c: PCell) {.inline.} =
+  gcAssert(isAllocatedPtr(gch.region, c), "decRef: interiorPtr")
+  gcAssert(c.refcount >=% rcIncrement, "decRef")
+  if --c.refcount:
     rtlAddZCT(c)
-  else:
-    when cycleFlag != Acyclic:
-      if cycleFlag == Cyclic or canBeCycleRoot(c):
-        # a cycle may have been broken
-        rtlAddCycleRoot(c)
-
-template doIncRef(cc: PCell,
-                 heapType = LocalHeap,
-                 cycleFlag = MaybeCyclic): stmt =
-  var c = cc
-  c.refcount++(heapType)
-  when cycleFlag != Acyclic:
-    when NewObjectsAreCycleRoots:
-      if canbeCycleRoot(c):
-        addCycleRoot(gch.cycleRoots, c)
-    elif IncRefRemovesCandidates:
-      c.setColor rcAlive
-  # XXX: this is not really atomic enough!
-
-proc nimGCref(p: pointer) {.compilerProc, inline.} = doIncRef(usrToCell(p))
-proc nimGCunref(p: pointer) {.compilerProc, inline.} = doDecRef(usrToCell(p))
+
+proc incRef(c: PCell) {.inline.} =
+  gcAssert(isAllocatedPtr(gch.region, c), "incRef: interiorPtr")
+  c.refcount = c.refcount +% rcIncrement
+
+proc nimGCref(p: pointer) {.compilerProc.} =
+  let cell = usrToCell(p)
+  incRef(cell)
+  add(gch.additionalRoots, cell)
+
+proc nimGCunref(p: pointer) {.compilerProc.} =
+  let cell = usrToCell(p)
+  decRef(cell)
+  var L = gch.additionalRoots.len-1
+  var i = L
+  let d = gch.additionalRoots.d
+  while i >= 0:
+    if d[i] == cell:
+      d[i] = d[L]
+      dec gch.additionalRoots.len
+      break
+    dec(i)
+
+template markGrey(x: PCell) =
+  if x.color == rcWhite and gch.phase == Phase.Marking:
+    x.setColor(rcGrey)
+    add(gch.greyStack, x)
+
+proc GC_addCycleRoot*[T](p: ref T) {.inline.} =
+  ## adds 'p' to the cycle candidate set for the cycle collector. It is
+  ## necessary if you used the 'acyclic' pragma for optimization
+  ## purposes and need to break cycles manually.
+  rtlAddCycleRoot(usrToCell(cast[pointer](p)))
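
In this particular GC the call is a no-op (rtlAddCycleRoot discards its
argument, because the backup mark&sweep finds cycles without a candidate
set), but the API is kept for source compatibility. A hypothetical usage
sketch:

# Hypothetical: breaking a cycle by hand for a type marked {.acyclic.}.
type
  Node {.acyclic.} = ref object
    next: Node

var a = Node()
var b = Node(next: a)
a.next = b            # a cycle the collector was told cannot exist
GC_addCycleRoot(a)    # ask the cycle collector to consider it anyway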
 
 proc nimGCunrefNoCycle(p: pointer) {.compilerProc, inline.} =
   sysAssert(allocInv(gch.region), "begin nimGCunrefNoCycle")
   var c = usrToCell(p)
-  sysAssert(isAllocatedPtr(gch.region, c), "nimGCunrefNoCycle: isAllocatedPtr")
-  if c.refcount--(LocalHeap):
+  gcAssert(isAllocatedPtr(gch.region, c), "nimGCunrefNoCycle: isAllocatedPtr")
+  if --c.refcount:
     rtlAddZCT(c)
     sysAssert(allocInv(gch.region), "end nimGCunrefNoCycle 2")
   sysAssert(allocInv(gch.region), "end nimGCunrefNoCycle 5")
 
-template doAsgnRef(dest: PPointer, src: pointer,
-                  heapType = LocalHeap, cycleFlag = MaybeCyclic): stmt =
-  sysAssert(not isOnStack(dest), "asgnRef")
-  # BUGFIX: first incRef then decRef!
-  if src != nil: doIncRef(usrToCell(src), heapType, cycleFlag)
-  if dest[] != nil: doDecRef(usrToCell(dest[]), heapType, cycleFlag)
-  dest[] = src
-
 proc asgnRef(dest: PPointer, src: pointer) {.compilerProc, inline.} =
   # the code generator calls this proc!
-  doAsgnRef(dest, src, LocalHeap, MaybeCyclic)
+  gcAssert(not isOnStack(dest), "asgnRef")
+  # BUGFIX: first incRef then decRef!
+  if src != nil:
+    let s = usrToCell(src)
+    incRef(s)
+    markGrey(s)
+  if dest[] != nil: decRef(usrToCell(dest[]))
+  dest[] = src
 
 proc asgnRefNoCycle(dest: PPointer, src: pointer) {.compilerProc, inline.} =
   # the code generator calls this proc if it is known at compile time that no
   # cycle is possible.
-  doAsgnRef(dest, src, LocalHeap, Acyclic)
+  gcAssert(not isOnStack(dest), "asgnRefNoCycle")
+  if src != nil:
+    var c = usrToCell(src)
+    ++c.refcount
+    markGrey(c)
+  if dest[] != nil:
+    var c = usrToCell(dest[])
+    if --c.refcount:
+      rtlAddZCT(c)
+  dest[] = src
 
 proc unsureAsgnRef(dest: PPointer, src: pointer) {.compilerProc.} =
   # unsureAsgnRef updates the reference counters only if dest is not on the
  # stack. It is used by the code generator if it cannot decide whether a
   # reference is in the stack or not (this can happen for var parameters).
   if not isOnStack(dest):
-    if src != nil: doIncRef(usrToCell(src))
-    # XXX we must detect a shared heap here
-    # better idea may be to just eliminate the need for unsureAsgnRef
-    #
+    if src != nil:
+      let s = usrToCell(src)
+      incRef(s)
+      markGrey(s)
     # XXX finally use assembler for the stack checking instead!
     # the test for '!= nil' is correct, but I got tired of the segfaults
     # resulting from the crappy stack checking:
-    if cast[int](dest[]) >=% PageSize: doDecRef(usrToCell(dest[]))
+    if cast[int](dest[]) >=% PageSize: decRef(usrToCell(dest[]))
   else:
     # can't be an interior pointer if it's a stack location!
-    sysAssert(interiorAllocatedPtr(gch.region, dest)==nil,
-              "stack loc AND interior pointer")
+    gcAssert(interiorAllocatedPtr(gch.region, dest) == nil,
+             "stack loc AND interior pointer")
   dest[] = src
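
These barriers are also where the incremental marker keeps its invariant:
any reference stored into the heap while gch.phase == Phase.Marking is
greyed via markGrey, so marking can be interleaved with the mutator without
losing reachable objects. A self-contained sketch of that part of the
barrier (hypothetical names):

# Sketch: a marking-aware write barrier for incremental mark&sweep.
type
  MColor = enum mWhite, mGrey, mBlack
  MObj = ref object
    color: MColor
    child: MObj

var
  marking = false        # stands in for gch.phase == Phase.Marking
  greyStack: seq[MObj]   # stands in for gch.greyStack

proc markGreySketch(x: MObj) =
  # a white object stored during marking must be re-examined
  if marking and x.color == mWhite:
    x.color = mGrey
    greyStack.add x

proc asgnRefSketch(dest: var MObj, src: MObj) =
  if src != nil: markGreySketch(src)   # next to the RC updates above
  dest = src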
 
-when hasThreadSupport and hasSharedHeap:
-  # shared heap version of the above procs
-  proc asgnRefSh(dest: PPointer, src: pointer) {.compilerProc, inline.} =
-    doAsgnRef(dest, src, SharedHeap, MaybeCyclic)
-
-  proc asgnRefNoCycleSh(dest: PPointer, src: pointer) {.compilerProc, inline.} =
-    doAsgnRef(dest, src, SharedHeap, Acyclic)
+type
+  GlobalMarkerProc = proc () {.nimcall, benign.}
+var
+  globalMarkersLen: int
+  globalMarkers: array[0.. 7_000, GlobalMarkerProc]
 
-proc initGC() =
-  when not defined(useNimRtl):
-    when traceGC:
-      for i in low(CellState)..high(CellState): init(states[i])
-    gch.cycleThreshold = InitialCycleThreshold
-    gch.stat.stackScans = 0
-    gch.stat.cycleCollections = 0
-    gch.stat.maxThreshold = 0
-    gch.stat.maxStackSize = 0
-    gch.stat.maxStackCells = 0
-    gch.stat.cycleTableSize = 0
-    # init the rt
-    init(gch.zct)
-    init(gch.tempStack)
-    init(gch.freeStack)
-    init(gch.cycleRoots)
-    init(gch.decStack)
+proc nimRegisterGlobalMarker(markerProc: GlobalMarkerProc) {.compilerProc.} =
+  if globalMarkersLen <= high(globalMarkers):
+    globalMarkers[globalMarkersLen] = markerProc
+    inc globalMarkersLen
+  else:
+    echo "[GC] cannot register global variable; too many global variables"
+    quit 1
 
-proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: WalkOp) =
+proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: WalkOp) {.benign.} =
   var d = cast[ByteAddress](dest)
   case n.kind
   of nkSlot: forAllChildrenAux(cast[pointer](d +% n.offset), n.typ, op)
   of nkList:
     for i in 0..n.len-1:
-      # inlined for speed
-      if n.sons[i].kind == nkSlot:
-        if n.sons[i].typ.kind in {tyRef, tyString, tySequence}:
-          doOperation(cast[PPointer](d +% n.sons[i].offset)[], op)
-        else:
-          forAllChildrenAux(cast[pointer](d +% n.sons[i].offset),
-                            n.sons[i].typ, op)
-      else:
-        forAllSlotsAux(dest, n.sons[i], op)
+      forAllSlotsAux(dest, n.sons[i], op)
   of nkCase:
     var m = selectBranch(dest, n)
     if m != nil: forAllSlotsAux(dest, m, op)
@@ -533,9 +333,10 @@ proc forAllChildrenAux(dest: pointer, mt: PNimType, op: WalkOp) =
     else: discard
 
 proc forAllChildren(cell: PCell, op: WalkOp) =
-  sysAssert(cell != nil, "forAllChildren: 1")
-  sysAssert(cell.typ != nil, "forAllChildren: 2")
-  sysAssert cell.typ.kind in {tyRef, tySequence, tyString}, "forAllChildren: 3"
+  gcAssert(cell != nil, "forAllChildren: 1")
+  gcAssert(isAllocatedPtr(gch.region, cell), "forAllChildren: 2")
+  gcAssert(cell.typ != nil, "forAllChildren: 3")
+  gcAssert cell.typ.kind in {tyRef, tySequence, tyString}, "forAllChildren: 4"
   let marker = cell.typ.marker
   if marker != nil:
     marker(cellToUsr(cell), op.int)
@@ -547,10 +348,9 @@ proc forAllChildren(cell: PCell, op: WalkOp) =
       var d = cast[ByteAddress](cellToUsr(cell))
       var s = cast[PGenericSeq](d)
       if s != nil:
-        let baseAddr = d +% GenericSeqSize
         for i in 0..s.len-1:
-          forAllChildrenAux(cast[pointer](baseAddr +% i *% cell.typ.base.size),
-                            cell.typ.base, op)
+          forAllChildrenAux(cast[pointer](d +% i *% cell.typ.base.size +%
+            GenericSeqSize), cell.typ.base, op)
     else: discard
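
For sequences the traversal steps over len elements of size base.size that
start GenericSeqSize bytes past the seq header, as in the seq branch of
forAllChildren above. A standalone sketch of the address computation
(headerSize stands in for GenericSeqSize):

# Visiting each element of a flat buffer by address arithmetic.
const headerSize = 16   # stands in for GenericSeqSize

proc elemAddr(data: pointer, i, elemSize: int): pointer =
  cast[pointer](cast[int](data) + i * elemSize + headerSize)

var buf = alloc0(headerSize + 4 * 8)   # header + 4 elements of 8 bytes
for i in 0 .. 3:
  discard elemAddr(buf, i, 8)          # the real code recurses here
dealloc buf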
 
 proc addNewObjToZCT(res: PCell, gch: var GcHeap) {.inline.} =
@@ -571,7 +371,7 @@ proc addNewObjToZCT(res: PCell, gch: var GcHeap) {.inline.} =
     template replaceZctEntry(i: expr) =
       c = d[i]
       if c.refcount >=% rcIncrement:
-        c.clearBit(rcZct)
+        c.refcount = c.refcount and not ZctFlag
         d[i] = res
         return
     if L > 8:
@@ -592,408 +392,302 @@ proc addNewObjToZCT(res: PCell, gch: var GcHeap) {.inline.} =
     for i in countdown(L-1, max(0, L-8)):
       var c = d[i]
       if c.refcount >=% rcIncrement:
-        c.clearBit(rcZct)
+        c.refcount = c.refcount and not ZctFlag
         d[i] = res
         return
     add(gch.zct, res)
 
-proc rawNewObj(typ: PNimType, size: int, gch: var GcHeap, rc1 = false): pointer =
+{.push stackTrace: off, profiler:off.}
+proc gcInvariant*() =
+  sysAssert(allocInv(gch.region), "injected")
+  when declared(markForDebug):
+    markForDebug(gch)
+{.pop.}
+
+proc rawNewObj(typ: PNimType, size: int, gch: var GcHeap): pointer =
   # generates a new object and sets its reference counter to 0
-  acquire(gch)
   sysAssert(allocInv(gch.region), "rawNewObj begin")
-  sysAssert(typ.kind in {tyRef, tyString, tySequence}, "newObj: 1")
-
+  gcAssert(typ.kind in {tyRef, tyString, tySequence}, "newObj: 1")
   collectCT(gch)
-  sysAssert(allocInv(gch.region), "rawNewObj after collect")
-
   var res = cast[PCell](rawAlloc(gch.region, size + sizeof(Cell)))
-  sysAssert(allocInv(gch.region), "rawNewObj after rawAlloc")
-
-  sysAssert((cast[ByteAddress](res) and (MemAlign-1)) == 0, "newObj: 2")
-
+  gcAssert((cast[ByteAddress](res) and (MemAlign-1)) == 0, "newObj: 2")
+  # now it is buffered in the ZCT
   res.typ = typ
-
-  when trackAllocationSource and not hasThreadSupport:
-    if framePtr != nil and framePtr.prev != nil and framePtr.prev.prev != nil:
-      res.filename = framePtr.prev.prev.filename
-      res.line = framePtr.prev.prev.line
-    else:
-      res.filename = "nofile"
-
-  if rc1:
-    res.refcount = rcIncrement # refcount is 1
-  else:
-    # its refcount is zero, so add it to the ZCT:
-    res.refcount = rcZct
-    addNewObjToZCT(res, gch)
-
-    if NewObjectsAreCycleRoots and canBeCycleRoot(res):
-      res.setBit(rcInCycleRoots)
-      res.setColor rcCycleCandidate
-      gch.cycleRoots.add res
-
+  when leakDetector and not hasThreadSupport:
+    if framePtr != nil and framePtr.prev != nil:
+      res.filename = framePtr.prev.filename
+      res.line = framePtr.prev.line
+  # refcount is zero, color is black, but mark it to be in the ZCT
+  res.refcount = ZctFlag
   sysAssert(isAllocatedPtr(gch.region, res), "newObj: 3")
-
+  # its refcount is zero, so add it to the ZCT:
+  addNewObjToZCT(res, gch)
   when logGC: writeCell("new cell", res)
   gcTrace(res, csAllocated)
-  release(gch)
+  when useCellIds:
+    inc gch.idGenerator
+    res.id = gch.idGenerator
   result = cellToUsr(res)
   sysAssert(allocInv(gch.region), "rawNewObj end")
 
 {.pop.}
 
-proc freeCell(gch: var GcHeap, c: PCell) =
-  # prepareDealloc(c)
-  gcTrace(c, csFreed)
-
-  when reallyDealloc: rawDealloc(gch.region, c)
-  else:
-    sysAssert(c.typ != nil, "collectCycles")
-    zeroMem(c, sizeof(Cell))
-
-template eraseAt(cells: var CellSeq, at: int): stmt =
-  cells.d[at] = cells.d[cells.len - 1]
-  dec cells.len
-
-template trimAt(roots: var CellSeq, at: int): stmt =
-  # This will remove a cycle root candidate during trimming.
-  # a candidate is removed either because it received a refup and
-  # it's no longer a candidate or because it received further refdowns
-  # and now it's dead for sure.
-  let c = roots.d[at]
-  c.clearBit(rcInCycleRoots)
-  roots.eraseAt(at)
-  if c.isBitUp(rcReallyDead) and c.refcount <% rcIncrement:
-    # This case covers both dead objects and retired buffers
-    # That's why we must also check the refcount (it may be
-    # kept possitive by stack references).
-    freeCell(gch, c)
+proc newObjNoInit(typ: PNimType, size: int): pointer {.compilerRtl.} =
+  result = rawNewObj(typ, size, gch)
+  when defined(memProfiler): nimProfile(size)
 
 proc newObj(typ: PNimType, size: int): pointer {.compilerRtl.} =
-  setStackTop(gch)
-  result = rawNewObj(typ, size, gch, false)
+  result = rawNewObj(typ, size, gch)
   zeroMem(result, size)
   when defined(memProfiler): nimProfile(size)
 
-proc newObjNoInit(typ: PNimType, size: int): pointer {.compilerRtl.} =
-  setStackTop(gch)
-  result = rawNewObj(typ, size, gch, false)
-  when defined(memProfiler): nimProfile(size)
-
 proc newSeq(typ: PNimType, len: int): pointer {.compilerRtl.} =
-  setStackTop(gch)
   # `newObj` already uses locks, so no need for them here.
   let size = addInt(mulInt(len, typ.base.size), GenericSeqSize)
   result = newObj(typ, size)
   cast[PGenericSeq](result).len = len
   cast[PGenericSeq](result).reserved = len
+  when defined(memProfiler): nimProfile(size)
 
 proc newObjRC1(typ: PNimType, size: int): pointer {.compilerRtl.} =
-  setStackTop(gch)
-  result = rawNewObj(typ, size, gch, true)
+  # generates a new object and sets its reference counter to 1
+  sysAssert(allocInv(gch.region), "newObjRC1 begin")
+  gcAssert(typ.kind in {tyRef, tyString, tySequence}, "newObj: 1")
+  collectCT(gch)
+  sysAssert(allocInv(gch.region), "newObjRC1 after collectCT")
+
+  var res = cast[PCell](rawAlloc(gch.region, size + sizeof(Cell)))
+  sysAssert(allocInv(gch.region), "newObjRC1 after rawAlloc")
+  sysAssert((cast[ByteAddress](res) and (MemAlign-1)) == 0, "newObj: 2")
+  # now it is buffered in the ZCT
+  res.typ = typ
+  when leakDetector and not hasThreadSupport:
+    if framePtr != nil and framePtr.prev != nil:
+      res.filename = framePtr.prev.filename
+      res.line = framePtr.prev.line
+  res.refcount = rcIncrement # refcount is 1
+  sysAssert(isAllocatedPtr(gch.region, res), "newObj: 3")
+  when logGC: writeCell("new cell", res)
+  gcTrace(res, csAllocated)
+  when useCellIds:
+    inc gch.idGenerator
+    res.id = gch.idGenerator
+  result = cellToUsr(res)
+  zeroMem(result, size)
+  sysAssert(allocInv(gch.region), "newObjRC1 end")
   when defined(memProfiler): nimProfile(size)
 
 proc newSeqRC1(typ: PNimType, len: int): pointer {.compilerRtl.} =
-  setStackTop(gch)
   let size = addInt(mulInt(len, typ.base.size), GenericSeqSize)
   result = newObjRC1(typ, size)
   cast[PGenericSeq](result).len = len
   cast[PGenericSeq](result).reserved = len
+  when defined(memProfiler): nimProfile(size)
 
 proc growObj(old: pointer, newsize: int, gch: var GcHeap): pointer =
-  acquire(gch)
   collectCT(gch)
   var ol = usrToCell(old)
   sysAssert(ol.typ != nil, "growObj: 1")
-  sysAssert(ol.typ.kind in {tyString, tySequence}, "growObj: 2")
+  gcAssert(ol.typ.kind in {tyString, tySequence}, "growObj: 2")
   sysAssert(allocInv(gch.region), "growObj begin")
 
   var res = cast[PCell](rawAlloc(gch.region, newsize + sizeof(Cell)))
-  var elemSize = if ol.typ.kind != tyString: ol.typ.base.size
-                 else: 1
+  var elemSize = 1
+  if ol.typ.kind != tyString: elemSize = ol.typ.base.size
 
   var oldsize = cast[PGenericSeq](old).len*elemSize + GenericSeqSize
-
-  # XXX: This should happen outside
-  # call user-defined move code
-  # call user-defined default constructor
   copyMem(res, ol, oldsize + sizeof(Cell))
   zeroMem(cast[pointer](cast[ByteAddress](res)+% oldsize +% sizeof(Cell)),
           newsize-oldsize)
-
   sysAssert((cast[ByteAddress](res) and (MemAlign-1)) == 0, "growObj: 3")
-  sysAssert(res.refcount shr rcShift <=% 1, "growObj: 4")
-
-  when false:
-    if ol.isBitUp(rcZct):
-      var j = gch.zct.len-1
-      var d = gch.zct.d
-      while j >= 0:
-        if d[j] == ol:
-          d[j] = res
-          break
-        dec(j)
-
-    if ol.isBitUp(rcInCycleRoots):
-      for i in 0 .. <gch.cycleRoots.len:
-        if gch.cycleRoots.d[i] == ol:
-          eraseAt(gch.cycleRoots, i)
-
-    freeCell(gch, ol)
-
-  else:
-    # the new buffer inherits the GC state of the old one
-    if res.isBitUp(rcZct): gch.zct.add res
-    if res.isBitUp(rcInCycleRoots): gch.cycleRoots.add res
-
-    # Pay attention to what's going on here! We're not releasing the old memory.
-    # This is because at this point there may be an interior pointer pointing
-    # into this buffer somewhere on the stack (due to `var` parameters now and
-    # and `let` and `var:var` stack locations in the future).
-    # We'll release the memory in the next GC cycle. If we release it here,
-    # we cannot guarantee that no memory will be corrupted when only safe
-    # language features are used. Accessing the memory after the seq/string
-    # has been invalidated may still result in logic errors in the user code.
-    # We may improve on that by protecting the page in debug builds or
-    # by providing a warning when we detect a stack pointer into it.
-    let bufferFlags = ol.refcount and rcBufferedAnywhere
-    if bufferFlags == 0:
-      # we need this in order to collect it safely later
-      ol.refcount = rcRetiredBuffer or rcZct
-      gch.zct.add ol
-    else:
-      ol.refcount = rcRetiredBuffer or bufferFlags
-
-    when logGC:
-      writeCell("growObj old cell", ol)
-      writeCell("growObj new cell", res)
-
+  # This can be wrong for intermediate temps that are nevertheless on the
+  # heap because of lambda lifting:
+  #gcAssert(res.refcount shr rcShift <=% 1, "growObj: 4")
+  when logGC:
+    writeCell("growObj old cell", ol)
+    writeCell("growObj new cell", res)
+  gcTrace(ol, csZctFreed)
   gcTrace(res, csAllocated)
-  release(gch)
+  when reallyDealloc:
+    sysAssert(allocInv(gch.region), "growObj before dealloc")
+    if ol.refcount shr rcShift <=% 1:
+      # free immediately to save space:
+      if (ol.refcount and ZctFlag) != 0:
+        var j = gch.zct.len-1
+        var d = gch.zct.d
+        while j >= 0:
+          if d[j] == ol:
+            d[j] = res
+            break
+          dec(j)
+      rawDealloc(gch.region, ol)
+    else:
+      # we split the old refcount in 2 parts. XXX This is still not entirely
+      # correct if the pointer that receives growObj's result is on the stack.
+      # A better fix would be to emit the location specific write barrier for
+      # 'growObj', but this is a lot more work and who knows what new problems
+      # this would create.
+      res.refcount = rcIncrement
+      decRef(ol)
+  else:
+    sysAssert(ol.typ != nil, "growObj: 5")
+    zeroMem(ol, sizeof(Cell))
+  when useCellIds:
+    inc gch.idGenerator
+    res.id = gch.idGenerator
   result = cellToUsr(res)
   sysAssert(allocInv(gch.region), "growObj end")
   when defined(memProfiler): nimProfile(newsize-oldsize)
 
 proc growObj(old: pointer, newsize: int): pointer {.rtl.} =
-  setStackTop(gch)
   result = growObj(old, newsize, gch)
 
 {.push profiler:off.}
 
-# ---------------- cycle collector -------------------------------------------
 
-proc doOperation(p: pointer, op: WalkOp) =
-  if p == nil: return
-  var c: PCell = usrToCell(p)
-  sysAssert(c != nil, "doOperation: 1")
-  gch.tempStack.add c
+template takeStartTime(workPackageSize) {.dirty.} =
+  const workPackage = workPackageSize
+  when withRealTime:
+    var steps = workPackage
+    var t0: Ticks
+    if gch.maxPause > 0: t0 = getticks()
 
-proc nimGCvisit(d: pointer, op: int) {.compilerRtl.} =
-  doOperation(d, WalkOp(op))
+template takeTime {.dirty.} =
+  when withRealTime: dec steps
+
+template checkTime {.dirty.} =
+  when withRealTime:
+    if steps == 0:
+      steps = workPackage
+      if gch.maxPause > 0:
+        let duration = getticks() - t0
+        # the GC's measuring is not accurate and needs some cleanup actions
+        # (stack unmarking), so subtract some short amount of time in
+        # order to miss deadlines less often:
+        if duration >= gch.maxPause - 50_000:
+          return false
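
These three templates implement the pause budget: do workPackage units of
work between clock reads, and bail out (reporting "not finished") when the
remaining budget gets too small. A minimal sketch of the same pattern using
the modern stdlib clock (the GC itself uses its own tick source):

# Time-sliced work with a deadline, as takeStartTime/takeTime/checkTime do.
import std/[monotimes, times]

proc workSlice(work: var seq[int], maxPause: Duration): bool =
  const workPackage = 100
  var steps = workPackage
  let t0 = getMonoTime()
  while work.len > 0:
    discard work.pop()         # one unit of work
    dec steps
    if steps == 0:             # amortize the clock reads
      steps = workPackage
      if getMonoTime() - t0 >= maxPause:
        return false           # deadline near: resume in the next slice
  true                         # finished within the budget

# e.g. while not workSlice(jobs, initDuration(microseconds = 100)): ...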
 
-type
-  RecursionType = enum
-    FromChildren,
-    FromRoot
-{.deprecated: [TRecursionType: RecursionType].}
+# ---------------- cycle collector -------------------------------------------
 
-proc collectZCT(gch: var GcHeap): bool
+proc freeCyclicCell(gch: var GcHeap, c: PCell) =
+  prepareDealloc(c)
+  gcTrace(c, csCycFreed)
+  when logGC: writeCell("cycle collector dealloc cell", c)
+  when reallyDealloc:
+    sysAssert(allocInv(gch.region), "free cyclic cell")
+    rawDealloc(gch.region, c)
+  else:
+    gcAssert(c.typ != nil, "freeCyclicCell")
+    zeroMem(c, sizeof(Cell))
 
-template pseudoRecursion(typ: RecursionType, body: stmt): stmt =
-  discard
+proc sweep(gch: var GcHeap) =
+  # XXX make this incremental!
+  for x in allObjects(gch.region):
+    if isCell(x):
+      # cast to PCell is correct here:
+      var c = cast[PCell](x)
+      if c.color == rcWhite: freeCyclicCell(gch, c)
+      else: c.setColor(rcWhite)
+
+proc markS(gch: var GcHeap, c: PCell) =
+  if c.color != rcGrey:
+    c.setColor(rcGrey)
+    add(gch.greyStack, c)
+  #forAllChildren(c, waMarkGrey)
+  #c.setColor(rcBlack)
+
+proc markIncrementally(gch: var GcHeap): bool =
+  var L = addr(gch.greyStack.len)
+  takeStartTime(100)
+  while L[] > 0:
+    var c = gch.greyStack.d[0]
+    sysAssert(isAllocatedPtr(gch.region, c), "markIncrementally: isAllocatedPtr")
+    gch.greyStack.d[0] = gch.greyStack.d[L[] - 1]
+    dec(L[])
+    takeTime()
+    if c.color == rcGrey:
+      forAllChildren(c, waMarkGrey)
+      c.setColor(rcBlack)
+    checkTime()
+  result = true
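
This is the classic tri-color loop: pop a grey cell, grey its white
children, blacken it, and stop early when the pause budget runs out; sweep
then frees whatever stayed white. A self-contained sketch without the
deadline handling (hypothetical types):

# Tri-color marking in miniature; the real loop also honors maxPause.
type
  TColor = enum tWhite, tGrey, tBlack
  TObj = ref object
    color: TColor
    children: seq[TObj]

proc mark(roots: seq[TObj]) =
  var greyStack = roots
  for r in greyStack: r.color = tGrey
  while greyStack.len > 0:
    let c = greyStack.pop()
    if c.color == tGrey:
      for ch in c.children:          # forAllChildren(c, waMarkGrey)
        if ch.color == tWhite:
          ch.color = tGrey
          greyStack.add ch
      c.color = tBlack
  # sweep: everything still white is unreachable and may be freed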
 
-proc trimCycleRoots(gch: var GcHeap, startIdx = gch.cycleRootsTrimIdx) =
-  var i = startIdx
-  while i < gch.cycleRoots.len:
-    if gch.cycleRoots.d[i].color != rcCycleCandidate:
-      gch.cycleRoots.trimAt i
-    else:
-      inc i
+proc markGlobals(gch: var GcHeap) =
+  for i in 0 .. <globalMarkersLen: globalMarkers[i]()
 
-  gch.cycleRootsTrimIdx = gch.cycleRoots.len
+proc markLocals(gch: var GcHeap) =
+  var d = gch.decStack.d
+  for i in 0..<gch.decStack.len:
+    sysAssert isAllocatedPtr(gch.region, d[i]), "markLocals"
+    markS(gch, d[i])
 
-# we now use a much simpler and non-recursive algorithm for cycle removal
-proc collectCycles(gch: var GcHeap) =
-  if gch.cycleRoots.len == 0: return
-  gch.stat.cycleTableSize = max(gch.stat.cycleTableSize, gch.cycleRoots.len)
+when logGC:
+  var
+    cycleCheckA: array[100, PCell]
+    cycleCheckALen = 0
+
+  proc alreadySeen(c: PCell): bool =
+    for i in 0 .. <cycleCheckALen:
+      if cycleCheckA[i] == c: return true
+    if cycleCheckALen == len(cycleCheckA):
+      gcAssert(false, "cycle detection overflow")
+      quit 1
+    cycleCheckA[cycleCheckALen] = c
+    inc cycleCheckALen
+
+  proc debugGraph(s: PCell) =
+    if alreadySeen(s):
+      writeCell("child cell (already seen) ", s)
+    else:
+      writeCell("cell {", s)
+      forAllChildren(s, waDebug)
+      c_fprintf(c_stdout, "}\n")
 
-  when CollectCyclesStats:
-    let l0 = gch.cycleRoots.len
-    let tStart = getTicks()
+proc doOperation(p: pointer, op: WalkOp) =
+  if p == nil: return
+  var c: PCell = usrToCell(p)
+  gcAssert(c != nil, "doOperation: 1")
+  # the 'case' should be faster than function pointers because of easy
+  # prediction:
+  case op
+  of waZctDecRef:
+    #if not isAllocatedPtr(gch.region, c):
+    #  c_fprintf(c_stdout, "[GC] decref bug: %p", c)
+    gcAssert(isAllocatedPtr(gch.region, c), "decRef: waZctDecRef")
+    gcAssert(c.refcount >=% rcIncrement, "doOperation 2")
+    #c.refcount = c.refcount -% rcIncrement
+    when logGC: writeCell("decref (from doOperation)", c)
+    decRef(c)
+    #if c.refcount <% rcIncrement: addZCT(gch.zct, c)
+  of waMarkGlobal:
+    when hasThreadSupport:
+      # could point to a cell which we don't own and don't want to touch/trace
+      if isAllocatedPtr(gch.region, c):
+        markS(gch, c)
+    else:
+      markS(gch, c)
+  of waMarkGrey:
+    if c.color != rcGrey:
+      c.setColor(rcGrey)
+      add(gch.greyStack, c)
+  #of waDebug: debugGraph(c)
 
-  var
-    decrefs = 0
-    increfs = 0
-    collected = 0
-    maybedeads = 0
-
-  template ignoreObject(c: PCell): expr =
-    # This controls which objects will be ignored in the mark and scan stages
-    (when MarkingSkipsAcyclicObjects: not canbeCycleRoot(c) else: false)
-    # not canbeCycleRoot(c)
-    # false
-    # c.isBitUp(rcHasStackRef)
-
-  template earlyMarkAliveRec(cell) =
-    let startLen = gch.tempStack.len
-    cell.setColor rcAlive
-    cell.forAllChildren waPush
-
-    while startLen != gch.tempStack.len:
-      dec gch.tempStack.len
-      var c = gch.tempStack.d[gch.tempStack.len]
-      if c.color != rcAlive:
-        c.setColor rcAlive
-        c.forAllChildren waPush
-
-  template earlyMarkAlive(stackRoots) =
-    # This marks all objects reachable from the stack as alive before any
-    # of the other stages is executed. Such objects cannot be garbage and
-    # they don't need to participate in the recursive decref/incref.
-    for i in 0 .. <stackRoots.len:
-      var c = stackRoots.d[i]
-      # c.setBit rcHasStackRef
-      earlyMarkAliveRec(c)
-
-  earlyMarkAlive(gch.decStack)
-
-  when CollectCyclesStats:
-    let tAfterEarlyMarkAlive = getTicks()
-
-  template recursiveDecRef(cell) =
-    let startLen = gch.tempStack.len
-    cell.setColor rcDecRefApplied
-    cell.forAllChildren waPush
-
-    while startLen != gch.tempStack.len:
-      dec gch.tempStack.len
-      var c = gch.tempStack.d[gch.tempStack.len]
-      if ignoreObject(c): continue
-
-      sysAssert(c.refcount >=% rcIncrement, "recursive dec ref")
-      dec c.refcount, rcIncrement
-      inc decrefs
-      if c.color != rcDecRefApplied:
-        c.setColor rcDecRefApplied
-        c.forAllChildren waPush
-
-  template markRoots(roots) =
-    var i = 0
-    while i < roots.len:
-      if roots.d[i].color == rcCycleCandidate:
-        recursiveDecRef(roots.d[i])
-        inc i
-      else:
-        roots.trimAt i
-
-  markRoots(gch.cycleRoots)
-
-  when CollectCyclesStats:
-    let tAfterMark = getTicks()
-    c_printf "COLLECT CYCLES %d: %d/%d\n", gcCollectionIdx, gch.cycleRoots.len, l0
-
-  template recursiveMarkAlive(cell) =
-    let startLen = gch.tempStack.len
-    cell.setColor rcAlive
-    cell.forAllChildren waPush
-
-    while startLen != gch.tempStack.len:
-      dec gch.tempStack.len
-      var c = gch.tempStack.d[gch.tempStack.len]
-      if ignoreObject(c): continue
-      inc c.refcount, rcIncrement
-      inc increfs
-
-      if c.color != rcAlive:
-        c.setColor rcAlive
-        c.forAllChildren waPush
-
-  template scanRoots(roots) =
-    for i in 0 .. <roots.len:
-      let startLen = gch.tempStack.len
-      gch.tempStack.add roots.d[i]
-
-      while startLen != gch.tempStack.len:
-        dec gch.tempStack.len
-        var c = gch.tempStack.d[gch.tempStack.len]
-        if ignoreObject(c): continue
-        if c.color == rcDecRefApplied:
-          if c.refcount >=% rcIncrement:
-            recursiveMarkAlive(c)
-          else:
-            # note that this is not necessarily the ultimate
-            # destiny of the object. we may still mark it alive
-            # later if we encounter another node from where it's
-            # reachable.
-            c.setColor rcMaybeDead
-            inc maybedeads
-            c.forAllChildren waPush
-
-  scanRoots(gch.cycleRoots)
-
-  when CollectCyclesStats:
-    let tAfterScan = getTicks()
-
-  template collectDead(roots) =
-    for i in 0 .. <roots.len:
-      var c = roots.d[i]
-      c.clearBit(rcInCycleRoots)
-
-      let startLen = gch.tempStack.len
-      gch.tempStack.add c
-
-      while startLen != gch.tempStack.len:
-        dec gch.tempStack.len
-        var c = gch.tempStack.d[gch.tempStack.len]
-        when MarkingSkipsAcyclicObjects:
-          if not canbeCycleRoot(c):
-            # This is an acyclic object reachable from a dead cyclic object
-            # We must do a normal decref here that may add the acyclic object
-            # to the ZCT
-            doDecRef(c, LocalHeap, Cyclic)
-            continue
-        if c.color == rcMaybeDead and not c.isBitUp(rcInCycleRoots):
-          c.setColor(rcReallyDead)
-          inc collected
-          c.forAllChildren waPush
-          # we need to postpone the actual deallocation in order to allow
-          # the finalizers to run while the data structures are still intact
-          gch.freeStack.add c
-          prepareDealloc(c)
-
-    for i in 0 .. <gch.freeStack.len:
-      freeCell(gch, gch.freeStack.d[i])
-
-  collectDead(gch.cycleRoots)
-
-  when CollectCyclesStats:
-    let tFinal = getTicks()
-    cprintf "times:\n  early mark alive: %d ms\n  mark: %d ms\n  scan: %d ms\n  collect: %d ms\n  decrefs: %d\n  increfs: %d\n  marked dead: %d\n  collected: %d\n",
-      (tAfterEarlyMarkAlive - tStart)  div 1_000_000,
-      (tAfterMark - tAfterEarlyMarkAlive) div 1_000_000,
-      (tAfterScan - tAfterMark) div 1_000_000,
-      (tFinal - tAfterScan) div 1_000_000,
-      decrefs,
-      increfs,
-      maybedeads,
-      collected
-
-  deinit(gch.cycleRoots)
-  init(gch.cycleRoots)
-
-  deinit(gch.freeStack)
-  init(gch.freeStack)
-
-  when MarkingSkipsAcyclicObjects:
-    # Collect the acyclic objects that became unreachable due to collected
-    # cyclic objects.
-    discard collectZCT(gch)
-    # collectZCT may add new cycle candidates and we may decide to loop here
-    # if gch.cycleRoots.len > 0: repeat
-
-var gcDebugging* = false
-
-var seqdbg* : proc (s: PGenericSeq) {.cdecl.}
+proc nimGCvisit(d: pointer, op: int) {.compilerRtl.} =
+  doOperation(d, WalkOp(op))
+
+proc collectZCT(gch: var GcHeap): bool {.benign.}
+
+proc collectCycles(gch: var GcHeap): bool =
+  # drain the ZCT first so the ZCT flag bits are not in use during marking:
+  while gch.zct.len > 0: discard collectZCT(gch)
+  markGlobals(gch)
+  markLocals(gch)
+  if markIncrementally(gch):
+    gch.phase = Phase.Sweeping
+    sweep(gch)
+    gch.phase = Phase.None
+    result = true
+  else:
+    gch.phase = Phase.Marking
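
A cycle collection therefore proceeds in bounded slices: drain the ZCT, grey
the roots (globals plus conservatively found stack cells), then mark until
either the grey stack is empty (then sweep) or the budget expires (then stay
in Phase.Marking, with the write barriers preserving the invariant until the
next slice). A hypothetical miniature of that phase machine:

# Miniature phase machine mirroring collectCycles (hypothetical stand-ins).
type PhaseSketch = enum psNone, psMarking, psSweeping

var
  phase = psNone
  grey = @[1, 2, 3]            # pretend grey stack

proc markSome(budget: int): bool =
  var n = budget
  while grey.len > 0 and n > 0:
    discard grey.pop()         # blacken one cell
    dec n
  grey.len == 0                # true iff marking finished

proc collectCyclesSketch(): bool =
  if markSome(budget = 2):
    phase = psSweeping         # sweep would free the still-white cells
    phase = psNone
    true
  else:
    phase = psMarking          # resume from the grey stack next slice
    false

while not collectCyclesSketch():
  discard                      # the mutator would run here between slices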
 
 proc gcMark(gch: var GcHeap, p: pointer) {.inline.} =
   # the addresses are not as cells on the stack, so turn them to cells:
@@ -1005,235 +699,33 @@ proc gcMark(gch: var GcHeap, p: pointer) {.inline.} =
     var objStart = cast[PCell](interiorAllocatedPtr(gch.region, cell))
     if objStart != nil:
       # mark the cell:
-      if objStart.color != rcReallyDead:
-        if gcDebugging:
-          # writeCell("marking ", objStart)
-          discard
-        else:
-          inc objStart.refcount, rcIncrement
-          gch.decStack.add objStart
-      else:
-        # With incremental clean-up, objects spend some time
-        # in various lists before being deallocated.
-        # We just found a reference on the stack to an object,
-        # which we have previously labeled as unreachable.
-        # This is either a bug in the GC or a pure accidental
-        # coincidence due to the conservative stack marking.
-        when debugGC:
-          # writeCell("marking dead object", objStart)
-          discard
-    when false:
-      if isAllocatedPtr(gch.region, cell):
-        sysAssert false, "allocated pointer but not interior?"
-        # mark the cell:
-        inc cell.refcount, rcIncrement
-        add(gch.decStack, cell)
+      objStart.refcount = objStart.refcount +% rcIncrement
+      add(gch.decStack, objStart)
   sysAssert(allocInv(gch.region), "gcMark end")
 
-proc markThreadStacks(gch: var GcHeap) =
-  when hasThreadSupport and hasSharedHeap:
-    {.error: "not fully implemented".}
-    var it = threadList
-    while it != nil:
-      # mark registers:
-      for i in 0 .. high(it.registers): gcMark(gch, it.registers[i])
-      var sp = cast[ByteAddress](it.stackBottom)
-      var max = cast[ByteAddress](it.stackTop)
-      # XXX stack direction?
-      # XXX unroll this loop:
-      while sp <=% max:
-        gcMark(gch, cast[PPointer](sp)[])
-        sp = sp +% sizeof(pointer)
-      it = it.next
-
-# ----------------- stack management --------------------------------------
-#  inspired from Smart Eiffel
-
-when defined(sparc):
-  const stackIncreases = false
-elif defined(hppa) or defined(hp9000) or defined(hp9000s300) or
-     defined(hp9000s700) or defined(hp9000s800) or defined(hp9000s820):
-  const stackIncreases = true
-else:
-  const stackIncreases = false
-
-when not defined(useNimRtl):
-  {.push stack_trace: off.}
-  proc setStackBottom(theStackBottom: pointer) =
-    #c_fprintf(c_stdout, "stack bottom: %p;\n", theStackBottom)
-    # the first init must be the one that defines the stack bottom:
-    if gch.stackBottom == nil: gch.stackBottom = theStackBottom
-    else:
-      var a = cast[ByteAddress](theStackBottom) # and not PageMask - PageSize*2
-      var b = cast[ByteAddress](gch.stackBottom)
-      #c_fprintf(c_stdout, "old: %p new: %p;\n",gch.stackBottom,theStackBottom)
-      when stackIncreases:
-        gch.stackBottom = cast[pointer](min(a, b))
-      else:
-        gch.stackBottom = cast[pointer](max(a, b))
-  {.pop.}
-
-proc stackSize(): int {.noinline.} =
-  var stackTop {.volatile.}: pointer
-  result = abs(cast[int](addr(stackTop)) - cast[int](gch.stackBottom))
+include gc_common
 
-var
-  jmpbufSize {.importc: "sizeof(jmp_buf)", nodecl.}: int
-    # a little hack to get the size of a JmpBuf in the generated C code
-    # in a platform independent way
-
-when defined(sparc): # For SPARC architecture.
-  proc isOnStack(p: pointer): bool =
-    var stackTop {.volatile.}: pointer
-    stackTop = addr(stackTop)
-    var b = cast[ByteAddress](gch.stackBottom)
-    var a = cast[ByteAddress](stackTop)
-    var x = cast[ByteAddress](p)
-    result = a <=% x and x <=% b
-
-  proc markStackAndRegisters(gch: var GcHeap) {.noinline, cdecl.} =
-    when defined(sparcv9):
-      asm  """"flushw \n" """
-    else:
-      asm  """"ta      0x3   ! ST_FLUSH_WINDOWS\n" """
-
-    var
-      max = gch.stackBottom
-      sp: PPointer
-      stackTop: array[0..1, pointer]
-    sp = addr(stackTop[0])
-    # Addresses decrease as the stack grows.
-    while sp <= max:
-      gcMark(gch, sp[])
-      sp = cast[PPointer](cast[ByteAddress](sp) +% sizeof(pointer))
-
-elif defined(ELATE):
-  {.error: "stack marking code is to be written for this architecture".}
-
-elif stackIncreases:
-  # ---------------------------------------------------------------------------
-  # Generic code for architectures where addresses increase as the stack grows.
-  # ---------------------------------------------------------------------------
-  proc isOnStack(p: pointer): bool =
-    var stackTop {.volatile.}: pointer
-    stackTop = addr(stackTop)
-    var a = cast[ByteAddress](gch.stackBottom)
-    var b = cast[ByteAddress](stackTop)
-    var x = cast[ByteAddress](p)
-    result = a <=% x and x <=% b
-
-  proc markStackAndRegisters(gch: var GcHeap) {.noinline, cdecl.} =
-    var registers: C_JmpBuf
-    if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
-      var max = cast[ByteAddress](gch.stackBottom)
-      var sp = cast[ByteAddress](addr(registers)) +% jmpbufSize -% sizeof(pointer)
-      # sp will traverse the JMP_BUF as well (jmp_buf size is added,
-      # otherwise sp would be below the registers structure).
-      while sp >=% max:
-        gcMark(gch, cast[PPointer](sp)[])
-        sp = sp -% sizeof(pointer)
-
-else:
-  # ---------------------------------------------------------------------------
-  # Generic code for architectures where addresses decrease as the stack grows.
-  # ---------------------------------------------------------------------------
-  proc isOnStack(p: pointer): bool =
-    var stackTop {.volatile.}: pointer
-    stackTop = addr(stackTop)
-    var b = cast[ByteAddress](gch.stackBottom)
-    var a = cast[ByteAddress](stackTop)
-    var x = cast[ByteAddress](p)
-    result = a <=% x and x <=% b
-
-  proc markStackAndRegisters(gch: var GcHeap) {.noinline, cdecl.} =
-    # We use a jmp_buf buffer that is in the C stack.
-    # Used to traverse the stack and registers assuming
-    # that 'setjmp' will save registers in the C stack.
-    type PStackSlice = ptr array [0..7, pointer]
-    var registers: C_JmpBuf
-    if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
-      when MinimumStackMarking:
-        # mark the registers
-        var jmpbufPtr = cast[ByteAddress](addr(registers))
-        var jmpbufEnd = jmpbufPtr +% jmpbufSize
-
-        while jmpbufPtr <=% jmpbufEnd:
-          gcMark(gch, cast[PPointer](jmpbufPtr)[])
-          jmpbufPtr = jmpbufPtr +% sizeof(pointer)
-
-        var sp = cast[ByteAddress](gch.stackTop)
-      else:
-        var sp = cast[ByteAddress](addr(registers))
-      # mark the user stack
-      var max = cast[ByteAddress](gch.stackBottom)
-      # loop unrolled:
-      while sp <% max - 8*sizeof(pointer):
-        gcMark(gch, cast[PStackSlice](sp)[0])
-        gcMark(gch, cast[PStackSlice](sp)[1])
-        gcMark(gch, cast[PStackSlice](sp)[2])
-        gcMark(gch, cast[PStackSlice](sp)[3])
-        gcMark(gch, cast[PStackSlice](sp)[4])
-        gcMark(gch, cast[PStackSlice](sp)[5])
-        gcMark(gch, cast[PStackSlice](sp)[6])
-        gcMark(gch, cast[PStackSlice](sp)[7])
-        sp = sp +% sizeof(pointer)*8
-      # last few entries:
-      while sp <=% max:
-        gcMark(gch, cast[PPointer](sp)[])
-        sp = sp +% sizeof(pointer)
-
-# ----------------------------------------------------------------------------
-# end of non-portable code
-# ----------------------------------------------------------------------------
-
-proc releaseCell(gch: var GcHeap, cell: PCell) =
-  if cell.color != rcReallyDead:
-    prepareDealloc(cell)
-    cell.setColor rcReallyDead
-
-    let l1 = gch.tempStack.len
-    cell.forAllChildren waPush
-    let l2 = gch.tempStack.len
-    for i in l1 .. <l2:
-      var cc = gch.tempStack.d[i]
-      if cc.refcount--(LocalHeap):
-        releaseCell(gch, cc)
-      else:
-        if canbeCycleRoot(cc):
-          addCycleRoot(gch.cycleRoots, cc)
-
-    gch.tempStack.len = l1
-
-  if cell.isBitDown(rcBufferedAnywhere):
-    freeCell(gch, cell)
-  # else:
-  # This object is either buffered in the cycleRoots list and we'll leave
-  # it there to be collected in the next collectCycles or it's pending in
-  # the ZCT:
-  # (e.g. we are now cleaning the 15th object, but this one is 18th in the
-  #  list. Note that this can happen only if we reached this point via
-  #  recursion).
-  # We can ignore it now as the ZCT cleaner will reach it soon.
+proc markStackAndRegisters(gch: var GcHeap) {.noinline, cdecl.} =
+  forEachStackSlot(gch, gcMark)
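The architecture-specific scanning loops removed above are now hidden behind a
single 'forEachStackSlot' iteration. A minimal sketch of what such a helper does
on a downward-growing stack, using the 'C_JmpBuf', 'c_setjmp', 'ByteAddress' and
'PPointer' symbols visible in the removed code (the name and body below are
illustrative; the real template also covers coroutine stacks and architectures
where the stack grows upward):

    template forEachStackSlotSketch(gch, gcMark: untyped) =
      # Flush the register file onto the C stack so a linear scan covers it.
      var registers: C_JmpBuf
      if c_setjmp(registers) == 0'i32:
        var sp = cast[ByteAddress](addr(registers))
        let bottom = cast[ByteAddress](gch.stackBottom)
        while sp <=% bottom:
          # Conservative marking: treat every word as a potential pointer.
          gcMark(gch, cast[PPointer](sp)[])
          sp = sp +% sizeof(pointer)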
 
 proc collectZCT(gch: var GcHeap): bool =
-  const workPackage = 100
+  # Note: Freeing may add child objects to the ZCT! So essentially we do
+  # deep freeing, which is bad for incremental operation. To avoid deep
+  # recursion, dead children are pushed onto the ZCT instead of being
+  # freed recursively; the loop below then drains them, keeping the ZCT
+  # small. This is performance critical!
   var L = addr(gch.zct.len)
-
-  when withRealtime:
-    var steps = workPackage
-    var t0: Ticks
-    if gch.maxPause > 0: t0 = getticks()
+  takeStartTime(100)
 
   while L[] > 0:
     var c = gch.zct.d[0]
-    sysAssert c.isBitUp(rcZct), "collectZCT: rcZct missing!"
-    sysAssert(isAllocatedPtr(gch.region, c), "collectZCT: isAllocatedPtr")
-
+    sysAssert(isAllocatedPtr(gch.region, c), "collectZCT: isAllocatedPtr")
     # remove from ZCT:
-    c.clearBit(rcZct)
+    gcAssert((c.refcount and ZctFlag) == ZctFlag, "collectZCT")
+
+    c.refcount = c.refcount and not ZctFlag
     gch.zct.d[0] = gch.zct.d[L[] - 1]
     dec(L[])
-    when withRealtime: dec steps
+    takeTime()
     if c.refcount <% rcIncrement:
       # It may have an RC > 0 if it is in the hardware stack or
       # has not yet been removed from the ZCT. This is because
@@ -1241,92 +733,78 @@ proc collectZCT(gch: var GcHeap): bool =
       # as this might be too slow.
       # In any case, it should be removed from the ZCT. But not
       # freed. **KEEP THIS IN MIND WHEN MAKING THIS INCREMENTAL!**
-      if c.color == rcRetiredBuffer:
-        if c.isBitDown(rcInCycleRoots):
-          freeCell(gch, c)
+      when logGC: writeCell("zct dealloc cell", c)
+      gcTrace(c, csZctFreed)
+      # We are about to free the object; run the finalizer BEFORE its
+      # children are deleted, because otherwise the finalizer may access
+      # invalid memory. This is done by prepareDealloc():
+      prepareDealloc(c)
+      forAllChildren(c, waZctDecRef)
+      when reallyDealloc:
+        sysAssert(allocInv(gch.region), "collectZCT: rawDealloc")
+        rawDealloc(gch.region, c)
       else:
-        # if c.color == rcReallyDead: writeCell("ReallyDead in ZCT?", c)
-        releaseCell(gch, c)
-    when withRealtime:
-      if steps == 0:
-        steps = workPackage
-        if gch.maxPause > 0:
-          let duration = getticks() - t0
-          # the GC's measuring is not accurate and needs some cleanup actions
-          # (stack unmarking), so subtract a short amount of time in order
-          # to miss deadlines less often:
-          if duration >= gch.maxPause - 50_000:
-            return false
+        sysAssert(c.typ != nil, "collectZCT 2")
+        zeroMem(c, sizeof(Cell))
+    checkTime()
   result = true
-  gch.trimCycleRoots
-  #deInit(gch.zct)
-  #init(gch.zct)
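The removed 'workPackage'/'steps' bookkeeping reappears as the 'takeStartTime',
'takeTime' and 'checkTime' templates used above. A rough sketch of the pattern,
assuming 'getticks', 'Ticks' and 'gch.maxPause' as in the removed realtime code
(the template names match the call sites, but the bodies are a reconstruction,
not the literal definitions):

    template takeStartTime(workPackageSize: int) {.dirty.} =
      const workPackage = workPackageSize
      var steps = workPackage
      var t0: Ticks
      if gch.maxPause > 0: t0 = getticks()

    template takeTime() {.dirty.} =
      dec steps                  # one unit of work done

    template checkTime() {.dirty.} =
      if steps == 0:
        steps = workPackage
        # leave some slack for cleanup work such as stack unmarking:
        if gch.maxPause > 0 and getticks() - t0 >= gch.maxPause - 50_000:
          return false           # budget spent; resume in a later step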
 
 proc unmarkStackAndRegisters(gch: var GcHeap) =
   var d = gch.decStack.d
-  for i in 0 .. <gch.decStack.len:
+  for i in 0..gch.decStack.len-1:
     sysAssert isAllocatedPtr(gch.region, d[i]), "unmarkStackAndRegisters"
-    # XXX: just call doDecRef?
-    var c = d[i]
-    sysAssert c.typ != nil, "unmarkStackAndRegisters 2"
-
-    if c.color == rcRetiredBuffer:
-      continue
-
-    # XXX no need for an atomic dec here:
-    if c.refcount--(LocalHeap):
-      # the object survived only because of a stack reference
-      # it still doesn't have heap references
-      addZCT(gch.zct, c)
-
-    if canbeCycleRoot(c):
-      # any cyclic object reachable from the stack can be turned into
-      # a leak if it's orphaned through the stack reference
-      # that's because the write-barrier won't be executed for stack
-      # locations
-      addCycleRoot(gch.cycleRoots, c)
-
+    decRef(d[i])
   gch.decStack.len = 0
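A single 'decRef' now subsumes the removed hand-rolled logic: it takes back the
reference that stack marking added and re-buffers the cell if it became garbage.
A sketch of the shape such a proc has under the new encoding, in which the
strong count lives above the flag bits and is counted in steps of 'rcIncrement'
(an illustrative reconstruction, not the literal body):

    proc decRefSketch(c: PCell) {.inline.} =
      gcAssert(c.refcount >=% rcIncrement, "decRef: refcount underflow")
      # '-%' and '<%' are the unsigned arithmetic/comparison operators used
      # throughout this file; flag bits below 'rcIncrement' stay untouched.
      c.refcount = c.refcount -% rcIncrement
      if c.refcount <% rcIncrement:
        addZCT(gch.zct, c)   # no strong refs left; collectZCT decides its fate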
 
 proc collectCTBody(gch: var GcHeap) =
-  when withRealtime:
+  when withRealTime:
     let t0 = getticks()
-  when debugGC: inc gcCollectionIdx
   sysAssert(allocInv(gch.region), "collectCT: begin")
 
-  gch.stat.maxStackSize = max(gch.stat.maxStackSize, stackSize())
+  when not defined(nimCoroutines):
+    gch.stat.maxStackSize = max(gch.stat.maxStackSize, stackSize())
   sysAssert(gch.decStack.len == 0, "collectCT")
   prepareForInteriorPointerChecking(gch.region)
   markStackAndRegisters(gch)
-  markThreadStacks(gch)
   gch.stat.maxStackCells = max(gch.stat.maxStackCells, gch.decStack.len)
   inc(gch.stat.stackScans)
   if collectZCT(gch):
     when cycleGC:
       if getOccupiedMem(gch.region) >= gch.cycleThreshold or alwaysCycleGC:
-        collectCycles(gch)
-        sysAssert gch.zct.len == 0, "zct is not null after collect cycles"
-        inc(gch.stat.cycleCollections)
-        gch.cycleThreshold = max(InitialCycleThreshold, getOccupiedMem() *
-                                 CycleIncrease)
-        gch.stat.maxThreshold = max(gch.stat.maxThreshold, gch.cycleThreshold)
+        if collectCycles(gch):
+          inc(gch.stat.cycleCollections)
+          gch.cycleThreshold = max(InitialCycleThreshold, getOccupiedMem() *
+                                   CycleIncrease)
+          gch.stat.maxThreshold = max(gch.stat.maxThreshold, gch.cycleThreshold)
   unmarkStackAndRegisters(gch)
   sysAssert(allocInv(gch.region), "collectCT: end")
 
-  when withRealtime:
+  when withRealTime:
     let duration = getticks() - t0
     gch.stat.maxPause = max(gch.stat.maxPause, duration)
     when defined(reportMissedDeadlines):
       if gch.maxPause > 0 and duration > gch.maxPause:
         c_fprintf(c_stdout, "[GC] missed deadline: %ld\n", duration)
 
+when defined(nimCoroutines):
+  proc currentStackSizes(): int =
+    for stack in items(gch.stack):
+      result = result + stackSize(stack.starts, stack.pos)
+
 proc collectCT(gch: var GcHeap) =
-  if (gch.zct.len >= ZctThreshold or (cycleGC and
+  # stackMarkCosts prevents some pathological behaviour: Stack marking
+  # becomes more expensive with large stacks, and large stacks mean that
+  # cells with RC=0 are more likely to be kept alive by the stack.
+  when defined(nimCoroutines):
+    let stackMarkCosts = max(currentStackSizes() div (16*sizeof(int)), ZctThreshold)
+  else:
+    let stackMarkCosts = max(stackSize() div (16*sizeof(int)), ZctThreshold)
+  if (gch.zct.len >= stackMarkCosts or (cycleGC and
       getOccupiedMem(gch.region)>=gch.cycleThreshold) or alwaysGC) and
       gch.recGcLock == 0:
     collectCTBody(gch)
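As a worked example of the heuristic above: on a 64-bit target a 1 MiB stack
gives 1_048_576 div (16*8) = 8192, so collection is deferred until the ZCT
holds max(8192, ZctThreshold) entries, while a small stack falls back to plain
ZctThreshold. The arithmetic, runnable outside the runtime (the ZctThreshold
value is an assumed placeholder; the real constant is defined earlier in this
file):

    const ZctThreshold = 500                 # assumed placeholder value
    let stackBytes = 1024 * 1024             # a 1 MiB stack
    let stackMarkCosts = max(stackBytes div (16 * sizeof(int)), ZctThreshold)
    assert stackMarkCosts == 8192            # holds when sizeof(int) == 8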
 
-when withRealtime:
+when withRealTime:
   proc toNano(x: int): Nanos {.inline.} =
     result = x * 1000
 
@@ -1334,13 +812,11 @@ when withRealtime:
     gch.maxPause = MaxPauseInUs.toNano
 
   proc GC_step(gch: var GcHeap, us: int, strongAdvice: bool) =
-    acquire(gch)
     gch.maxPause = us.toNano
     if (gch.zct.len >= ZctThreshold or (cycleGC and
         getOccupiedMem(gch.region)>=gch.cycleThreshold) or alwaysGC) or
         strongAdvice:
       collectCTBody(gch)
-    release(gch)
 
   proc GC_step*(us: int, strongAdvice = false) = GC_step(gch, us, strongAdvice)
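'GC_step' is the hard-realtime entry point promised in the header comment: the
caller grants the collector a slice of 'us' microseconds and 'checkTime' makes
'collectZCT' bail out before the slice is exhausted. A hypothetical frame loop
using it ('renderFrame' stands in for the application's per-iteration work):

    proc renderFrame() = discard    # hypothetical per-frame work

    GC_disable()                    # suppress collections at allocation sites
    for frame in 1..600:
      renderFrame()
      GC_step(us = 2000)            # grant the GC up to 2 ms per frame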
 
@@ -1358,11 +834,7 @@ when not defined(useNimRtl):
         dec(gch.recGcLock)
 
   proc GC_setStrategy(strategy: GC_Strategy) =
-    case strategy
-    of gcThroughput: discard
-    of gcResponsiveness: discard
-    of gcOptimizeSpace: discard
-    of gcOptimizeTime: discard
+    discard
 
   proc GC_enableMarkAndSweep() =
     gch.cycleThreshold = InitialCycleThreshold
@@ -1372,13 +844,10 @@ when not defined(useNimRtl):
     # set to the max value to suppress the cycle detector
 
   proc GC_fullCollect() =
-    setStackTop(gch)
-    acquire(gch)
     var oldThreshold = gch.cycleThreshold
     gch.cycleThreshold = 0 # forces cycle collection
     collectCT(gch)
     gch.cycleThreshold = oldThreshold
-    release(gch)
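'GC_fullCollect' forces a cycle collection by temporarily dropping the
threshold to zero before delegating to 'collectCT'. Typical use is reclaiming
a large cyclic structure right after releasing it ('buildGraph' is a
hypothetical stand-in returning some ref-based graph):

    var bigGraph = buildGraph()   # hypothetical; returns a cyclic ref graph
    bigGraph = nil                # drop the last external reference
    GC_fullCollect()              # reclaim the cycle immediately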
 
   proc GC_getStatistics(): string =
     GC_disable()
@@ -1390,9 +859,13 @@ when not defined(useNimRtl):
              "[GC] max threshold: " & $gch.stat.maxThreshold & "\n" &
              "[GC] zct capacity: " & $gch.zct.cap & "\n" &
              "[GC] max cycle table size: " & $gch.stat.cycleTableSize & "\n" &
-             "[GC] max stack size: " & $gch.stat.maxStackSize & "\n" &
              "[GC] max pause time [ms]: " & $(gch.stat.maxPause div 1000_000)
-    when traceGC: writeLeakage(true)
+    when defined(nimCoroutines):
+      result = result & "[GC] number of stacks: " & $gch.stack.len & "\n"
+      for stack in items(gch.stack):
+        result = result & "[GC]   stack " & stack.starts.repr & "[GC]     max stack size " & $stack.maxStackSize & "\n"
+    else:
+      result = result & "[GC] max stack size: " & $gch.stat.maxStackSize & "\n"
     GC_enable()
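Note that 'GC_getStatistics' brackets the string building with
'GC_disable'/'GC_enable' so that producing the report cannot itself trigger a
collection. Typical usage is a single call at program exit:

    echo GC_getStatistics()   # prints one "[GC] ..." line per statistic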
 
 {.pop.}