summary refs log tree commit diff stats
path: root/lib/system/gc.nim
diff options
context:
space:
mode:
Diffstat (limited to 'lib/system/gc.nim')
-rw-r--r--[-rwxr-xr-x]lib/system/gc.nim1286
1 files changed, 775 insertions, 511 deletions
diff --git a/lib/system/gc.nim b/lib/system/gc.nim
index da8f75768..9289c7f55 100755..100644
--- a/lib/system/gc.nim
+++ b/lib/system/gc.nim
@@ -1,647 +1,911 @@
 #
 #
-#            Nimrod's Runtime Library
-#        (c) Copyright 2009 Andreas Rumpf
+#            Nim's Runtime Library
+#        (c) Copyright 2016 Andreas Rumpf
 #
 #    See the file "copying.txt", included in this
 #    distribution, for details about the copyright.
 #
 
-
 #            Garbage Collector
 #
-# The basic algorithm is *Deferrent Reference Counting* with cycle detection.
-# Special care has been taken to avoid recursion as far as possible to avoid
-# stack overflows when traversing deep datastructures. This is comparable to
-# an incremental and generational GC. It should be well-suited for soft real
-# time applications (like games).
-#
-# Future Improvements:
-# * Support for multi-threading. However, locks for the reference counting
-#   might turn out to be too slow.
+# Refcounting + Mark&Sweep. Complex algorithms avoided.
+# Been there, done that, didn't work.
+
+#[
+
+A *cell* is anything that is traced by the GC
+(sequences, refs, strings, closures).
+
+The basic algorithm is *Deferrent Reference Counting* with cycle detection.
+References on the stack are not counted for better performance and easier C
+code generation.
+
+Each cell has a header consisting of a RC and a pointer to its type
+descriptor. However the program does not know about these, so they are placed at
+negative offsets. In the GC code the type `PCell` denotes a pointer
+decremented by the right offset, so that the header can be accessed easily. It
+is extremely important that `pointer` is not confused with a `PCell`.
+
+In Nim the compiler cannot always know if a reference
+is stored on the stack or not. This is caused by var parameters.
+Consider this example:
+
+  ```Nim
+  proc setRef(r: var ref TNode) =
+    new(r)
+
+  proc usage =
+    var
+      r: ref TNode
+    setRef(r) # here we should not update the reference counts, because
+              # r is on the stack
+    setRef(r.left) # here we should update the refcounts!
+  ```
+
+We have to decide at runtime whether the reference is on the stack or not.
+The generated code looks roughly like this:
+
+  ```C
+  void setref(TNode** ref) {
+    unsureAsgnRef(ref, newObj(TNode_TI, sizeof(TNode)))
+  }
+  void usage(void) {
+    setRef(&r)
+    setRef(&r->left)
+  }
+  ```
+
+Note that for systems with a continuous stack (which most systems have)
+the check whether the ref is on the stack is very cheap (only two
+comparisons).
+]#
+
+{.push profiler:off.}
 
 const
   CycleIncrease = 2 # is a multiplicative increase
-  InitialCycleThreshold = 4*1024*1024 # X MB because cycle checking is slow
-  ZctThreshold = 256  # we collect garbage if the ZCT's size
-                      # reaches this threshold
-                      # this seems to be a good value
+  InitialCycleThreshold = when defined(nimCycleBreaker): high(int)
+                          else: 4*1024*1024 # X MB because cycle checking is slow
+  InitialZctThreshold = 500  # we collect garbage if the ZCT's size
+                             # reaches this threshold
+                             # this seems to be a good value
+  withRealTime = defined(useRealtimeGC)
+
+when withRealTime and not declared(getTicks):
+  include "system/timers"
+when defined(memProfiler):
+  proc nimProfile(requestedSize: int) {.benign.}
+
+when hasThreadSupport:
+  import std/sharedlist
 
 const
   rcIncrement = 0b1000 # so that lowest 3 bits are not touched
-  # NOTE: Most colors are currently unused
   rcBlack = 0b000  # cell is colored black; in use or free
   rcGray = 0b001   # possible member of a cycle
   rcWhite = 0b010  # member of a garbage cycle
   rcPurple = 0b011 # possible root of a cycle
-  rcZct = 0b100    # in ZCT
-  rcRed = 0b101    # Candidate cycle undergoing sigma-computation
-  rcOrange = 0b110 # Candidate cycle awaiting epoch boundary
+  ZctFlag = 0b100  # in ZCT
   rcShift = 3      # shift by rcShift to get the reference counter
-  colorMask = 0b111
+  colorMask = 0b011
 type
-  TWalkOp = enum
-    waZctDecRef, waPush, waCycleDecRef
+  WalkOp = enum
+    waMarkGlobal,    # part of the backup/debug mark&sweep
+    waMarkPrecise,   # part of the backup/debug mark&sweep
+    waZctDecRef, waPush
+    #, waDebug
 
-  TFinalizer {.compilerproc.} = proc (self: pointer)
+  Finalizer {.compilerproc.} = proc (self: pointer) {.nimcall, benign, raises: [].}
     # A ref type can have a finalizer that is called before the object's
     # storage is freed.
 
-  TGcStat {.final, pure.} = object
+  GcStat {.final, pure.} = object
     stackScans: int          # number of performed stack scans (for statistics)
     cycleCollections: int    # number of performed full collections
     maxThreshold: int        # max threshold that has been set
     maxStackSize: int        # max stack size
     maxStackCells: int       # max stack cells in ``decStack``
-    cycleTableSize: int      # max entries in cycle table  
-  
-  TGcHeap {.final, pure.} = object # this contains the zero count and
-                                   # non-zero count table
-    zct: TCellSeq            # the zero count table
-    decStack: TCellSeq       # cells in the stack that are to decref again
-    cycleRoots: TCellSet
-    tempStack: TCellSeq      # temporary stack for recursion elimination
-    stat: TGcStat
+    cycleTableSize: int      # max entries in cycle table
+    maxPause: int64          # max measured GC pause in nanoseconds
+
+  GcStack {.final, pure.} = object
+    when nimCoroutines:
+      prev: ptr GcStack
+      next: ptr GcStack
+      maxStackSize: int      # Used to track statistics because we can not use
+                             # GcStat.maxStackSize when multiple stacks exist.
+    bottom: pointer
+
+    when withRealTime or nimCoroutines:
+      pos: pointer           # Used with `withRealTime` only for code clarity, see GC_Step().
+    when withRealTime:
+      bottomSaved: pointer
+
+  GcHeap {.final, pure.} = object # this contains the zero count and
+                                  # non-zero count table
+    stack: GcStack
+    when nimCoroutines:
+      activeStack: ptr GcStack    # current executing coroutine stack.
+    cycleThreshold: int
+    zctThreshold: int
+    when useCellIds:
+      idGenerator: int
+    zct: CellSeq             # the zero count table
+    decStack: CellSeq        # cells in the stack that are to decref again
+    tempStack: CellSeq       # temporary stack for recursion elimination
+    recGcLock: int           # prevent recursion via finalizers; no thread lock
+    when withRealTime:
+      maxPause: Nanos        # max allowed pause in nanoseconds; active if > 0
+    region: MemRegion        # garbage collected region
+    stat: GcStat
+    marked: CellSet
+    additionalRoots: CellSeq # dummy roots for GC_ref/unref
+    when hasThreadSupport:
+      toDispose: SharedList[pointer]
+    gcThreadId: int
 
 var
-  stackBottom: pointer
-  gch: TGcHeap
-  cycleThreshold: int = InitialCycleThreshold
-  recGcLock: int = 0
-    # we use a lock to prevend the garbage collector to be triggered in a
-    # finalizer; the collector should not call itself this way! Thus every
-    # object allocated by a finalizer will not trigger a garbage collection.
-    # This is wasteful but safe. This is a lock against recursive garbage
-    # collection, not a lock for threads!
-
-proc unsureAsgnRef(dest: ppointer, src: pointer) {.compilerproc.}
-  # unsureAsgnRef updates the reference counters only if dest is not on the
-  # stack. It is used by the code generator if it cannot decide wether a
-  # reference is in the stack or not (this can happen for var parameters).
-#proc growObj(old: pointer, newsize: int): pointer {.compilerproc.}
-proc newObj(typ: PNimType, size: int): pointer {.compilerproc.}
-proc newSeq(typ: PNimType, len: int): pointer {.compilerproc.}
-
-proc addZCT(s: var TCellSeq, c: PCell) {.noinline.} =
-  if (c.refcount and rcZct) == 0:
-    c.refcount = c.refcount and not colorMask or rcZct
+  gch {.rtlThreadVar.}: GcHeap
+
+when not defined(useNimRtl):
+  instantiateForRegion(gch.region)
+
+template gcAssert(cond: bool, msg: string) =
+  when defined(useGcAssert):
+    if not cond:
+      cstderr.rawWrite "[GCASSERT] "
+      cstderr.rawWrite msg
+      when defined(logGC):
+        cstderr.rawWrite "[GCASSERT] statistics:\L"
+        cstderr.rawWrite GC_getStatistics()
+      GC_disable()
+      writeStackTrace()
+      #var x: ptr int
+      #echo x[]
+      rawQuit 1
+
+proc addZCT(s: var CellSeq, c: PCell) {.noinline.} =
+  if (c.refcount and ZctFlag) == 0:
+    c.refcount = c.refcount or ZctFlag
     add(s, c)
 
 proc cellToUsr(cell: PCell): pointer {.inline.} =
   # convert object (=pointer to refcount) to pointer to userdata
-  result = cast[pointer](cast[TAddress](cell)+%TAddress(sizeof(TCell)))
+  result = cast[pointer](cast[int](cell)+%ByteAddress(sizeof(Cell)))
 
 proc usrToCell(usr: pointer): PCell {.inline.} =
   # convert pointer to userdata to object (=pointer to refcount)
-  result = cast[PCell](cast[TAddress](usr)-%TAddress(sizeof(TCell)))
-
-proc canbeCycleRoot(c: PCell): bool {.inline.} =
-  result = ntfAcyclic notin c.typ.flags
+  result = cast[PCell](cast[int](usr)-%ByteAddress(sizeof(Cell)))
 
 proc extGetCellType(c: pointer): PNimType {.compilerproc.} =
   # used for code generation concerning debugging
   result = usrToCell(c).typ
 
 proc internRefcount(p: pointer): int {.exportc: "getRefcount".} =
-  result = int(usrToCell(p).refcount) shr rcShift
-
-proc GC_disable() = inc(recGcLock)
-proc GC_enable() =
-  if recGcLock > 0: dec(recGcLock)
-
-proc GC_setStrategy(strategy: TGC_Strategy) =
-  case strategy
-  of gcThroughput: nil
-  of gcResponsiveness: nil
-  of gcOptimizeSpace: nil
-  of gcOptimizeTime: nil
-
-proc GC_enableMarkAndSweep() =
-  cycleThreshold = InitialCycleThreshold
-
-proc GC_disableMarkAndSweep() =
-  cycleThreshold = high(cycleThreshold)-1
-  # set to the max value to suppress the cycle detector
+  result = usrToCell(p).refcount shr rcShift
 
 # this that has to equals zero, otherwise we have to round up UnitsPerPage:
 when BitsPerPage mod (sizeof(int)*8) != 0:
   {.error: "(BitsPerPage mod BitsPerUnit) should be zero!".}
 
-when debugGC:
-  proc writeCell(msg: CString, c: PCell) =
+template color(c): untyped = c.refCount and colorMask
+template setColor(c, col) =
+  when col == rcBlack:
+    c.refcount = c.refcount and not colorMask
+  else:
+    c.refcount = c.refcount and not colorMask or col
+
+when defined(logGC):
+  proc writeCell(msg: cstring, c: PCell) =
     var kind = -1
-    if c.typ != nil: kind = ord(c.typ.kind)
-    when debugGC:
-      c_fprintf(c_stdout, "[GC] %s: %p %d rc=%ld from %s(%ld)\n",
-                msg, c, kind, c.refcount shr rcShift, c.filename, c.line)
+    var typName: cstring = "nil"
+    if c.typ != nil:
+      kind = ord(c.typ.kind)
+      when defined(nimTypeNames):
+        if not c.typ.name.isNil:
+          typName = c.typ.name
+
+    when leakDetector:
+      c_printf("[GC] %s: %p %d %s rc=%ld from %s(%ld)\n",
+                msg, c, kind, typName, c.refcount shr rcShift, c.filename, c.line)
     else:
-      c_fprintf(c_stdout, "[GC] %s: %p %d rc=%ld\n",
-                msg, c, kind, c.refcount shr rcShift)
-
-when traceGC:
-  # traceGC is a special switch to enable extensive debugging
-  type
-    TCellState = enum
-      csAllocated, csZctFreed, csCycFreed
-  var
-    states: array[TCellState, TCellSet]
-
-  proc traceCell(c: PCell, state: TCellState) =
-    case state
-    of csAllocated:
-      if c in states[csAllocated]:
-        writeCell("attempt to alloc an already allocated cell", c)
-        assert(false)
-      excl(states[csCycFreed], c)
-      excl(states[csZctFreed], c)
-    of csZctFreed:
-      if c in states[csZctFreed]:
-        writeCell("attempt to free zct cell twice", c)
-        assert(false)
-      if c in states[csCycFreed]:
-        writeCell("attempt to free with zct, but already freed with cyc", c)
-        assert(false)
-      if c notin states[csAllocated]:
-        writeCell("attempt to free not an allocated cell", c)
-        assert(false)
-      excl(states[csAllocated], c)
-    of csCycFreed:
-      if c notin states[csAllocated]:
-        writeCell("attempt to free a not allocated cell", c)
-        assert(false)
-      if c in states[csCycFreed]:
-        writeCell("attempt to free cyc cell twice", c)
-        assert(false)
-      if c in states[csZctFreed]:
-        writeCell("attempt to free with cyc, but already freed with zct", c)
-        assert(false)
-      excl(states[csAllocated], c)
-    incl(states[state], c)
-
-  proc writeLeakage() =
-    var z = 0
-    var y = 0
-    var e = 0
-    for c in elements(states[csAllocated]):
-      inc(e)
-      if c in states[csZctFreed]: inc(z)
-      elif c in states[csCycFreed]: inc(z)
-      else: writeCell("leak", c)
-    cfprintf(cstdout, "Allocations: %ld; ZCT freed: %ld; CYC freed: %ld\n",
-             e, z, y)
-
-template gcTrace(cell, state: expr): stmt =
-  when traceGC: traceCell(cell, state)
+      c_printf("[GC] %s: %p %d %s rc=%ld; thread=%ld\n",
+                msg, c, kind, typName, c.refcount shr rcShift, gch.gcThreadId)
 
-# -----------------------------------------------------------------------------
+template logCell(msg: cstring, c: PCell) =
+  when defined(logGC):
+    writeCell(msg, c)
+
+template gcTrace(cell, state: untyped) =
+  when traceGC: traceCell(cell, state)
 
 # forward declarations:
-proc collectCT(gch: var TGcHeap)
-proc IsOnStack(p: pointer): bool {.noinline.}
-proc forAllChildren(cell: PCell, op: TWalkOp)
-proc doOperation(p: pointer, op: TWalkOp)
-proc forAllChildrenAux(dest: Pointer, mt: PNimType, op: TWalkOp)
+proc collectCT(gch: var GcHeap) {.benign, raises: [].}
+proc isOnStack(p: pointer): bool {.noinline, benign, raises: [].}
+proc forAllChildren(cell: PCell, op: WalkOp) {.benign, raises: [].}
+proc doOperation(p: pointer, op: WalkOp) {.benign, raises: [].}
+proc forAllChildrenAux(dest: pointer, mt: PNimType, op: WalkOp) {.benign, raises: [].}
 # we need the prototype here for debugging purposes
 
-proc prepareDealloc(cell: PCell) =
-  if cell.typ.finalizer != nil:
-    # the finalizer could invoke something that
-    # allocates memory; this could trigger a garbage
-    # collection. Since we are already collecting we
-    # prevend recursive entering here by a lock.
-    # XXX: we should set the cell's children to nil!
-    inc(recGcLock)
-    (cast[TFinalizer](cell.typ.finalizer))(cellToUsr(cell))
-    dec(recGcLock)
+proc incRef(c: PCell) {.inline.} =
+  gcAssert(isAllocatedPtr(gch.region, c), "incRef: interiorPtr")
+  c.refcount = c.refcount +% rcIncrement
+  # and not colorMask
+  logCell("incRef", c)
 
-proc setStackBottom(theStackBottom: pointer) {.compilerproc.} =
-  stackBottom = theStackBottom
+proc nimGCref(p: pointer) {.compilerproc.} =
+  # we keep it from being collected by pretending it's not even allocated:
+  let c = usrToCell(p)
+  add(gch.additionalRoots, c)
+  incRef(c)
 
-proc PossibleRoot(gch: var TGcHeap, c: PCell) {.inline.} =
-  if canbeCycleRoot(c): incl(gch.cycleRoots, c)
+proc rtlAddZCT(c: PCell) {.rtl, inl.} =
+  # we MUST access gch as a global here, because this crosses DLL boundaries!
+  addZCT(gch.zct, c)
 
 proc decRef(c: PCell) {.inline.} =
-  when stressGC:
-    if c.refcount <% rcIncrement:
-      writeCell("broken cell", c)
-  assert(c.refcount >=% rcIncrement)
+  gcAssert(isAllocatedPtr(gch.region, c), "decRef: interiorPtr")
+  gcAssert(c.refcount >=% rcIncrement, "decRef")
   c.refcount = c.refcount -% rcIncrement
   if c.refcount <% rcIncrement:
-    addZCT(gch.zct, c)
-  elif canBeCycleRoot(c):
-    incl(gch.cycleRoots, c) 
-
-proc incRef(c: PCell) {.inline.} = 
-  c.refcount = c.refcount +% rcIncrement
-  if canBeCycleRoot(c):
-    incl(gch.cycleRoots, c)
-
-proc nimGCref(p: pointer) {.compilerproc, inline.} = incRef(usrToCell(p))
-proc nimGCunref(p: pointer) {.compilerproc, inline.} = decRef(usrToCell(p))
-
-proc asgnRef(dest: ppointer, src: pointer) {.compilerproc, inline.} =
+    rtlAddZCT(c)
+  logCell("decRef", c)
+
+proc nimGCunref(p: pointer) {.compilerproc.} =
+  let cell = usrToCell(p)
+  var L = gch.additionalRoots.len-1
+  var i = L
+  let d = gch.additionalRoots.d
+  while i >= 0:
+    if d[i] == cell:
+      d[i] = d[L]
+      dec gch.additionalRoots.len
+      break
+    dec(i)
+  decRef(usrToCell(p))
+
+include gc_common
+
+template beforeDealloc(gch: var GcHeap; c: PCell; msg: typed) =
+  when false:
+    for i in 0..gch.decStack.len-1:
+      if gch.decStack.d[i] == c:
+        sysAssert(false, msg)
+
+proc nimGCunrefNoCycle(p: pointer) {.compilerproc, inline.} =
+  sysAssert(allocInv(gch.region), "begin nimGCunrefNoCycle")
+  decRef(usrToCell(p))
+  sysAssert(allocInv(gch.region), "end nimGCunrefNoCycle 5")
+
+proc nimGCunrefRC1(p: pointer) {.compilerproc, inline.} =
+  decRef(usrToCell(p))
+
+proc asgnRef(dest: PPointer, src: pointer) {.compilerproc, inline.} =
   # the code generator calls this proc!
-  assert(not isOnStack(dest))
+  gcAssert(not isOnStack(dest), "asgnRef")
   # BUGFIX: first incRef then decRef!
   if src != nil: incRef(usrToCell(src))
-  if dest^ != nil: decRef(usrToCell(dest^))
-  dest^ = src
-
-proc asgnRefNoCycle(dest: ppointer, src: pointer) {.compilerproc, inline.} =
-  # the code generator calls this proc if it is known at compile time that no 
-  # cycle is possible.
-  if src != nil: 
-    var c = usrToCell(src)
-    c.refcount = c.refcount +% rcIncrement
-  if dest^ != nil: 
-    var c = usrToCell(dest^)
-    c.refcount = c.refcount -% rcIncrement
-    if c.refcount <% rcIncrement:
-      addZCT(gch.zct, c)
-  dest^ = src
+  if dest[] != nil: decRef(usrToCell(dest[]))
+  dest[] = src
+
+proc asgnRefNoCycle(dest: PPointer, src: pointer) {.compilerproc, inline,
+  deprecated: "old compiler compat".} = asgnRef(dest, src)
 
-proc unsureAsgnRef(dest: ppointer, src: pointer) =
-  if not IsOnStack(dest):
+proc unsureAsgnRef(dest: PPointer, src: pointer) {.compilerproc.} =
+  # unsureAsgnRef updates the reference counters only if dest is not on the
+  # stack. It is used by the code generator if it cannot decide whether a
+  # reference is in the stack or not (this can happen for var parameters).
+  if not isOnStack(dest):
     if src != nil: incRef(usrToCell(src))
-    if dest^ != nil: decRef(usrToCell(dest^))
-  dest^ = src
+    # XXX finally use assembler for the stack checking instead!
+    # the test for '!= nil' is correct, but I got tired of the segfaults
+    # resulting from the crappy stack checking:
+    if cast[int](dest[]) >=% PageSize: decRef(usrToCell(dest[]))
+  else:
+    # can't be an interior pointer if it's a stack location!
+    gcAssert(interiorAllocatedPtr(gch.region, dest) == nil,
+             "stack loc AND interior pointer")
+  dest[] = src
 
 proc initGC() =
-  when traceGC:
-    for i in low(TCellState)..high(TCellState): Init(states[i])
-  gch.stat.stackScans = 0
-  gch.stat.cycleCollections = 0
-  gch.stat.maxThreshold = 0
-  gch.stat.maxStackSize = 0
-  gch.stat.maxStackCells = 0
-  gch.stat.cycleTableSize = 0
-  # init the rt
-  init(gch.zct)
-  init(gch.tempStack)
-  Init(gch.cycleRoots)
-  Init(gch.decStack)
-  new(gOutOfMem) # reserve space for the EOutOfMemory exception here!
-
-proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: TWalkOp) =
-  var d = cast[TAddress](dest)
+  when not defined(useNimRtl):
+    when traceGC:
+      for i in low(CellState)..high(CellState): init(states[i])
+    gch.cycleThreshold = InitialCycleThreshold
+    gch.zctThreshold = InitialZctThreshold
+    gch.stat.stackScans = 0
+    gch.stat.cycleCollections = 0
+    gch.stat.maxThreshold = 0
+    gch.stat.maxStackSize = 0
+    gch.stat.maxStackCells = 0
+    gch.stat.cycleTableSize = 0
+    # init the rt
+    init(gch.zct)
+    init(gch.tempStack)
+    init(gch.decStack)
+    init(gch.marked)
+    init(gch.additionalRoots)
+    when hasThreadSupport:
+      init(gch.toDispose)
+    gch.gcThreadId = atomicInc(gHeapidGenerator) - 1
+    gcAssert(gch.gcThreadId >= 0, "invalid computed thread ID")
+
+proc cellsetReset(s: var CellSet) =
+  deinit(s)
+  init(s)
+
+{.push stacktrace:off.}
+
+proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: WalkOp) {.benign.} =
+  var d = cast[int](dest)
   case n.kind
-  of nkNone: assert(false)
   of nkSlot: forAllChildrenAux(cast[pointer](d +% n.offset), n.typ, op)
   of nkList:
-    for i in 0..n.len-1: forAllSlotsAux(dest, n.sons[i], op)
+    for i in 0..n.len-1:
+      # inlined for speed
+      if n.sons[i].kind == nkSlot:
+        if n.sons[i].typ.kind in {tyRef, tyString, tySequence}:
+          doOperation(cast[PPointer](d +% n.sons[i].offset)[], op)
+        else:
+          forAllChildrenAux(cast[pointer](d +% n.sons[i].offset),
+                            n.sons[i].typ, op)
+      else:
+        forAllSlotsAux(dest, n.sons[i], op)
   of nkCase:
     var m = selectBranch(dest, n)
     if m != nil: forAllSlotsAux(dest, m, op)
+  of nkNone: sysAssert(false, "forAllSlotsAux")
 
-proc forAllChildrenAux(dest: Pointer, mt: PNimType, op: TWalkOp) =
-  var d = cast[TAddress](dest)
+proc forAllChildrenAux(dest: pointer, mt: PNimType, op: WalkOp) =
+  var d = cast[int](dest)
   if dest == nil: return # nothing to do
   if ntfNoRefs notin mt.flags:
-    case mt.Kind
+    case mt.kind
+    of tyRef, tyString, tySequence: # leaf:
+      doOperation(cast[PPointer](d)[], op)
+    of tyObject, tyTuple:
+      forAllSlotsAux(dest, mt.node, op)
     of tyArray, tyArrayConstr, tyOpenArray:
       for i in 0..(mt.size div mt.base.size)-1:
         forAllChildrenAux(cast[pointer](d +% i *% mt.base.size), mt.base, op)
-    of tyRef, tyString, tySequence: # leaf:
-      doOperation(cast[ppointer](d)^, op)
-    of tyObject, tyTuple, tyPureObject:
-      forAllSlotsAux(dest, mt.node, op)
-    else: nil
-
-proc forAllChildren(cell: PCell, op: TWalkOp) =
-  assert(cell != nil)
-  assert(cell.typ != nil)
-  case cell.typ.Kind
-  of tyRef: # common case
-    forAllChildrenAux(cellToUsr(cell), cell.typ.base, op)
-  of tySequence:
-    var d = cast[TAddress](cellToUsr(cell))
-    var s = cast[PGenericSeq](d)
-    if s != nil:
-      for i in 0..s.len-1:
-        forAllChildrenAux(cast[pointer](d +% i *% cell.typ.base.size +%
-          GenericSeqSize), cell.typ.base, op)
-  of tyString: nil
-  else: assert(false)
-
-proc checkCollection {.inline.} =
-  # checks if a collection should be done
-  if recGcLock == 0:
-    collectCT(gch)
-
-proc newObj(typ: PNimType, size: int): pointer =
-  # generates a new object and sets its reference counter to 0
-  assert(typ.kind in {tyRef, tyString, tySequence})
-  checkCollection()
-  var res = cast[PCell](rawAlloc(allocator, size + sizeof(TCell)))
-  zeroMem(res, size+sizeof(TCell))
-  assert((cast[TAddress](res) and (MemAlign-1)) == 0)
-  # now it is buffered in the ZCT
-  res.typ = typ
-  when debugGC:
-    if framePtr != nil and framePtr.prev != nil:
-      res.filename = framePtr.prev.filename
-      res.line = framePtr.prev.line
-  res.refcount = rcZct # refcount is zero, but mark it to be in the ZCT  
-  assert(isAllocatedPtr(allocator, res))
-  # its refcount is zero, so add it to the ZCT:
-  block addToZCT: 
-    # we check the last 8 entries (cache line) for a slot
-    # that could be reused
-    var L = gch.zct.len
-    var d = gch.zct.d
+    else: discard
+
+proc forAllChildren(cell: PCell, op: WalkOp) =
+  gcAssert(cell != nil, "forAllChildren: cell is nil")
+  gcAssert(isAllocatedPtr(gch.region, cell), "forAllChildren: pointer not part of the heap")
+  gcAssert(cell.typ != nil, "forAllChildren: cell.typ is nil")
+  gcAssert cell.typ.kind in {tyRef, tySequence, tyString}, "forAllChildren: unknown GC'ed type"
+  let marker = cell.typ.marker
+  if marker != nil:
+    marker(cellToUsr(cell), op.int)
+  else:
+    case cell.typ.kind
+    of tyRef: # common case
+      forAllChildrenAux(cellToUsr(cell), cell.typ.base, op)
+    of tySequence:
+      var d = cast[int](cellToUsr(cell))
+      var s = cast[PGenericSeq](d)
+      if s != nil:
+        for i in 0..s.len-1:
+          forAllChildrenAux(cast[pointer](d +% align(GenericSeqSize, cell.typ.base.align) +% i *% cell.typ.base.size), cell.typ.base, op)
+    else: discard
+
+proc addNewObjToZCT(res: PCell, gch: var GcHeap) {.inline.} =
+  # we check the last 8 entries (cache line) for a slot that could be reused.
+  # In 63% of all cases we succeed here! But we have to optimize the heck
+  # out of this small linear search so that ``newObj`` is not slowed down.
+  #
+  # Slots to try          cache hit
+  # 1                     32%
+  # 4                     59%
+  # 8                     63%
+  # 16                    66%
+  # all slots             68%
+  var L = gch.zct.len
+  var d = gch.zct.d
+  when true:
+    # loop unrolled for performance:
+    template replaceZctEntry(i: untyped) =
+      c = d[i]
+      if c.refcount >=% rcIncrement:
+        c.refcount = c.refcount and not ZctFlag
+        d[i] = res
+        return
+    if L > 8:
+      var c: PCell
+      replaceZctEntry(L-1)
+      replaceZctEntry(L-2)
+      replaceZctEntry(L-3)
+      replaceZctEntry(L-4)
+      replaceZctEntry(L-5)
+      replaceZctEntry(L-6)
+      replaceZctEntry(L-7)
+      replaceZctEntry(L-8)
+      add(gch.zct, res)
+    else:
+      d[L] = res
+      inc(gch.zct.len)
+  else:
     for i in countdown(L-1, max(0, L-8)):
       var c = d[i]
       if c.refcount >=% rcIncrement:
-        c.refcount = c.refcount and not colorMask
+        c.refcount = c.refcount and not ZctFlag
         d[i] = res
-        break addToZCT
+        return
     add(gch.zct, res)
-  when logGC: writeCell("new cell", res)
+
+{.push stackTrace: off, profiler:off.}
+proc gcInvariant*() =
+  sysAssert(allocInv(gch.region), "injected")
+  when declared(markForDebug):
+    markForDebug(gch)
+{.pop.}
+
+template setFrameInfo(c: PCell) =
+  when leakDetector:
+    if framePtr != nil and framePtr.prev != nil:
+      c.filename = framePtr.prev.filename
+      c.line = framePtr.prev.line
+    else:
+      c.filename = nil
+      c.line = 0
+
+proc rawNewObj(typ: PNimType, size: int, gch: var GcHeap): pointer =
+  # generates a new object and sets its reference counter to 0
+  incTypeSize typ, size
+  sysAssert(allocInv(gch.region), "rawNewObj begin")
+  gcAssert(typ.kind in {tyRef, tyString, tySequence}, "newObj: 1")
+  collectCT(gch)
+  var res = cast[PCell](rawAlloc(gch.region, size + sizeof(Cell)))
+  #gcAssert typ.kind in {tyString, tySequence} or size >= typ.base.size, "size too small"
+  gcAssert((cast[int](res) and (MemAlign-1)) == 0, "newObj: 2")
+  # now it is buffered in the ZCT
+  res.typ = typ
+  setFrameInfo(res)
+  # refcount is zero, color is black, but mark it to be in the ZCT
+  res.refcount = ZctFlag
+  sysAssert(isAllocatedPtr(gch.region, res), "newObj: 3")
+  # its refcount is zero, so add it to the ZCT:
+  addNewObjToZCT(res, gch)
+  logCell("new cell", res)
+  track("rawNewObj", res, size)
   gcTrace(res, csAllocated)
+  when useCellIds:
+    inc gch.idGenerator
+    res.id = gch.idGenerator * 1000_000 + gch.gcThreadId
   result = cellToUsr(res)
+  sysAssert(allocInv(gch.region), "rawNewObj end")
+
+{.pop.} # .stackTrace off
+{.pop.} # .profiler off
+
+proc newObjNoInit(typ: PNimType, size: int): pointer {.compilerRtl.} =
+  result = rawNewObj(typ, size, gch)
+  when defined(memProfiler): nimProfile(size)
+
+proc newObj(typ: PNimType, size: int): pointer {.compilerRtl, noinline.} =
+  result = rawNewObj(typ, size, gch)
+  zeroMem(result, size)
+  when defined(memProfiler): nimProfile(size)
+
+{.push overflowChecks: on.}
+proc newSeq(typ: PNimType, len: int): pointer {.compilerRtl.} =
+  # `newObj` already uses locks, so no need for them here.
+  let size = align(GenericSeqSize, typ.base.align) + len * typ.base.size
+  result = newObj(typ, size)
+  cast[PGenericSeq](result).len = len
+  cast[PGenericSeq](result).reserved = len
+  when defined(memProfiler): nimProfile(size)
+{.pop.}
+
+proc newObjRC1(typ: PNimType, size: int): pointer {.compilerRtl, noinline.} =
+  # generates a new object and sets its reference counter to 1
+  incTypeSize typ, size
+  sysAssert(allocInv(gch.region), "newObjRC1 begin")
+  gcAssert(typ.kind in {tyRef, tyString, tySequence}, "newObj: 1")
+  collectCT(gch)
+  sysAssert(allocInv(gch.region), "newObjRC1 after collectCT")
 
-proc newSeq(typ: PNimType, len: int): pointer =
-  result = newObj(typ, addInt(mulInt(len, typ.base.size), GenericSeqSize))
+  var res = cast[PCell](rawAlloc(gch.region, size + sizeof(Cell)))
+  sysAssert(allocInv(gch.region), "newObjRC1 after rawAlloc")
+  sysAssert((cast[int](res) and (MemAlign-1)) == 0, "newObj: 2")
+  # now it is buffered in the ZCT
+  res.typ = typ
+  setFrameInfo(res)
+  res.refcount = rcIncrement # refcount is 1
+  sysAssert(isAllocatedPtr(gch.region, res), "newObj: 3")
+  logCell("new cell", res)
+  track("newObjRC1", res, size)
+  gcTrace(res, csAllocated)
+  when useCellIds:
+    inc gch.idGenerator
+    res.id = gch.idGenerator * 1000_000 + gch.gcThreadId
+  result = cellToUsr(res)
+  zeroMem(result, size)
+  sysAssert(allocInv(gch.region), "newObjRC1 end")
+  when defined(memProfiler): nimProfile(size)
+
+{.push overflowChecks: on.}
+proc newSeqRC1(typ: PNimType, len: int): pointer {.compilerRtl.} =
+  let size = align(GenericSeqSize, typ.base.align) + len * typ.base.size
+  result = newObjRC1(typ, size)
   cast[PGenericSeq](result).len = len
-  cast[PGenericSeq](result).space = len
+  cast[PGenericSeq](result).reserved = len
+  when defined(memProfiler): nimProfile(size)
+{.pop.}
 
-proc growObj(old: pointer, newsize: int): pointer =
-  checkCollection()
+proc growObj(old: pointer, newsize: int, gch: var GcHeap): pointer =
+  collectCT(gch)
   var ol = usrToCell(old)
-  assert(ol.typ != nil)
-  assert(ol.typ.kind in {tyString, tySequence})
-  var res = cast[PCell](rawAlloc(allocator, newsize + sizeof(TCell)))
-  var elemSize = 1
+  sysAssert(ol.typ != nil, "growObj: 1")
+  gcAssert(ol.typ.kind in {tyString, tySequence}, "growObj: 2")
+  sysAssert(allocInv(gch.region), "growObj begin")
+
+  var res = cast[PCell](rawAlloc(gch.region, newsize + sizeof(Cell)))
+  var elemSize,elemAlign = 1
   if ol.typ.kind != tyString:
     elemSize = ol.typ.base.size
-  
-  var oldsize = cast[PGenericSeq](old).len*elemSize + GenericSeqSize
-  copyMem(res, ol, oldsize + sizeof(TCell))
-  zeroMem(cast[pointer](cast[TAddress](res)+% oldsize +% sizeof(TCell)),
+    elemAlign = ol.typ.base.align
+  incTypeSize ol.typ, newsize
+
+  var oldsize = align(GenericSeqSize, elemAlign) + cast[PGenericSeq](old).len * elemSize
+  copyMem(res, ol, oldsize + sizeof(Cell))
+  zeroMem(cast[pointer](cast[int](res) +% oldsize +% sizeof(Cell)),
           newsize-oldsize)
-  assert((cast[TAddress](res) and (MemAlign-1)) == 0)
-  assert(res.refcount shr rcShift <=% 1)
-  #if res.refcount <% rcIncrement:
-  #  add(gch.zct, res)
-  #else: # XXX: what to do here?
-  #  decRef(ol)
-  if (ol.refcount and colorMask) == rcZct:
-    var j = gch.zct.len-1
-    var d = gch.zct.d
-    while j >= 0: 
-      if d[j] == ol:
-        d[j] = res
-        break
-      dec(j)
-  if canBeCycleRoot(ol): excl(gch.cycleRoots, ol)
-  when logGC:
-    writeCell("growObj old cell", ol)
-    writeCell("growObj new cell", res)
+  sysAssert((cast[int](res) and (MemAlign-1)) == 0, "growObj: 3")
+  # This can be wrong for intermediate temps that are nevertheless on the
+  # heap because of lambda lifting:
+  #gcAssert(res.refcount shr rcShift <=% 1, "growObj: 4")
+  logCell("growObj old cell", ol)
+  logCell("growObj new cell", res)
   gcTrace(ol, csZctFreed)
   gcTrace(res, csAllocated)
-  when reallyDealloc: rawDealloc(allocator, ol)
-  else:
-    assert(ol.typ != nil)
-    zeroMem(ol, sizeof(TCell))
+  track("growObj old", ol, 0)
+  track("growObj new", res, newsize)
+  # since we steal the old seq's contents, we set the old length to 0.
+  cast[PGenericSeq](old).len = 0
+  when useCellIds:
+    inc gch.idGenerator
+    res.id = gch.idGenerator * 1000_000 + gch.gcThreadId
   result = cellToUsr(res)
+  sysAssert(allocInv(gch.region), "growObj end")
+  when defined(memProfiler): nimProfile(newsize-oldsize)
+
+proc growObj(old: pointer, newsize: int): pointer {.rtl.} =
+  result = growObj(old, newsize, gch)
+
+{.push profiler:off, stackTrace:off.}
 
 # ---------------- cycle collector -------------------------------------------
 
-proc doOperation(p: pointer, op: TWalkOp) =
+proc freeCyclicCell(gch: var GcHeap, c: PCell) =
+  prepareDealloc(c)
+  gcTrace(c, csCycFreed)
+  track("cycle collector dealloc cell", c, 0)
+  logCell("cycle collector dealloc cell", c)
+  when reallyDealloc:
+    sysAssert(allocInv(gch.region), "free cyclic cell")
+    beforeDealloc(gch, c, "freeCyclicCell: stack trash")
+    rawDealloc(gch.region, c)
+  else:
+    gcAssert(c.typ != nil, "freeCyclicCell")
+    zeroMem(c, sizeof(Cell))
+
+proc sweep(gch: var GcHeap) =
+  for x in allObjects(gch.region):
+    if isCell(x):
+      # cast to PCell is correct here:
+      var c = cast[PCell](x)
+      if c notin gch.marked: freeCyclicCell(gch, c)
+
+proc markS(gch: var GcHeap, c: PCell) =
+  gcAssert isAllocatedPtr(gch.region, c), "markS: foreign heap root detected A!"
+  incl(gch.marked, c)
+  gcAssert gch.tempStack.len == 0, "stack not empty!"
+  forAllChildren(c, waMarkPrecise)
+  while gch.tempStack.len > 0:
+    dec gch.tempStack.len
+    var d = gch.tempStack.d[gch.tempStack.len]
+    gcAssert isAllocatedPtr(gch.region, d), "markS: foreign heap root detected B!"
+    if not containsOrIncl(gch.marked, d):
+      forAllChildren(d, waMarkPrecise)
+
+proc markGlobals(gch: var GcHeap) {.raises: [].} =
+  if gch.gcThreadId == 0:
+    for i in 0 .. globalMarkersLen-1: globalMarkers[i]()
+  for i in 0 .. threadLocalMarkersLen-1: threadLocalMarkers[i]()
+  let d = gch.additionalRoots.d
+  for i in 0 .. gch.additionalRoots.len-1: markS(gch, d[i])
+
+when logGC:
+  var
+    cycleCheckA: array[100, PCell]
+    cycleCheckALen = 0
+
+  proc alreadySeen(c: PCell): bool =
+    for i in 0 .. cycleCheckALen-1:
+      if cycleCheckA[i] == c: return true
+    if cycleCheckALen == len(cycleCheckA):
+      gcAssert(false, "cycle detection overflow")
+      rawQuit 1
+    cycleCheckA[cycleCheckALen] = c
+    inc cycleCheckALen
+
+  proc debugGraph(s: PCell) =
+    if alreadySeen(s):
+      writeCell("child cell (already seen) ", s)
+    else:
+      writeCell("cell {", s)
+      forAllChildren(s, waDebug)
+      c_printf("}\n")
+
+proc doOperation(p: pointer, op: WalkOp) =
   if p == nil: return
   var c: PCell = usrToCell(p)
-  assert(c != nil)
-  case op # faster than function pointers because of easy prediction
+  gcAssert(c != nil, "doOperation: 1")
+  # the 'case' should be faster than function pointers because of easy
+  # prediction:
+  case op
   of waZctDecRef:
-    assert(c.refcount >=% rcIncrement)
-    c.refcount = c.refcount -% rcIncrement
-    when logGC: writeCell("decref (from doOperation)", c)
-    if c.refcount <% rcIncrement: addZCT(gch.zct, c)
+    #if not isAllocatedPtr(gch.region, c):
+    #  c_printf("[GC] decref bug: %p", c)
+    gcAssert(isAllocatedPtr(gch.region, c), "decRef: waZctDecRef")
+    gcAssert(c.refcount >=% rcIncrement, "doOperation 2")
+    logCell("decref (from doOperation)", c)
+    track("waZctDecref", p, 0)
+    decRef(c)
   of waPush:
     add(gch.tempStack, c)
-  of waCycleDecRef:
-    assert(c.refcount >=% rcIncrement)
-    c.refcount = c.refcount -% rcIncrement
-
-# we now use a much simpler and non-recursive algorithm for cycle removal
-proc collectCycles(gch: var TGcHeap) =
-  var tabSize = 0
-  for c in elements(gch.cycleRoots):
-    inc(tabSize)
-    forallChildren(c, waCycleDecRef)
-  gch.stat.cycleTableSize = max(gch.stat.cycleTableSize, tabSize)
-
-  # restore reference counts (a depth-first traversal is needed):
-  var marker: TCellSet
-  Init(marker)
-  for c in elements(gch.cycleRoots):
-    if c.refcount >=% rcIncrement:
-      if not containsOrIncl(marker, c):
-        gch.tempStack.len = 0
-        forAllChildren(c, waPush)
-        while gch.tempStack.len > 0:
-          dec(gch.tempStack.len)
-          var d = gch.tempStack.d[gch.tempStack.len]
-          d.refcount = d.refcount +% rcIncrement
-          if d in gch.cycleRoots and not containsOrIncl(marker, d):
-            forAllChildren(d, waPush)
-  # remove cycles:
-  for c in elements(gch.cycleRoots):
-    if c.refcount <% rcIncrement:
-      gch.tempStack.len = 0
-      forAllChildren(c, waPush)
-      while gch.tempStack.len > 0:
-        dec(gch.tempStack.len)
-        var d = gch.tempStack.d[gch.tempStack.len]
-        if d.refcount <% rcIncrement:
-          if d notin gch.cycleRoots: # d is leaf of c and not part of cycle
-            addZCT(gch.zct, d)
-            when logGC: writeCell("add to ZCT (from cycle collector)", d)
-      prepareDealloc(c)
-      gcTrace(c, csCycFreed)
-      when logGC: writeCell("cycle collector dealloc cell", c)
-      when reallyDealloc: rawDealloc(allocator, c)
-      else:
-        assert(c.typ != nil)
-        zeroMem(c, sizeof(TCell))
-  Deinit(gch.cycleRoots)
-  Init(gch.cycleRoots)
+  of waMarkGlobal:
+    markS(gch, c)
+  of waMarkPrecise:
+    add(gch.tempStack, c)
+  #of waDebug: debugGraph(c)
+
+proc nimGCvisit(d: pointer, op: int) {.compilerRtl.} =
+  doOperation(d, WalkOp(op))
 
-proc gcMark(p: pointer) {.inline.} =
+proc collectZCT(gch: var GcHeap): bool {.benign, raises: [].}
+
+proc collectCycles(gch: var GcHeap) {.raises: [].} =
+  when hasThreadSupport:
+    for c in gch.toDispose:
+      nimGCunref(c)
+  # ensure the ZCT 'color' is not used:
+  while gch.zct.len > 0: discard collectZCT(gch)
+  cellsetReset(gch.marked)
+  var d = gch.decStack.d
+  for i in 0..gch.decStack.len-1:
+    sysAssert isAllocatedPtr(gch.region, d[i]), "collectCycles"
+    markS(gch, d[i])
+  markGlobals(gch)
+  sweep(gch)
+
+proc gcMark(gch: var GcHeap, p: pointer) {.inline.} =
   # the addresses are not as cells on the stack, so turn them to cells:
-  var cell = usrToCell(p)
-  var c = cast[TAddress](cell)
-  if c >% PageSize and (c and (MemAlign-1)) == 0:
+  sysAssert(allocInv(gch.region), "gcMark begin")
+  var c = cast[int](p)
+  if c >% PageSize:
     # fast check: does it look like a cell?
-    if isAllocatedPtr(allocator, cell): 
+    var objStart = cast[PCell](interiorAllocatedPtr(gch.region, p))
+    if objStart != nil:
       # mark the cell:
-      cell.refcount = cell.refcount +% rcIncrement
-      add(gch.decStack, cell)
-
-# ----------------- stack management --------------------------------------
-#  inspired from Smart Eiffel
-
-proc stackSize(): int {.noinline.} =
-  var stackTop: array[0..1, pointer]
-  result = abs(cast[int](addr(stackTop[0])) - cast[int](stackBottom))
-
-when defined(sparc): # For SPARC architecture.
-  proc isOnStack(p: pointer): bool =
-    var stackTop: array [0..1, pointer]
-    var b = cast[TAddress](stackBottom)
-    var a = cast[TAddress](addr(stackTop[0]))
-    var x = cast[TAddress](p)
-    result = x >=% a and x <=% b
-
-  proc markStackAndRegisters(gch: var TGcHeap) {.noinline, cdecl.} =
-    when defined(sparcv9):
-      asm  """"flushw \n" """
-    else:
-      asm  """"ta      0x3   ! ST_FLUSH_WINDOWS\n" """
-
-    var
-      max = stackBottom
-      sp: PPointer
-      stackTop: array[0..1, pointer]
-    sp = addr(stackTop[0])
-    # Addresses decrease as the stack grows.
-    while sp <= max:
-      gcMark(sp^)
-      sp = cast[ppointer](cast[TAddress](sp) +% sizeof(pointer))
-
-elif defined(ELATE):
-  {.error: "stack marking code is to be written for this architecture".}
-
-elif defined(hppa) or defined(hp9000) or defined(hp9000s300) or
-     defined(hp9000s700) or defined(hp9000s800) or defined(hp9000s820):
-  # ---------------------------------------------------------------------------
-  # Generic code for architectures where addresses increase as the stack grows.
-  # ---------------------------------------------------------------------------
-  proc isOnStack(p: pointer): bool =
-    var stackTop: array [0..1, pointer]
-    var a = cast[TAddress](stackBottom)
-    var b = cast[TAddress](addr(stackTop[0]))
-    var x = cast[TAddress](p)
-    result = x >=% a and x <=% b
-
-  var
-    jmpbufSize {.importc: "sizeof(jmp_buf)", nodecl.}: int
-      # a little hack to get the size of a TJmpBuf in the generated C code
-      # in a platform independant way
-
-  proc markStackAndRegisters(gch: var TGcHeap) {.noinline, cdecl.} =
-    var registers: C_JmpBuf
-    if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
-      var max = cast[TAddress](stackBottom)
-      var sp = cast[TAddress](addr(registers)) +% jmpbufSize -% sizeof(pointer)
-      # sp will traverse the JMP_BUF as well (jmp_buf size is added,
-      # otherwise sp would be below the registers structure).
-      while sp >=% max:
-        gcMark(cast[ppointer](sp)^)
-        sp = sp -% sizeof(pointer)
-
-else:
-  # ---------------------------------------------------------------------------
-  # Generic code for architectures where addresses decrease as the stack grows.
-  # ---------------------------------------------------------------------------
-  proc isOnStack(p: pointer): bool =
-    var stackTop: array [0..1, pointer]
-    var b = cast[TAddress](stackBottom)
-    var a = cast[TAddress](addr(stackTop[0]))
-    var x = cast[TAddress](p)
-    result = x >=% a and x <=% b
-
-  proc markStackAndRegisters(gch: var TGcHeap) {.noinline, cdecl.} =
-    # We use a jmp_buf buffer that is in the C stack.
-    # Used to traverse the stack and registers assuming
-    # that 'setjmp' will save registers in the C stack.
-    var registers: C_JmpBuf
-    if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
-      var max = cast[TAddress](stackBottom)
-      var sp = cast[TAddress](addr(registers))
-      while sp <=% max:
-        gcMark(cast[ppointer](sp)^)
-        sp = sp +% sizeof(pointer)
-
-# ----------------------------------------------------------------------------
-# end of non-portable code
-# ----------------------------------------------------------------------------
-
-proc CollectZCT(gch: var TGcHeap) =
-  # Note: Freeing may add child objects to the ZCT! So essentially we do 
-  # deep freeing, which is bad for incremental operation. In order to 
+      incRef(objStart)
+      add(gch.decStack, objStart)
+    when false:
+      let cell = usrToCell(p)
+      if isAllocatedPtr(gch.region, cell):
+        sysAssert false, "allocated pointer but not interior?"
+        # mark the cell:
+        incRef(cell)
+        add(gch.decStack, cell)
+  sysAssert(allocInv(gch.region), "gcMark end")
+
+#[
+  This method is conditionally marked with an attribute so that it gets ignored by the LLVM ASAN
+  (Address SANitizer) intrumentation as it will raise false errors due to the implementation of
+  garbage collection that is used by Nim. For more information, please see the documentation of
+  `CLANG_NO_SANITIZE_ADDRESS` in `lib/nimbase.h`.
+ ]#
+proc markStackAndRegisters(gch: var GcHeap) {.noinline, cdecl,
+    codegenDecl: "CLANG_NO_SANITIZE_ADDRESS N_LIB_PRIVATE $# $#$#".} =
+  forEachStackSlot(gch, gcMark)
+
+proc collectZCT(gch: var GcHeap): bool =
+  # Note: Freeing may add child objects to the ZCT! So essentially we do
+  # deep freeing, which is bad for incremental operation. In order to
   # avoid a deep stack, we move objects to keep the ZCT small.
   # This is performance critical!
+  const workPackage = 100
   var L = addr(gch.zct.len)
-  while L^ > 0:
+
+  when withRealTime:
+    var steps = workPackage
+    var t0: Ticks
+    if gch.maxPause > 0: t0 = getticks()
+  while L[] > 0:
     var c = gch.zct.d[0]
+    sysAssert(isAllocatedPtr(gch.region, c), "CollectZCT: isAllocatedPtr")
     # remove from ZCT:
-    assert((c.refcount and colorMask) == rcZct)
-    c.refcount = c.refcount and not colorMask
-    gch.zct.d[0] = gch.zct.d[L^ - 1]
-    dec(L^)
-    if c.refcount <% rcIncrement: 
+    gcAssert((c.refcount and ZctFlag) == ZctFlag, "collectZCT")
+
+    c.refcount = c.refcount and not ZctFlag
+    gch.zct.d[0] = gch.zct.d[L[] - 1]
+    dec(L[])
+    when withRealTime: dec steps
+    if c.refcount <% rcIncrement:
       # It may have a RC > 0, if it is in the hardware stack or
       # it has not been removed yet from the ZCT. This is because
-      # ``incref`` does not bother to remove the cell from the ZCT 
+      # ``incref`` does not bother to remove the cell from the ZCT
       # as this might be too slow.
       # In any case, it should be removed from the ZCT. But not
       # freed. **KEEP THIS IN MIND WHEN MAKING THIS INCREMENTAL!**
-      if canBeCycleRoot(c): excl(gch.cycleRoots, c)
-      when logGC: writeCell("zct dealloc cell", c)
+      logCell("zct dealloc cell", c)
+      track("zct dealloc cell", c, 0)
       gcTrace(c, csZctFreed)
       # We are about to free the object, call the finalizer BEFORE its
       # children are deleted as well, because otherwise the finalizer may
       # access invalid memory. This is done by prepareDealloc():
       prepareDealloc(c)
       forAllChildren(c, waZctDecRef)
-      when reallyDealloc: rawDealloc(allocator, c)
+      when reallyDealloc:
+        sysAssert(allocInv(gch.region), "collectZCT: rawDealloc")
+        beforeDealloc(gch, c, "collectZCT: stack trash")
+        rawDealloc(gch.region, c)
       else:
-        assert(c.typ != nil)
-        zeroMem(c, sizeof(TCell))
-
-proc unmarkStackAndRegisters(gch: var TGcHeap) = 
+        sysAssert(c.typ != nil, "collectZCT 2")
+        zeroMem(c, sizeof(Cell))
+    when withRealTime:
+      if steps == 0:
+        steps = workPackage
+        if gch.maxPause > 0:
+          let duration = getticks() - t0
+          # the GC's measuring is not accurate and needs some cleanup actions
+          # (stack unmarking), so subtract some short amount of time in
+          # order to miss deadlines less often:
+          if duration >= gch.maxPause - 50_000:
+            return false
+  result = true
+
+proc unmarkStackAndRegisters(gch: var GcHeap) =
   var d = gch.decStack.d
   for i in 0..gch.decStack.len-1:
-    assert isAllocatedPtr(allocator, d[i])
-    decRef(d[i]) # OPT: cannot create a cycle!
+    sysAssert isAllocatedPtr(gch.region, d[i]), "unmarkStackAndRegisters"
+    decRef(d[i])
   gch.decStack.len = 0
 
-proc collectCT(gch: var TGcHeap) =
-  if gch.zct.len >= ZctThreshold or (cycleGC and
-      getOccupiedMem() >= cycleThreshold) or stressGC:
+proc collectCTBody(gch: var GcHeap) {.raises: [].} =
+  when withRealTime:
+    let t0 = getticks()
+  sysAssert(allocInv(gch.region), "collectCT: begin")
+
+  when nimCoroutines:
+    for stack in gch.stack.items():
+      gch.stat.maxStackSize = max(gch.stat.maxStackSize, stack.stackSize())
+  else:
     gch.stat.maxStackSize = max(gch.stat.maxStackSize, stackSize())
-    assert(gch.decStack.len == 0)
-    markStackAndRegisters(gch)
-    gch.stat.maxStackCells = max(gch.stat.maxStackCells, gch.decStack.len)
-    inc(gch.stat.stackScans)
-    collectZCT(gch)
+  sysAssert(gch.decStack.len == 0, "collectCT")
+  prepareForInteriorPointerChecking(gch.region)
+  markStackAndRegisters(gch)
+  gch.stat.maxStackCells = max(gch.stat.maxStackCells, gch.decStack.len)
+  inc(gch.stat.stackScans)
+  if collectZCT(gch):
     when cycleGC:
-      if getOccupiedMem() >= cycleThreshold or stressGC:
+      if getOccupiedMem(gch.region) >= gch.cycleThreshold or alwaysCycleGC:
         collectCycles(gch)
-        collectZCT(gch)
+        #discard collectZCT(gch)
         inc(gch.stat.cycleCollections)
-        cycleThreshold = max(InitialCycleThreshold, getOccupiedMem() *
-                             cycleIncrease)
-        gch.stat.maxThreshold = max(gch.stat.maxThreshold, cycleThreshold)
-    unmarkStackAndRegisters(gch)
-
-proc GC_fullCollect() =
-  var oldThreshold = cycleThreshold
-  cycleThreshold = 0 # forces cycle collection
-  collectCT(gch)
-  cycleThreshold = oldThreshold
-
-proc GC_getStatistics(): string =
-  GC_disable()
-  result = "[GC] total memory: " & $(getTotalMem()) & "\n" &
-           "[GC] occupied memory: " & $(getOccupiedMem()) & "\n" &
-           "[GC] stack scans: " & $gch.stat.stackScans & "\n" &
-           "[GC] stack cells: " & $gch.stat.maxStackCells & "\n" &
-           "[GC] cycle collections: " & $gch.stat.cycleCollections & "\n" &
-           "[GC] max threshold: " & $gch.stat.maxThreshold & "\n" &
-           "[GC] zct capacity: " & $gch.zct.cap & "\n" &
-           "[GC] max cycle table size: " & $gch.stat.cycleTableSize & "\n" &
-           "[GC] max stack size: " & $gch.stat.maxStackSize
-  when traceGC: writeLeakage()
-  GC_enable()
+        gch.cycleThreshold = max(InitialCycleThreshold, getOccupiedMem() *
+                                 CycleIncrease)
+        gch.stat.maxThreshold = max(gch.stat.maxThreshold, gch.cycleThreshold)
+  unmarkStackAndRegisters(gch)
+  sysAssert(allocInv(gch.region), "collectCT: end")
+
+  when withRealTime:
+    let duration = getticks() - t0
+    gch.stat.maxPause = max(gch.stat.maxPause, duration)
+    when defined(reportMissedDeadlines):
+      if gch.maxPause > 0 and duration > gch.maxPause:
+        c_printf("[GC] missed deadline: %ld\n", duration)
+
+proc collectCT(gch: var GcHeap) =
+  if (gch.zct.len >= gch.zctThreshold or (cycleGC and
+      getOccupiedMem(gch.region)>=gch.cycleThreshold) or alwaysGC) and
+      gch.recGcLock == 0:
+    when false:
+      prepareForInteriorPointerChecking(gch.region)
+      cellsetReset(gch.marked)
+      markForDebug(gch)
+    collectCTBody(gch)
+    gch.zctThreshold = max(InitialZctThreshold, gch.zct.len * CycleIncrease)
+
+proc GC_collectZct*() =
+  ## Collect the ZCT (zero count table). Unstable, experimental API for
+  ## testing purposes.
+  ## DO NOT USE!
+  collectCTBody(gch)
+
+when withRealTime:
+  proc toNano(x: int): Nanos {.inline.} =
+    result = x * 1000
+
+  proc GC_setMaxPause*(MaxPauseInUs: int) =
+    gch.maxPause = MaxPauseInUs.toNano
+
+  proc GC_step(gch: var GcHeap, us: int, strongAdvice: bool) =
+    gch.maxPause = us.toNano
+    if (gch.zct.len >= gch.zctThreshold or (cycleGC and
+        getOccupiedMem(gch.region)>=gch.cycleThreshold) or alwaysGC) or
+        strongAdvice:
+      collectCTBody(gch)
+      gch.zctThreshold = max(InitialZctThreshold, gch.zct.len * CycleIncrease)
+
+  proc GC_step*(us: int, strongAdvice = false, stackSize = -1) {.noinline.} =
+    if stackSize >= 0:
+      var stackTop {.volatile.}: pointer
+      gch.getActiveStack().pos = addr(stackTop)
+
+      for stack in gch.stack.items():
+        stack.bottomSaved = stack.bottom
+        when stackIncreases:
+          stack.bottom = cast[pointer](
+            cast[int](stack.pos) - sizeof(pointer) * 6 - stackSize)
+        else:
+          stack.bottom = cast[pointer](
+            cast[int](stack.pos) + sizeof(pointer) * 6 + stackSize)
+
+    GC_step(gch, us, strongAdvice)
+
+    if stackSize >= 0:
+      for stack in gch.stack.items():
+        stack.bottom = stack.bottomSaved
+
+when not defined(useNimRtl):
+  proc GC_disable() =
+    inc(gch.recGcLock)
+  proc GC_enable() =
+    when defined(nimDoesntTrackDefects):
+      if gch.recGcLock <= 0:
+        raise newException(AssertionDefect,
+            "API usage error: GC_enable called but GC is already enabled")
+    dec(gch.recGcLock)
+
+  proc GC_setStrategy(strategy: GC_Strategy) =
+    discard
+
+  proc GC_enableMarkAndSweep() =
+    gch.cycleThreshold = InitialCycleThreshold
+
+  proc GC_disableMarkAndSweep() =
+    gch.cycleThreshold = high(typeof(gch.cycleThreshold))-1
+    # set to the max value to suppress the cycle detector
+
+  proc GC_fullCollect() =
+    var oldThreshold = gch.cycleThreshold
+    gch.cycleThreshold = 0 # forces cycle collection
+    collectCT(gch)
+    gch.cycleThreshold = oldThreshold
+
+  proc GC_getStatistics(): string =
+    result = "[GC] total memory: " & $(getTotalMem()) & "\n" &
+             "[GC] occupied memory: " & $(getOccupiedMem()) & "\n" &
+             "[GC] stack scans: " & $gch.stat.stackScans & "\n" &
+             "[GC] stack cells: " & $gch.stat.maxStackCells & "\n" &
+             "[GC] cycle collections: " & $gch.stat.cycleCollections & "\n" &
+             "[GC] max threshold: " & $gch.stat.maxThreshold & "\n" &
+             "[GC] zct capacity: " & $gch.zct.cap & "\n" &
+             "[GC] max cycle table size: " & $gch.stat.cycleTableSize & "\n" &
+             "[GC] max pause time [ms]: " & $(gch.stat.maxPause div 1000_000) & "\n"
+    when nimCoroutines:
+      result.add "[GC] number of stacks: " & $gch.stack.len & "\n"
+      for stack in items(gch.stack):
+        result.add "[GC]   stack " & stack.bottom.repr & "[GC]     max stack size " & cast[pointer](stack.maxStackSize).repr & "\n"
+    else:
+      # this caused memory leaks, see #10488 ; find a way without `repr`
+      # maybe using a local copy of strutils.toHex or snprintf
+      when defined(logGC):
+        result.add "[GC] stack bottom: " & gch.stack.bottom.repr
+      result.add "[GC] max stack size: " & $gch.stat.maxStackSize & "\n"
+
+{.pop.} # profiler: off, stackTrace: off