Diffstat (limited to 'lib/system')
-rw-r--r--  lib/system/alloc.nim      |    5
-rw-r--r--  lib/system/cellsets.nim   |   35
-rw-r--r--  lib/system/dyncalls.nim   |   12
-rw-r--r--  lib/system/gc.nim         |    2
-rw-r--r--  lib/system/gc2.nim        | 1521
-rw-r--r--  lib/system/jssys.nim      |  113
-rw-r--r--  lib/system/nimscript.nim  |    2
-rw-r--r--  lib/system/profiler.nim   |   26
-rw-r--r--  lib/system/repr.nim       |    6
-rw-r--r--  lib/system/sysstr.nim     |    2
10 files changed, 614 insertions, 1110 deletions
diff --git a/lib/system/alloc.nim b/lib/system/alloc.nim
index 3ebbc8c1e..b4462ed83 100644
--- a/lib/system/alloc.nim
+++ b/lib/system/alloc.nim
@@ -40,7 +40,7 @@ when defined(emscripten):
     MAP_PRIVATE = 2'i32        # Changes are private
 
   var MAP_ANONYMOUS {.importc: "MAP_ANONYMOUS", header: "<sys/mman.h>".}: cint
-  type 
+  type
     PEmscriptenMMapBlock = ptr EmscriptenMMapBlock
     EmscriptenMMapBlock {.pure, inheritable.} = object
       realSize: int        # size of previous chunk; for coalescing
@@ -399,6 +399,9 @@ iterator allObjects(m: MemRegion): pointer {.inline.} =
           let c = cast[PBigChunk](c)
           yield addr(c.data)
 
+proc iterToProc*(iter: typed, envType: typedesc; procName: untyped) {.
+                      magic: "Plugin", compileTime.}
+
 proc isCell(p: pointer): bool {.inline.} =
   result = cast[ptr FreeCell](p).zeroField >% 1
 
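The new iterToProc magic turns an iterator into a callable proc whose iteration state lives in an explicit env object, so a heap traversal can be suspended and resumed across calls. gc2.nim below instantiates it as allObjectsAsProc over an ObjectSpaceIter. A rough sketch of the resulting call pattern, inferred from its use in sweep (the generated signature is an assumption, not part of this patch):

  var it = ObjectSpaceIter()                  # state 0: iteration in progress
  while true:
    let p = allObjectsAsProc(gch.region, addr it)
    if it.state < 0: break                    # iterator exhausted
    # ... inspect the heap object at p ...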
diff --git a/lib/system/cellsets.nim b/lib/system/cellsets.nim
index bb5de6f42..776a2b7ec 100644
--- a/lib/system/cellsets.nim
+++ b/lib/system/cellsets.nim
@@ -201,6 +201,41 @@ iterator elements(t: CellSet): PCell {.inline.} =
       inc(i)
     r = r.next
 
+when false:
+  type
+    CellSetIter = object
+      p: PPageDesc
+      i, w, j: int
+
+  proc next(it: var CellSetIter): PCell =
+    while true:
+      while it.w != 0:         # test all remaining bits for zero
+        if (it.w and 1) != 0:  # the bit is set!
+          result = cast[PCell]((it.p.key shl PageShift) or
+                               (it.i shl IntShift +% it.j) *% MemAlign)
+
+          inc(it.j)
+          it.w = it.w shr 1
+          return
+        else:
+          inc(it.j)
+          it.w = it.w shr 1
+      # load next w:
+      if it.i >= high(it.p.bits):
+        it.i = 0
+        it.j = 0
+        it.p = it.p.next
+        if it.p == nil: return nil
+      else:
+        inc it.i
+      it.w = it.p.bits[it.i]
+
+  proc init(it: var CellSetIter; t: CellSet): PCell =
+    it.p = t.head
+    it.i = -1
+    it.w = 0
+    result = it.next
+
 iterator elementsExcept(t, s: CellSet): PCell {.inline.} =
   var r = t.head
   while r != nil:
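The disabled CellSetIter performs the same walk as the elements iterator above, but with its state lifted into an object so the traversal can be interrupted between GC work packages. A minimal usage sketch, assuming the when false: block were enabled (processCell is a hypothetical per-cell callback):

  var it: CellSetIter
  var c = init(it, cells)        # positions the iterator, returns the first cell
  while c != nil:
    processCell(c)
    c = it.next                  # resumes exactly where the last call stopped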
diff --git a/lib/system/dyncalls.nim b/lib/system/dyncalls.nim
index 908aa551b..6dc8999d1 100644
--- a/lib/system/dyncalls.nim
+++ b/lib/system/dyncalls.nim
@@ -68,7 +68,10 @@ when defined(posix):
 
   proc nimLoadLibrary(path: string): LibHandle =
     result = dlopen(path, RTLD_NOW)
-    #c_fprintf(c_stdout, "%s\n", dlerror())
+    when defined(nimDebugDlOpen):
+      let error = dlerror()
+      if error != nil:
+        c_fprintf(c_stdout, "%s\n", error)
 
   proc nimGetProcAddr(lib: LibHandle, name: cstring): ProcAddr =
     result = dlsym(lib, name)
@@ -105,7 +108,12 @@ elif defined(windows) or defined(dos):
 
   proc nimGetProcAddr(lib: LibHandle, name: cstring): ProcAddr =
     result = getProcAddress(cast[THINSTANCE](lib), name)
-    if result == nil: procAddrError(name)
+    if result != nil: return
+    for i in countup(0, 50):
+      var decorated = "_" & $name & "@" & $(i * 4)
+      result = getProcAddress(cast[THINSTANCE](lib), cstring(decorated))
+      if result != nil: return
+    procAddrError(name)
 
 else:
   {.error: "no implementation for dyncalls".}
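The new Windows fallback handles stdcall name decoration: a win32 proc taking N bytes of arguments is commonly exported as _name@N, so the loop above probes _name@0, _name@4, ... up to _name@200 before reporting an error. An illustrative case (not from this patch):

  # MessageBoxA takes 4 pointer/int arguments of 4 bytes each on win32:
  let decorated = "_" & "MessageBoxA" & "@" & $(4 * 4)   # "_MessageBoxA@16"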
diff --git a/lib/system/gc.nim b/lib/system/gc.nim
index 0c632aeb1..c25cf4606 100644
--- a/lib/system/gc.nim
+++ b/lib/system/gc.nim
@@ -558,7 +558,7 @@ proc growObj(old: pointer, newsize: int, gch: var GcHeap): pointer =
       # we split the old refcount in 2 parts. XXX This is still not entirely
       # correct if the pointer that receives growObj's result is on the stack.
       # A better fix would be to emit the location specific write barrier for
-      # 'growObj', but this is lost of more work and who knows what new problems
+      # 'growObj', but this is a lot more work and who knows what new problems
       # this would create.
       res.refcount = rcIncrement
       decRef(ol)
diff --git a/lib/system/gc2.nim b/lib/system/gc2.nim
index 4ca0d144f..e68a8586e 100644
--- a/lib/system/gc2.nim
+++ b/lib/system/gc2.nim
@@ -1,7 +1,7 @@
 #
 #
 #            Nim's Runtime Library
-#        (c) Copyright 2012 Andreas Rumpf
+#        (c) Copyright 2015 Andreas Rumpf
 #
 #    See the file "copying.txt", included in this
 #    distribution, for details about the copyright.
@@ -9,13 +9,15 @@
 
 #            Garbage Collector
 #
-# The basic algorithm is *Deferrent Reference Counting* with cycle detection.
-# This is achieved by combining a Deutsch-Bobrow garbage collector
-# together with Christoper's partial mark-sweep garbage collector.
-#
-# Special care has been taken to avoid recursion as far as possible to avoid
-# stack overflows when traversing deep datastructures. It is well-suited
-# for soft real time applications (like games).
+# The basic algorithm is *Deferred Reference Counting* with an incremental mark
+# and sweep GC to free cycles. It is hard realtime in the sense that, if you
+# play by its rules, no deadline will ever be missed.
+
+# XXX Ensure by smart color masking that the object is not in the ZCT.
+
+when defined(nimCoroutines):
+  import arch
+
 {.push profiler:off.}
 
 const
@@ -29,82 +31,36 @@ const
 when withRealTime and not declared(getTicks):
   include "system/timers"
 when defined(memProfiler):
-  proc nimProfile(requestedSize: int)
-
-const
-  rcShift = 6 # the reference count is shifted so we can use
-              # the least significat bits for additinal flags:
-
-  rcAlive = 0b00000           # object is reachable.
-                              # color *black* in the original paper
-
-  rcCycleCandidate = 0b00001  # possible root of a cycle. *purple*
-
-  rcDecRefApplied = 0b00010   # the first dec-ref phase of the
-                              # collector was already applied to this
-                              # object. *gray*
-
-  rcMaybeDead = 0b00011       # this object is a candidate for deletion
-                              # during the collect cycles algorithm.
-                              # *white*.
-
-  rcReallyDead = 0b00100      # this is proved to be garbage
-
-  rcRetiredBuffer = 0b00101   # this is a seq or string buffer that
-                              # was replaced by a resize operation.
-                              # see growObj for details
+  proc nimProfile(requestedSize: int) {.benign.}
 
-  rcColorMask = RefCount(0b00111)
-
-  rcZct = 0b01000             # already added to ZCT
-  rcInCycleRoots = 0b10000    # already buffered as cycle candidate
-  rcHasStackRef = 0b100000    # the object had a stack ref in the last
-                              # cycle collection
-
-  rcMarkBit = rcHasStackRef   # this is currently used for leak detection
-                              # when traceGC is on
-
-  rcBufferedAnywhere = rcZct or rcInCycleRoots
+type
+  ObjectSpaceIter = object
+    state: range[-1..0]
 
-  rcIncrement = 1 shl rcShift # don't touch the color bits
+iterToProc(allObjects, ptr ObjectSpaceIter, allObjectsAsProc)
 
 const
-  NewObjectsAreCycleRoots = true
-    # the alternative is to use the old strategy of adding cycle roots
-    # in incRef (in the compiler itself, this doesn't change much)
-
-  IncRefRemovesCandidates = false
-    # this is safe only if we can reliably track the fact that the object
-    # has stack references. This could be easily done by adding another bit
-    # to the refcount field and setting it up in unmarkStackAndRegisters.
-    # The bit must also be set for new objects that are not rc1 and it must be
-    # examined in the decref loop in collectCycles.
-    # XXX: not implemented yet as tests didn't show any improvement from this
-
-  MarkingSkipsAcyclicObjects = true
-    # Acyclic objects can be safely ignored in the mark and scan phases,
-    # because they cannot contribute to the internal count.
-    # XXX: if we generate specialized `markCyclic` and `markAcyclic`
-    # procs we can further optimize this as there won't be need for any
-    # checks in the code
-
-  MinimumStackMarking = false
-    # Try to scan only the user stack and ignore the part of the stack
-    # belonging to the GC itself. see setStackTop for further info.
-    # XXX: still has problems in release mode in the compiler itself.
-    # investigate how it affects growObj
-
-  CollectCyclesStats = false
-
+  rcIncrement = 0b1000 # so that lowest 3 bits are not touched
+  rcBlackOrig = 0b000
+  rcWhiteOrig = 0b001
+  rcGrey = 0b010   # traditional color for incremental mark&sweep
+  rcUnused = 0b011
+  ZctFlag = 0b100  # in ZCT
+  rcShift = 3      # shift by rcShift to get the reference counter
+  colorMask = 0b011
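With this layout a refcount word packs the tricolor state into bits 0-1, the ZCT membership flag into bit 2, and the actual counter into bits 3 and up; the color/setColor templates defined further down operate on exactly these masks. A small worked example using the constants above (illustrative only):

  var rc = rcIncrement or ZctFlag or rcGrey   # count = 1, in ZCT, grey
  assert (rc and colorMask) == rcGrey         # what `color` extracts
  assert (rc shr rcShift) == 1                # the reference count proper
  rc = rc and not colorMask or rcBlackOrig    # what `setColor` does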
 type
   WalkOp = enum
-    waPush
+    waMarkGlobal,    # part of the backup mark&sweep
+    waMarkGrey,
+    waZctDecRef #, waDebug
 
-  Finalizer {.compilerproc.} = proc (self: pointer) {.nimcall.}
+  Phase {.pure.} = enum
+    None, Marking, Sweeping
+  Finalizer {.compilerproc.} = proc (self: pointer) {.nimcall, benign.}
     # A ref type can have a finalizer that is called before the object's
     # storage is freed.
 
-  GcStat {.final, pure.} = object
+  GcStat = object
     stackScans: int          # number of performed stack scans (for statistics)
     cycleCollections: int    # number of performed full collections
     maxThreshold: int        # max threshold that has been set
@@ -113,134 +69,78 @@ type
     cycleTableSize: int      # max entries in cycle table
     maxPause: int64          # max measured GC pause in nanoseconds
 
-  GcHeap {.final, pure.} = object # this contains the zero count and
-                                   # non-zero count table
+  GcStack = object
+    prev: ptr GcStack
+    next: ptr GcStack
+    starts: pointer
+    pos: pointer
+    maxStackSize: int
+
+  GcHeap = object # this contains the zero count and
+                  # non-zero count table
+    black: int    # either 0 or 1.
+    stack: ptr GcStack
     stackBottom: pointer
-    stackTop: pointer
+    phase: Phase
     cycleThreshold: int
+    when useCellIds:
+      idGenerator: int
     zct: CellSeq             # the zero count table
     decStack: CellSeq        # cells in the stack that are to decref again
-    cycleRoots: CellSeq
-    tempStack: CellSeq       # temporary stack for recursion elimination
-    freeStack: CellSeq       # objects ready to be freed
+    greyStack: CellSeq
     recGcLock: int           # prevent recursion via finalizers; no thread lock
-    cycleRootsTrimIdx: int   # Trimming is a light-weight collection of the
-                             # cycle roots table that uses a cheap linear scan
-                             # to find only possitively dead objects.
-                             # One strategy is to perform it only for new objects
-                             # allocated between the invocations of collectZCT.
-                             # This index indicates the start of the range of
-                             # such new objects within the table.
     when withRealTime:
       maxPause: Nanos        # max allowed pause in nanoseconds; active if > 0
     region: MemRegion        # garbage collected region
     stat: GcStat
-{.deprecated: [TWalkOp: WalkOp, TFinalizer: Finalizer, TGcStat: GcStat,
-              TGcHeap: GcHeap].}
+    additionalRoots: CellSeq # dummy roots for GC_ref/unref
+    spaceIter: ObjectSpaceIter
+
 var
-  gch* {.rtlThreadVar.}: GcHeap
+  gch {.rtlThreadVar.}: GcHeap
 
 when not defined(useNimRtl):
   instantiateForRegion(gch.region)
 
-template acquire(gch: GcHeap) =
-  when hasThreadSupport and hasSharedHeap:
-    AcquireSys(HeapLock)
-
-template release(gch: GcHeap) =
-  when hasThreadSupport and hasSharedHeap:
-    releaseSys(HeapLock)
-
-template setColor(c: PCell, color) =
-  c.refcount = (c.refcount and not rcColorMask) or color
-
-template color(c: PCell): expr =
-  c.refcount and rcColorMask
-
-template isBitDown(c: PCell, bit): expr =
-  (c.refcount and bit) == 0
-
-template isBitUp(c: PCell, bit): expr =
-  (c.refcount and bit) != 0
-
-template setBit(c: PCell, bit): expr =
-  c.refcount = c.refcount or bit
-
-template isDead(c: Pcell): expr =
-  c.isBitUp(rcReallyDead) # also covers rcRetiredBuffer
-
-template clearBit(c: PCell, bit): expr =
-  c.refcount = c.refcount and (not RefCount(bit))
-
-when debugGC:
-  var gcCollectionIdx = 0
-
-  proc colorStr(c: PCell): cstring =
-    let color = c.color
-    case color
-    of rcAlive: return "alive"
-    of rcMaybeDead: return "maybedead"
-    of rcCycleCandidate: return "candidate"
-    of rcDecRefApplied: return "marked"
-    of rcRetiredBuffer: return "retired"
-    of rcReallyDead: return "dead"
-    else: return "unknown?"
-
-  proc inCycleRootsStr(c: PCell): cstring =
-    if c.isBitUp(rcInCycleRoots): result = "cycleroot"
-    else: result = ""
-
-  proc inZctStr(c: PCell): cstring =
-    if c.isBitUp(rcZct): result = "zct"
-    else: result = ""
-
-  proc writeCell*(msg: CString, c: PCell, force = false) =
-    var kind = -1
-    if c.typ != nil: kind = ord(c.typ.kind)
-    when trackAllocationSource:
-      c_fprintf(c_stdout, "[GC %d] %s: %p %d rc=%ld %s %s %s from %s(%ld)\n",
-                gcCollectionIdx,
-                msg, c, kind, c.refcount shr rcShift,
-                c.colorStr, c.inCycleRootsStr, c.inZctStr,
-                c.filename, c.line)
-    else:
-      c_fprintf(c_stdout, "[GC] %s: %p %d rc=%ld\n",
-                msg, c, kind, c.refcount shr rcShift)
-
-proc addZCT(zct: var CellSeq, c: PCell) {.noinline.} =
-  if c.isBitDown(rcZct):
-    c.setBit rcZct
-    zct.add c
-
-template setStackTop(gch) =
-  # This must be called immediately after we enter the GC code
-  # to minimize the size of the scanned stack. The stack consumed
-  # by the GC procs may amount to 200-400 bytes depending on the
-  # build settings and this contributes to false-positives
-  # in the conservative stack marking
-  when MinimumStackMarking:
-    var stackTop {.volatile.}: pointer
-    gch.stackTop = addr(stackTop)
-
-template addCycleRoot(cycleRoots: var CellSeq, c: PCell) =
-  if c.color != rcCycleCandidate:
-    c.setColor rcCycleCandidate
-
-    # the object may be buffered already. for example, consider:
-    # decref; incref; decref
-    if c.isBitDown(rcInCycleRoots):
-      c.setBit rcInCycleRoots
-      cycleRoots.add c
+proc initGC() =
+  when not defined(useNimRtl):
+    when traceGC:
+      for i in low(CellState)..high(CellState): init(states[i])
+    gch.cycleThreshold = InitialCycleThreshold
+    gch.stat.stackScans = 0
+    gch.stat.cycleCollections = 0
+    gch.stat.maxThreshold = 0
+    gch.stat.maxStackSize = 0
+    gch.stat.maxStackCells = 0
+    gch.stat.cycleTableSize = 0
+    # init the rt
+    init(gch.zct)
+    init(gch.decStack)
+    init(gch.additionalRoots)
+    init(gch.greyStack)
+
+template gcAssert(cond: bool, msg: string) =
+  when defined(useGcAssert):
+    if not cond:
+      echo "[GCASSERT] ", msg
+      GC_disable()
+      writeStackTrace()
+      quit 1
+
+proc addZCT(s: var CellSeq, c: PCell) {.noinline.} =
+  if (c.refcount and ZctFlag) == 0:
+    c.refcount = c.refcount or ZctFlag
+    add(s, c)
 
 proc cellToUsr(cell: PCell): pointer {.inline.} =
   # convert object (=pointer to refcount) to pointer to userdata
   result = cast[pointer](cast[ByteAddress](cell)+%ByteAddress(sizeof(Cell)))
 
-proc usrToCell*(usr: pointer): PCell {.inline.} =
+proc usrToCell(usr: pointer): PCell {.inline.} =
   # convert pointer to userdata to object (=pointer to refcount)
   result = cast[PCell](cast[ByteAddress](usr)-%ByteAddress(sizeof(Cell)))
 
-proc canbeCycleRoot(c: PCell): bool {.inline.} =
+proc canBeCycleRoot(c: PCell): bool {.inline.} =
   result = ntfAcyclic notin c.typ.flags
 
 proc extGetCellType(c: pointer): PNimType {.compilerproc.} =
@@ -254,14 +154,40 @@ proc internRefcount(p: pointer): int {.exportc: "getRefcount".} =
 when BitsPerPage mod (sizeof(int)*8) != 0:
   {.error: "(BitsPerPage mod BitsPerUnit) should be zero!".}
 
+template color(c): expr = c.refCount and colorMask
+template setColor(c, col) =
+  c.refcount = c.refcount and not colorMask or col
+
+proc writeCell(msg: cstring, c: PCell) =
+  var kind = -1
+  if c.typ != nil: kind = ord(c.typ.kind)
+  when leakDetector:
+    c_fprintf(c_stdout, "[GC] %s: %p %d rc=%ld from %s(%ld)\n",
+              msg, c, kind, c.refcount shr rcShift, c.filename, c.line)
+  else:
+    c_fprintf(c_stdout, "[GC] %s: %p %d rc=%ld; color=%ld\n",
+              msg, c, kind, c.refcount shr rcShift, c.color)
+
+template gcTrace(cell, state: expr): stmt {.immediate.} =
+  when traceGC: traceCell(cell, state)
+
 # forward declarations:
-proc collectCT(gch: var GcHeap)
-proc isOnStack*(p: pointer): bool {.noinline.}
-proc forAllChildren(cell: PCell, op: WalkOp)
-proc doOperation(p: pointer, op: WalkOp)
-proc forAllChildrenAux(dest: pointer, mt: PNimType, op: WalkOp)
+proc collectCT(gch: var GcHeap) {.benign.}
+proc isOnStack(p: pointer): bool {.noinline, benign.}
+proc forAllChildren(cell: PCell, op: WalkOp) {.benign.}
+proc doOperation(p: pointer, op: WalkOp) {.benign.}
+proc forAllChildrenAux(dest: pointer, mt: PNimType, op: WalkOp) {.benign.}
 # we need the prototype here for debugging purposes
 
+when hasThreadSupport and hasSharedHeap:
+  template `--`(x: expr): expr = atomicDec(x, rcIncrement) <% rcIncrement
+  template `++`(x: expr): stmt = discard atomicInc(x, rcIncrement)
+else:
+  template `--`(x: expr): expr =
+    dec(x, rcIncrement)
+    x <% rcIncrement
+  template `++`(x: expr): stmt = inc(x, rcIncrement)
+
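Since the counter occupies the bits above rcShift, `--` decrements by rcIncrement and the unsigned comparison x <% rcIncrement tests whether the counter part just reached zero; the color and ZCT bits below rcShift are never touched. For instance (a sketch reusing the constants above):

  var rc = rcIncrement or ZctFlag   # count = 1, buffered in the ZCT
  dec(rc, rcIncrement)              # the non-atomic `--` path
  assert rc <% rcIncrement          # counter is zero now
  assert (rc and ZctFlag) != 0      # flag bits survived the decrement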
 proc prepareDealloc(cell: PCell) =
   if cell.typ.finalizer != nil:
     # the finalizer could invoke something that
@@ -273,246 +199,127 @@ proc prepareDealloc(cell: PCell) =
     (cast[Finalizer](cell.typ.finalizer))(cellToUsr(cell))
     dec(gch.recGcLock)
 
-when traceGC:
-  # traceGC is a special switch to enable extensive debugging
-  type
-    CellState = enum
-      csAllocated, csFreed
-  {.deprecated: [TCellState: CellState].}
-  var
-    states: array[CellState, CellSet]
-
-  proc traceCell(c: PCell, state: CellState) =
-    case state
-    of csAllocated:
-      if c in states[csAllocated]:
-        writeCell("attempt to alloc an already allocated cell", c)
-        sysAssert(false, "traceCell 1")
-      excl(states[csFreed], c)
-      # writecell("allocated", c)
-    of csFreed:
-      if c in states[csFreed]:
-        writeCell("attempt to free a cell twice", c)
-        sysAssert(false, "traceCell 2")
-      if c notin states[csAllocated]:
-        writeCell("attempt to free not an allocated cell", c)
-        sysAssert(false, "traceCell 3")
-      excl(states[csAllocated], c)
-      # writecell("freed", c)
-    incl(states[state], c)
-
-  proc computeCellWeight(c: PCell): int =
-    var x: CellSet
-    x.init
-
-    let startLen = gch.tempStack.len
-    c.forAllChildren waPush
-
-    while startLen != gch.tempStack.len:
-      dec gch.tempStack.len
-      var c = gch.tempStack.d[gch.tempStack.len]
-      if c in states[csFreed]: continue
-      inc result
-      if c notin x:
-        x.incl c
-        c.forAllChildren waPush
-
-  template markChildrenRec(cell) =
-    let startLen = gch.tempStack.len
-    cell.forAllChildren waPush
-    let isMarked = cell.isBitUp(rcMarkBit)
-    while startLen != gch.tempStack.len:
-      dec gch.tempStack.len
-      var c = gch.tempStack.d[gch.tempStack.len]
-      if c in states[csFreed]: continue
-      if c.isBitDown(rcMarkBit):
-        c.setBit rcMarkBit
-        c.forAllChildren waPush
-    if c.isBitUp(rcMarkBit) and not isMarked:
-      writecell("cyclic cell", cell)
-      cprintf "Weight %d\n", cell.computeCellWeight
-
-  proc writeLeakage(onlyRoots: bool) =
-    if onlyRoots:
-      for c in elements(states[csAllocated]):
-        if c notin states[csFreed]:
-          markChildrenRec(c)
-    var f = 0
-    var a = 0
-    for c in elements(states[csAllocated]):
-      inc a
-      if c in states[csFreed]: inc f
-      elif c.isBitDown(rcMarkBit):
-        writeCell("leak", c)
-        cprintf "Weight %d\n", c.computeCellWeight
-    cfprintf(cstdout, "Allocations: %ld; freed: %ld\n", a, f)
-
-template gcTrace(cell, state: expr): stmt {.immediate.} =
-  when logGC: writeCell($state, cell)
-  when traceGC: traceCell(cell, state)
-
-template WithHeapLock(blk: stmt): stmt =
-  when hasThreadSupport and hasSharedHeap: AcquireSys(HeapLock)
-  blk
-  when hasThreadSupport and hasSharedHeap: ReleaseSys(HeapLock)
-
 proc rtlAddCycleRoot(c: PCell) {.rtl, inl.} =
   # we MUST access gch as a global here, because this crosses DLL boundaries!
-  WithHeapLock: addCycleRoot(gch.cycleRoots, c)
+  discard
 
 proc rtlAddZCT(c: PCell) {.rtl, inl.} =
   # we MUST access gch as a global here, because this crosses DLL boundaries!
-  WithHeapLock: addZCT(gch.zct, c)
+  addZCT(gch.zct, c)
 
-type
-  CyclicMode = enum
-    Cyclic,
-    Acyclic,
-    MaybeCyclic
-
-  ReleaseType = enum
-    AddToZTC
-    FreeImmediately
-
-  HeapType = enum
-    LocalHeap
-    SharedHeap
-{.deprecated: [TCyclicMode: CyclicMode, TReleaseType: ReleaseType,
-              THeapType: HeapType].}
-
-template `++` (rc: RefCount, heapType: HeapType): stmt =
-  when heapType == SharedHeap:
-    discard atomicInc(rc, rcIncrement)
-  else:
-    inc rc, rcIncrement
-
-template `--`(rc: RefCount): expr =
-  dec rc, rcIncrement
-  rc <% rcIncrement
-
-template `--` (rc: RefCount, heapType: HeapType): expr =
-  (when heapType == SharedHeap: atomicDec(rc, rcIncrement) <% rcIncrement else: --rc)
-
-template doDecRef(cc: PCell,
-                  heapType = LocalHeap,
-                  cycleFlag = MaybeCyclic): stmt =
-  var c = cc
-  sysAssert(isAllocatedPtr(gch.region, c), "decRef: interiorPtr")
-  # XXX: move this elesewhere
-
-  sysAssert(c.refcount >=% rcIncrement, "decRef")
-  if c.refcount--(heapType):
-    # this is the last reference from the heap
-    # add to a zero-count-table that will be matched against stack pointers
+proc decRef(c: PCell) {.inline.} =
+  gcAssert(isAllocatedPtr(gch.region, c), "decRef: interiorPtr")
+  gcAssert(c.refcount >=% rcIncrement, "decRef")
+  if --c.refcount:
     rtlAddZCT(c)
-  else:
-    when cycleFlag != Acyclic:
-      if cycleFlag == Cyclic or canBeCycleRoot(c):
-        # a cycle may have been broken
-        rtlAddCycleRoot(c)
-
-template doIncRef(cc: PCell,
-                 heapType = LocalHeap,
-                 cycleFlag = MaybeCyclic): stmt =
-  var c = cc
-  c.refcount++(heapType)
-  when cycleFlag != Acyclic:
-    when NewObjectsAreCycleRoots:
-      if canbeCycleRoot(c):
-        addCycleRoot(gch.cycleRoots, c)
-    elif IncRefRemovesCandidates:
-      c.setColor rcAlive
-  # XXX: this is not really atomic enough!
-
-proc nimGCref(p: pointer) {.compilerProc, inline.} = doIncRef(usrToCell(p))
-proc nimGCunref(p: pointer) {.compilerProc, inline.} = doDecRef(usrToCell(p))
+
+proc incRef(c: PCell) {.inline.} =
+  gcAssert(isAllocatedPtr(gch.region, c), "incRef: interiorPtr")
+  c.refcount = c.refcount +% rcIncrement
+
+proc nimGCref(p: pointer) {.compilerProc.} =
+  let cell = usrToCell(p)
+  incRef(cell)
+  add(gch.additionalRoots, cell)
+
+proc nimGCunref(p: pointer) {.compilerProc.} =
+  let cell = usrToCell(p)
+  decRef(cell)
+  var L = gch.additionalRoots.len-1
+  var i = L
+  let d = gch.additionalRoots.d
+  while i >= 0:
+    if d[i] == cell:
+      d[i] = d[L]
+      dec gch.additionalRoots.len
+      break
+    dec(i)
+
+template markGrey(x: PCell) =
+  if x.color == 1-gch.black and gch.phase == Phase.Marking:
+    x.setColor(rcGrey)
+    add(gch.greyStack, x)
+
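markGrey is the write barrier that keeps incremental marking sound: while gch.phase is Marking, a white cell (color == 1 - gch.black) that gains a new reference is shaded grey and pushed onto the grey stack, so a store into an already-blackened parent cannot hide it from the marker. Every heap assignment barrier below follows the same pattern:

  # given `src`, the pointer being stored into a heap location:
  let s = usrToCell(src)
  incRef(s)
  markGrey(s)   # preserve the tricolor invariant during incremental marking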
+proc GC_addCycleRoot*[T](p: ref T) {.inline.} =
+  ## adds 'p' to the cycle candidate set for the cycle collector. It is
+  ## necessary if you used the 'acyclic' pragma for optimization
+  ## purposes and need to break cycles manually.
+  rtlAddCycleRoot(usrToCell(cast[pointer](p)))
 
 proc nimGCunrefNoCycle(p: pointer) {.compilerProc, inline.} =
   sysAssert(allocInv(gch.region), "begin nimGCunrefNoCycle")
   var c = usrToCell(p)
-  sysAssert(isAllocatedPtr(gch.region, c), "nimGCunrefNoCycle: isAllocatedPtr")
-  if c.refcount--(LocalHeap):
+  gcAssert(isAllocatedPtr(gch.region, c), "nimGCunrefNoCycle: isAllocatedPtr")
+  if --c.refcount:
     rtlAddZCT(c)
     sysAssert(allocInv(gch.region), "end nimGCunrefNoCycle 2")
   sysAssert(allocInv(gch.region), "end nimGCunrefNoCycle 5")
 
-template doAsgnRef(dest: PPointer, src: pointer,
-                  heapType = LocalHeap, cycleFlag = MaybeCyclic): stmt =
-  sysAssert(not isOnStack(dest), "asgnRef")
-  # BUGFIX: first incRef then decRef!
-  if src != nil: doIncRef(usrToCell(src), heapType, cycleFlag)
-  if dest[] != nil: doDecRef(usrToCell(dest[]), heapType, cycleFlag)
-  dest[] = src
-
 proc asgnRef(dest: PPointer, src: pointer) {.compilerProc, inline.} =
   # the code generator calls this proc!
-  doAsgnRef(dest, src, LocalHeap, MaybeCyclic)
+  gcAssert(not isOnStack(dest), "asgnRef")
+  # BUGFIX: first incRef then decRef!
+  if src != nil:
+    let s = usrToCell(src)
+    incRef(s)
+    markGrey(s)
+  if dest[] != nil: decRef(usrToCell(dest[]))
+  dest[] = src
 
 proc asgnRefNoCycle(dest: PPointer, src: pointer) {.compilerProc, inline.} =
   # the code generator calls this proc if it is known at compile time that no
   # cycle is possible.
-  doAsgnRef(dest, src, LocalHeap, Acyclic)
+  gcAssert(not isOnStack(dest), "asgnRefNoCycle")
+  if src != nil:
+    var c = usrToCell(src)
+    ++c.refcount
+    markGrey(c)
+  if dest[] != nil:
+    var c = usrToCell(dest[])
+    if --c.refcount:
+      rtlAddZCT(c)
+  dest[] = src
 
 proc unsureAsgnRef(dest: PPointer, src: pointer) {.compilerProc.} =
   # unsureAsgnRef updates the reference counters only if dest is not on the
  # stack. It is used by the code generator if it cannot decide whether a
   # reference is in the stack or not (this can happen for var parameters).
   if not isOnStack(dest):
-    if src != nil: doIncRef(usrToCell(src))
-    # XXX we must detect a shared heap here
-    # better idea may be to just eliminate the need for unsureAsgnRef
-    #
+    if src != nil:
+      let s = usrToCell(src)
+      incRef(s)
+      markGrey(s)
     # XXX finally use assembler for the stack checking instead!
     # the test for '!= nil' is correct, but I got tired of the segfaults
     # resulting from the crappy stack checking:
-    if cast[int](dest[]) >=% PageSize: doDecRef(usrToCell(dest[]))
+    if cast[int](dest[]) >=% PageSize: decRef(usrToCell(dest[]))
   else:
     # can't be an interior pointer if it's a stack location!
-    sysAssert(interiorAllocatedPtr(gch.region, dest)==nil,
-              "stack loc AND interior pointer")
+    gcAssert(interiorAllocatedPtr(gch.region, dest) == nil,
+             "stack loc AND interior pointer")
   dest[] = src
 
-when hasThreadSupport and hasSharedHeap:
-  # shared heap version of the above procs
-  proc asgnRefSh(dest: PPointer, src: pointer) {.compilerProc, inline.} =
-    doAsgnRef(dest, src, SharedHeap, MaybeCyclic)
-
-  proc asgnRefNoCycleSh(dest: PPointer, src: pointer) {.compilerProc, inline.} =
-    doAsgnRef(dest, src, SharedHeap, Acyclic)
+type
+  GlobalMarkerProc = proc () {.nimcall, benign.}
+var
+  globalMarkersLen: int
+  globalMarkers: array[0.. 7_000, GlobalMarkerProc]
 
-proc initGC() =
-  when not defined(useNimRtl):
-    when traceGC:
-      for i in low(CellState)..high(CellState): init(states[i])
-    gch.cycleThreshold = InitialCycleThreshold
-    gch.stat.stackScans = 0
-    gch.stat.cycleCollections = 0
-    gch.stat.maxThreshold = 0
-    gch.stat.maxStackSize = 0
-    gch.stat.maxStackCells = 0
-    gch.stat.cycleTableSize = 0
-    # init the rt
-    init(gch.zct)
-    init(gch.tempStack)
-    init(gch.freeStack)
-    init(gch.cycleRoots)
-    init(gch.decStack)
+proc nimRegisterGlobalMarker(markerProc: GlobalMarkerProc) {.compilerProc.} =
+  if globalMarkersLen <= high(globalMarkers):
+    globalMarkers[globalMarkersLen] = markerProc
+    inc globalMarkersLen
+  else:
+    echo "[GC] cannot register global variable; too many global variables"
+    quit 1
 
-proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: WalkOp) =
+proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: WalkOp) {.benign.} =
   var d = cast[ByteAddress](dest)
   case n.kind
   of nkSlot: forAllChildrenAux(cast[pointer](d +% n.offset), n.typ, op)
   of nkList:
     for i in 0..n.len-1:
-      # inlined for speed
-      if n.sons[i].kind == nkSlot:
-        if n.sons[i].typ.kind in {tyRef, tyString, tySequence}:
-          doOperation(cast[PPointer](d +% n.sons[i].offset)[], op)
-        else:
-          forAllChildrenAux(cast[pointer](d +% n.sons[i].offset),
-                            n.sons[i].typ, op)
-      else:
-        forAllSlotsAux(dest, n.sons[i], op)
+      forAllSlotsAux(dest, n.sons[i], op)
   of nkCase:
     var m = selectBranch(dest, n)
     if m != nil: forAllSlotsAux(dest, m, op)
@@ -533,9 +340,10 @@ proc forAllChildrenAux(dest: pointer, mt: PNimType, op: WalkOp) =
     else: discard
 
 proc forAllChildren(cell: PCell, op: WalkOp) =
-  sysAssert(cell != nil, "forAllChildren: 1")
-  sysAssert(cell.typ != nil, "forAllChildren: 2")
-  sysAssert cell.typ.kind in {tyRef, tySequence, tyString}, "forAllChildren: 3"
+  gcAssert(cell != nil, "forAllChildren: 1")
+  gcAssert(isAllocatedPtr(gch.region, cell), "forAllChildren: 2")
+  gcAssert(cell.typ != nil, "forAllChildren: 3")
+  gcAssert cell.typ.kind in {tyRef, tySequence, tyString}, "forAllChildren: 4"
   let marker = cell.typ.marker
   if marker != nil:
     marker(cellToUsr(cell), op.int)
@@ -547,10 +355,9 @@ proc forAllChildren(cell: PCell, op: WalkOp) =
       var d = cast[ByteAddress](cellToUsr(cell))
       var s = cast[PGenericSeq](d)
       if s != nil:
-        let baseAddr = d +% GenericSeqSize
         for i in 0..s.len-1:
-          forAllChildrenAux(cast[pointer](baseAddr +% i *% cell.typ.base.size),
-                            cell.typ.base, op)
+          forAllChildrenAux(cast[pointer](d +% i *% cell.typ.base.size +%
+            GenericSeqSize), cell.typ.base, op)
     else: discard
 
 proc addNewObjToZCT(res: PCell, gch: var GcHeap) {.inline.} =
@@ -571,7 +378,7 @@ proc addNewObjToZCT(res: PCell, gch: var GcHeap) {.inline.} =
     template replaceZctEntry(i: expr) =
       c = d[i]
       if c.refcount >=% rcIncrement:
-        c.clearBit(rcZct)
+        c.refcount = c.refcount and not ZctFlag
         d[i] = res
         return
     if L > 8:
@@ -592,408 +399,335 @@ proc addNewObjToZCT(res: PCell, gch: var GcHeap) {.inline.} =
     for i in countdown(L-1, max(0, L-8)):
       var c = d[i]
       if c.refcount >=% rcIncrement:
-        c.clearBit(rcZct)
+        c.refcount = c.refcount and not ZctFlag
         d[i] = res
         return
     add(gch.zct, res)
 
-proc rawNewObj(typ: PNimType, size: int, gch: var GcHeap, rc1 = false): pointer =
+{.push stackTrace: off, profiler:off.}
+proc gcInvariant*() =
+  sysAssert(allocInv(gch.region), "injected")
+  when declared(markForDebug):
+    markForDebug(gch)
+{.pop.}
+
+proc rawNewObj(typ: PNimType, size: int, gch: var GcHeap): pointer =
   # generates a new object and sets its reference counter to 0
-  acquire(gch)
   sysAssert(allocInv(gch.region), "rawNewObj begin")
-  sysAssert(typ.kind in {tyRef, tyString, tySequence}, "newObj: 1")
-
+  gcAssert(typ.kind in {tyRef, tyString, tySequence}, "newObj: 1")
   collectCT(gch)
-  sysAssert(allocInv(gch.region), "rawNewObj after collect")
-
   var res = cast[PCell](rawAlloc(gch.region, size + sizeof(Cell)))
-  sysAssert(allocInv(gch.region), "rawNewObj after rawAlloc")
-
-  sysAssert((cast[ByteAddress](res) and (MemAlign-1)) == 0, "newObj: 2")
-
+  gcAssert((cast[ByteAddress](res) and (MemAlign-1)) == 0, "newObj: 2")
+  # now it is buffered in the ZCT
   res.typ = typ
-
-  when trackAllocationSource and not hasThreadSupport:
-    if framePtr != nil and framePtr.prev != nil and framePtr.prev.prev != nil:
-      res.filename = framePtr.prev.prev.filename
-      res.line = framePtr.prev.prev.line
-    else:
-      res.filename = "nofile"
-
-  if rc1:
-    res.refcount = rcIncrement # refcount is 1
-  else:
-    # its refcount is zero, so add it to the ZCT:
-    res.refcount = rcZct
-    addNewObjToZCT(res, gch)
-
-    if NewObjectsAreCycleRoots and canBeCycleRoot(res):
-      res.setBit(rcInCycleRoots)
-      res.setColor rcCycleCandidate
-      gch.cycleRoots.add res
-
+  when leakDetector and not hasThreadSupport:
+    if framePtr != nil and framePtr.prev != nil:
+      res.filename = framePtr.prev.filename
+      res.line = framePtr.prev.line
+  # refcount is zero, color is black, but mark it to be in the ZCT
+  res.refcount = ZctFlag or gch.black
   sysAssert(isAllocatedPtr(gch.region, res), "newObj: 3")
-
+  # its refcount is zero, so add it to the ZCT:
+  addNewObjToZCT(res, gch)
   when logGC: writeCell("new cell", res)
   gcTrace(res, csAllocated)
-  release(gch)
+  when useCellIds:
+    inc gch.idGenerator
+    res.id = gch.idGenerator
   result = cellToUsr(res)
   sysAssert(allocInv(gch.region), "rawNewObj end")
 
 {.pop.}
 
-proc freeCell(gch: var GcHeap, c: PCell) =
-  # prepareDealloc(c)
-  gcTrace(c, csFreed)
-
-  when reallyDealloc: rawDealloc(gch.region, c)
-  else:
-    sysAssert(c.typ != nil, "collectCycles")
-    zeroMem(c, sizeof(Cell))
-
-template eraseAt(cells: var CellSeq, at: int): stmt =
-  cells.d[at] = cells.d[cells.len - 1]
-  dec cells.len
-
-template trimAt(roots: var CellSeq, at: int): stmt =
-  # This will remove a cycle root candidate during trimming.
-  # a candidate is removed either because it received a refup and
-  # it's no longer a candidate or because it received further refdowns
-  # and now it's dead for sure.
-  let c = roots.d[at]
-  c.clearBit(rcInCycleRoots)
-  roots.eraseAt(at)
-  if c.isBitUp(rcReallyDead) and c.refcount <% rcIncrement:
-    # This case covers both dead objects and retired buffers
-    # That's why we must also check the refcount (it may be
-    # kept possitive by stack references).
-    freeCell(gch, c)
+proc newObjNoInit(typ: PNimType, size: int): pointer {.compilerRtl.} =
+  result = rawNewObj(typ, size, gch)
+  when defined(memProfiler): nimProfile(size)
 
 proc newObj(typ: PNimType, size: int): pointer {.compilerRtl.} =
-  setStackTop(gch)
-  result = rawNewObj(typ, size, gch, false)
+  result = rawNewObj(typ, size, gch)
   zeroMem(result, size)
   when defined(memProfiler): nimProfile(size)
 
-proc newObjNoInit(typ: PNimType, size: int): pointer {.compilerRtl.} =
-  setStackTop(gch)
-  result = rawNewObj(typ, size, gch, false)
-  when defined(memProfiler): nimProfile(size)
-
 proc newSeq(typ: PNimType, len: int): pointer {.compilerRtl.} =
-  setStackTop(gch)
   # `newObj` already uses locks, so no need for them here.
   let size = addInt(mulInt(len, typ.base.size), GenericSeqSize)
   result = newObj(typ, size)
   cast[PGenericSeq](result).len = len
   cast[PGenericSeq](result).reserved = len
+  when defined(memProfiler): nimProfile(size)
 
 proc newObjRC1(typ: PNimType, size: int): pointer {.compilerRtl.} =
-  setStackTop(gch)
-  result = rawNewObj(typ, size, gch, true)
+  # generates a new object and sets its reference counter to 1
+  sysAssert(allocInv(gch.region), "newObjRC1 begin")
+  gcAssert(typ.kind in {tyRef, tyString, tySequence}, "newObj: 1")
+  collectCT(gch)
+  sysAssert(allocInv(gch.region), "newObjRC1 after collectCT")
+
+  var res = cast[PCell](rawAlloc(gch.region, size + sizeof(Cell)))
+  sysAssert(allocInv(gch.region), "newObjRC1 after rawAlloc")
+  sysAssert((cast[ByteAddress](res) and (MemAlign-1)) == 0, "newObj: 2")
+  # now it is buffered in the ZCT
+  res.typ = typ
+  when leakDetector and not hasThreadSupport:
+    if framePtr != nil and framePtr.prev != nil:
+      res.filename = framePtr.prev.filename
+      res.line = framePtr.prev.line
+  res.refcount = rcIncrement or gch.black # refcount is 1
+  sysAssert(isAllocatedPtr(gch.region, res), "newObj: 3")
+  when logGC: writeCell("new cell", res)
+  gcTrace(res, csAllocated)
+  when useCellIds:
+    inc gch.idGenerator
+    res.id = gch.idGenerator
+  result = cellToUsr(res)
+  zeroMem(result, size)
+  sysAssert(allocInv(gch.region), "newObjRC1 end")
   when defined(memProfiler): nimProfile(size)
 
 proc newSeqRC1(typ: PNimType, len: int): pointer {.compilerRtl.} =
-  setStackTop(gch)
   let size = addInt(mulInt(len, typ.base.size), GenericSeqSize)
   result = newObjRC1(typ, size)
   cast[PGenericSeq](result).len = len
   cast[PGenericSeq](result).reserved = len
+  when defined(memProfiler): nimProfile(size)
 
 proc growObj(old: pointer, newsize: int, gch: var GcHeap): pointer =
-  acquire(gch)
   collectCT(gch)
   var ol = usrToCell(old)
+  gcAssert(isAllocatedPtr(gch.region, ol), "growObj: freed pointer?")
+
   sysAssert(ol.typ != nil, "growObj: 1")
-  sysAssert(ol.typ.kind in {tyString, tySequence}, "growObj: 2")
+  gcAssert(ol.typ.kind in {tyString, tySequence}, "growObj: 2")
   sysAssert(allocInv(gch.region), "growObj begin")
 
   var res = cast[PCell](rawAlloc(gch.region, newsize + sizeof(Cell)))
-  var elemSize = if ol.typ.kind != tyString: ol.typ.base.size
-                 else: 1
+  var elemSize = 1
+  if ol.typ.kind != tyString: elemSize = ol.typ.base.size
 
-  var oldsize = cast[PGenericSeq](old).len*elemSize + GenericSeqSize
-
-  # XXX: This should happen outside
-  # call user-defined move code
-  # call user-defined default constructor
+  let oldsize = cast[PGenericSeq](old).len*elemSize + GenericSeqSize
   copyMem(res, ol, oldsize + sizeof(Cell))
-  zeroMem(cast[pointer](cast[ByteAddress](res)+% oldsize +% sizeof(Cell)),
+  zeroMem(cast[pointer](cast[ByteAddress](res) +% oldsize +% sizeof(Cell)),
           newsize-oldsize)
-
   sysAssert((cast[ByteAddress](res) and (MemAlign-1)) == 0, "growObj: 3")
-  sysAssert(res.refcount shr rcShift <=% 1, "growObj: 4")
-
-  when false:
-    if ol.isBitUp(rcZct):
-      var j = gch.zct.len-1
-      var d = gch.zct.d
-      while j >= 0:
-        if d[j] == ol:
-          d[j] = res
-          break
-        dec(j)
-
-    if ol.isBitUp(rcInCycleRoots):
-      for i in 0 .. <gch.cycleRoots.len:
-        if gch.cycleRoots.d[i] == ol:
-          eraseAt(gch.cycleRoots, i)
-
-    freeCell(gch, ol)
-
-  else:
-    # the new buffer inherits the GC state of the old one
-    if res.isBitUp(rcZct): gch.zct.add res
-    if res.isBitUp(rcInCycleRoots): gch.cycleRoots.add res
-
-    # Pay attention to what's going on here! We're not releasing the old memory.
-    # This is because at this point there may be an interior pointer pointing
-    # into this buffer somewhere on the stack (due to `var` parameters now and
-    # and `let` and `var:var` stack locations in the future).
-    # We'll release the memory in the next GC cycle. If we release it here,
-    # we cannot guarantee that no memory will be corrupted when only safe
-    # language features are used. Accessing the memory after the seq/string
-    # has been invalidated may still result in logic errors in the user code.
-    # We may improve on that by protecting the page in debug builds or
-    # by providing a warning when we detect a stack pointer into it.
-    let bufferFlags = ol.refcount and rcBufferedAnywhere
-    if bufferFlags == 0:
-      # we need this in order to collect it safely later
-      ol.refcount = rcRetiredBuffer or rcZct
-      gch.zct.add ol
-    else:
-      ol.refcount = rcRetiredBuffer or bufferFlags
-
-    when logGC:
-      writeCell("growObj old cell", ol)
-      writeCell("growObj new cell", res)
-
+  # This can be wrong for intermediate temps that are nevertheless on the
+  # heap because of lambda lifting:
+  #gcAssert(res.refcount shr rcShift <=% 1, "growObj: 4")
+  when logGC:
+    writeCell("growObj old cell", ol)
+    writeCell("growObj new cell", res)
+  gcTrace(ol, csZctFreed)
   gcTrace(res, csAllocated)
-  release(gch)
+  when reallyDealloc:
+    sysAssert(allocInv(gch.region), "growObj before dealloc")
+    if ol.refcount shr rcShift <=% 1:
+      # free immediately to save space:
+      if (ol.refcount and ZctFlag) != 0:
+        var j = gch.zct.len-1
+        var d = gch.zct.d
+        while j >= 0:
+          if d[j] == ol:
+            d[j] = res
+            break
+          dec(j)
+      rawDealloc(gch.region, ol)
+    else:
+      # we split the old refcount in 2 parts. XXX This is still not entirely
+      # correct if the pointer that receives growObj's result is on the stack.
+      # A better fix would be to emit the location specific write barrier for
+      # 'growObj', but this is a lot more work and who knows what new problems
+      # this would create.
+      res.refcount = rcIncrement or gch.black
+      decRef(ol)
+  else:
+    sysAssert(ol.typ != nil, "growObj: 5")
+    zeroMem(ol, sizeof(Cell))
+  when useCellIds:
+    inc gch.idGenerator
+    res.id = gch.idGenerator
   result = cellToUsr(res)
   sysAssert(allocInv(gch.region), "growObj end")
   when defined(memProfiler): nimProfile(newsize-oldsize)
 
 proc growObj(old: pointer, newsize: int): pointer {.rtl.} =
-  setStackTop(gch)
   result = growObj(old, newsize, gch)
 
 {.push profiler:off.}
 
-# ---------------- cycle collector -------------------------------------------
 
-proc doOperation(p: pointer, op: WalkOp) =
-  if p == nil: return
-  var c: PCell = usrToCell(p)
-  sysAssert(c != nil, "doOperation: 1")
-  gch.tempStack.add c
+template takeStartTime(workPackageSize) {.dirty.} =
+  const workPackage = workPackageSize
+  var debugticker = 1000
+  when withRealTime:
+    var steps = workPackage
+    var t0: Ticks
+    if gch.maxPause > 0: t0 = getticks()
 
-proc nimGCvisit(d: pointer, op: int) {.compilerRtl.} =
-  doOperation(d, WalkOp(op))
+template takeTime {.dirty.} =
+  when withRealTime: dec steps
+  dec debugticker
+
+template checkTime {.dirty.} =
+  if debugticker <= 0:
+    echo "in loop"
+    debugticker = 1000
+  when withRealTime:
+    if steps == 0:
+      steps = workPackage
+      if gch.maxPause > 0:
+        let duration = getticks() - t0
+        # the GC's measuring is not accurate and needs some cleanup actions
+        # (stack unmarking), so subtract some short amount of time in
+        # order to miss deadlines less often:
+        if duration >= gch.maxPause - 50_000:
+          return false
 
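These three templates implement the hard-realtime budget: work is counted in packages of workPackageSize steps and, when gch.maxPause is set, checkTime compares the elapsed ticks against the deadline (minus a 50 microsecond safety margin) after each package, returning false from the enclosing proc so the pass can be resumed later. A sketch of the shape every incremental pass takes (workLeft/doOneStep are hypothetical stand-ins; compare sweep and markIncremental below):

  proc incrementalPass(gch: var GcHeap): bool =
    takeStartTime(100)     # re-check the clock every 100 units of work
    while workLeft(gch):   # hypothetical "more work?" predicate
      doOneStep(gch)       # hypothetical unit of GC work
      takeTime()
      checkTime()          # may `return false` close to the deadline
    result = true          # ran to completion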
-type
-  RecursionType = enum
-    FromChildren,
-    FromRoot
-{.deprecated: [TRecursionType: RecursionType].}
+# ---------------- cycle collector -------------------------------------------
 
-proc collectZCT(gch: var GcHeap): bool
+proc freeCyclicCell(gch: var GcHeap, c: PCell) =
+  gcAssert(isAllocatedPtr(gch.region, c), "freeCyclicCell: freed pointer?")
 
-template pseudoRecursion(typ: RecursionType, body: stmt): stmt =
-  discard
+  var d = gch.decStack.d
+  for i in 0..gch.decStack.len-1:
+    gcAssert d[i] != c, "wtf man, freeing obviously alive stuff?!!"
+
+  prepareDealloc(c)
+  gcTrace(c, csCycFreed)
+  when logGC: writeCell("cycle collector dealloc cell", c)
+  when reallyDealloc:
+    sysAssert(allocInv(gch.region), "free cyclic cell")
+    rawDealloc(gch.region, c)
+  else:
+    gcAssert(c.typ != nil, "freeCyclicCell")
+    zeroMem(c, sizeof(Cell))
 
-proc trimCycleRoots(gch: var GcHeap, startIdx = gch.cycleRootsTrimIdx) =
-  var i = startIdx
-  while i < gch.cycleRoots.len:
-    if gch.cycleRoots.d[i].color != rcCycleCandidate:
-      gch.cycleRoots.trimAt i
-    else:
-      inc i
+proc sweep(gch: var GcHeap): bool =
+  takeStartTime(100)
+  echo "loop start"
+  let black = gch.black
+  while true:
+    let x = allObjectsAsProc(gch.region, addr gch.spaceIter)
+    if gch.spaceIter.state < 0: break
+    takeTime()
+    if isCell(x):
+      # cast to PCell is correct here:
+      var c = cast[PCell](x)
+      gcAssert c.color != rcGrey, "cell is still grey?"
+      if c.color != black: freeCyclicCell(gch, c)
+      # Since this is incremental, we MUST not set the object to 'white' here.
+      # We could set all the remaining objects to white after the 'sweep'
+      # completed but instead we flip the meaning of black/white to save one
+      # traversal over the heap!
+    checkTime()
+  # prepare for next iteration:
+  echo "loop end"
+  gch.spaceIter = ObjectSpaceIter()
+  result = true
 
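Note that sweep leaves the survivors black on purpose: rather than whitening them in an extra traversal, collectCycles below flips the meaning of the colors once the sweep completes. With the constants above (illustration):

  # suppose gch.black == rcBlackOrig (0): live cells were painted 0 and
  # every cell still colored 1 was freed. After the flip
  gch.black = 1 - gch.black
  # color 1 now means black, so all survivors (color 0) count as white
  # for the next cycle without having been touched.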
-  gch.cycleRootsTrimIdx = gch.cycleRoots.len
+proc markRoot(gch: var GcHeap, c: PCell) =
+  # since we start with 'black' cells, we need to mark them here too:
+  if c.color != rcGrey:
+    c.setColor(rcGrey)
+    add(gch.greyStack, c)
 
-# we now use a much simpler and non-recursive algorithm for cycle removal
-proc collectCycles(gch: var GcHeap) =
-  if gch.cycleRoots.len == 0: return
-  gch.stat.cycleTableSize = max(gch.stat.cycleTableSize, gch.cycleRoots.len)
+proc markIncremental(gch: var GcHeap): bool =
+  var L = addr(gch.greyStack.len)
+  takeStartTime(100)
+  while L[] > 0:
+    var c = gch.greyStack.d[0]
+    sysAssert(isAllocatedPtr(gch.region, c), "markIncremental: isAllocatedPtr")
+    gch.greyStack.d[0] = gch.greyStack.d[L[] - 1]
+    dec(L[])
+    takeTime()
+    if c.color == rcGrey:
+      c.setColor(gch.black)
+      forAllChildren(c, waMarkGrey)
+    checkTime()
+  gcAssert gch.greyStack.len == 0, "markIncremental: greystack not empty "
+  result = true
 
-  when CollectCyclesStats:
-    let l0 = gch.cycleRoots.len
-    let tStart = getTicks()
+proc markGlobals(gch: var GcHeap) =
+  for i in 0 .. < globalMarkersLen: globalMarkers[i]()
 
+proc markLocals(gch: var GcHeap) =
+  var d = gch.decStack.d
+  for i in 0 .. < gch.decStack.len:
+    sysAssert isAllocatedPtr(gch.region, d[i]), "markLocals"
+    markRoot(gch, d[i])
+
+when logGC:
   var
-    decrefs = 0
-    increfs = 0
-    collected = 0
-    maybedeads = 0
-
-  template ignoreObject(c: PCell): expr =
-    # This controls which objects will be ignored in the mark and scan stages
-    (when MarkingSkipsAcyclicObjects: not canbeCycleRoot(c) else: false)
-    # not canbeCycleRoot(c)
-    # false
-    # c.isBitUp(rcHasStackRef)
-
-  template earlyMarkAliveRec(cell) =
-    let startLen = gch.tempStack.len
-    cell.setColor rcAlive
-    cell.forAllChildren waPush
-
-    while startLen != gch.tempStack.len:
-      dec gch.tempStack.len
-      var c = gch.tempStack.d[gch.tempStack.len]
-      if c.color != rcAlive:
-        c.setColor rcAlive
-        c.forAllChildren waPush
-
-  template earlyMarkAlive(stackRoots) =
-    # This marks all objects reachable from the stack as alive before any
-    # of the other stages is executed. Such objects cannot be garbage and
-    # they don't need to participate in the recursive decref/incref.
-    for i in 0 .. <stackRoots.len:
-      var c = stackRoots.d[i]
-      # c.setBit rcHasStackRef
-      earlyMarkAliveRec(c)
-
-  earlyMarkAlive(gch.decStack)
-
-  when CollectCyclesStats:
-    let tAfterEarlyMarkAlive = getTicks()
-
-  template recursiveDecRef(cell) =
-    let startLen = gch.tempStack.len
-    cell.setColor rcDecRefApplied
-    cell.forAllChildren waPush
-
-    while startLen != gch.tempStack.len:
-      dec gch.tempStack.len
-      var c = gch.tempStack.d[gch.tempStack.len]
-      if ignoreObject(c): continue
-
-      sysAssert(c.refcount >=% rcIncrement, "recursive dec ref")
-      dec c.refcount, rcIncrement
-      inc decrefs
-      if c.color != rcDecRefApplied:
-        c.setColor rcDecRefApplied
-        c.forAllChildren waPush
-
-  template markRoots(roots) =
-    var i = 0
-    while i < roots.len:
-      if roots.d[i].color == rcCycleCandidate:
-        recursiveDecRef(roots.d[i])
-        inc i
-      else:
-        roots.trimAt i
-
-  markRoots(gch.cycleRoots)
-
-  when CollectCyclesStats:
-    let tAfterMark = getTicks()
-    c_printf "COLLECT CYCLES %d: %d/%d\n", gcCollectionIdx, gch.cycleRoots.len, l0
-
-  template recursiveMarkAlive(cell) =
-    let startLen = gch.tempStack.len
-    cell.setColor rcAlive
-    cell.forAllChildren waPush
-
-    while startLen != gch.tempStack.len:
-      dec gch.tempStack.len
-      var c = gch.tempStack.d[gch.tempStack.len]
-      if ignoreObject(c): continue
-      inc c.refcount, rcIncrement
-      inc increfs
-
-      if c.color != rcAlive:
-        c.setColor rcAlive
-        c.forAllChildren waPush
-
-  template scanRoots(roots) =
-    for i in 0 .. <roots.len:
-      let startLen = gch.tempStack.len
-      gch.tempStack.add roots.d[i]
-
-      while startLen != gch.tempStack.len:
-        dec gch.tempStack.len
-        var c = gch.tempStack.d[gch.tempStack.len]
-        if ignoreObject(c): continue
-        if c.color == rcDecRefApplied:
-          if c.refcount >=% rcIncrement:
-            recursiveMarkAlive(c)
-          else:
-            # note that this is not necessarily the ultimate
-            # destiny of the object. we may still mark it alive
-            # later if we encounter another node from where it's
-            # reachable.
-            c.setColor rcMaybeDead
-            inc maybedeads
-            c.forAllChildren waPush
-
-  scanRoots(gch.cycleRoots)
-
-  when CollectCyclesStats:
-    let tAfterScan = getTicks()
-
-  template collectDead(roots) =
-    for i in 0 .. <roots.len:
-      var c = roots.d[i]
-      c.clearBit(rcInCycleRoots)
-
-      let startLen = gch.tempStack.len
-      gch.tempStack.add c
-
-      while startLen != gch.tempStack.len:
-        dec gch.tempStack.len
-        var c = gch.tempStack.d[gch.tempStack.len]
-        when MarkingSkipsAcyclicObjects:
-          if not canbeCycleRoot(c):
-            # This is an acyclic object reachable from a dead cyclic object
-            # We must do a normal decref here that may add the acyclic object
-            # to the ZCT
-            doDecRef(c, LocalHeap, Cyclic)
-            continue
-        if c.color == rcMaybeDead and not c.isBitUp(rcInCycleRoots):
-          c.setColor(rcReallyDead)
-          inc collected
-          c.forAllChildren waPush
-          # we need to postpone the actual deallocation in order to allow
-          # the finalizers to run while the data structures are still intact
-          gch.freeStack.add c
-          prepareDealloc(c)
-
-    for i in 0 .. <gch.freeStack.len:
-      freeCell(gch, gch.freeStack.d[i])
-
-  collectDead(gch.cycleRoots)
-
-  when CollectCyclesStats:
-    let tFinal = getTicks()
-    cprintf "times:\n  early mark alive: %d ms\n  mark: %d ms\n  scan: %d ms\n  collect: %d ms\n  decrefs: %d\n  increfs: %d\n  marked dead: %d\n  collected: %d\n",
-      (tAfterEarlyMarkAlive - tStart)  div 1_000_000,
-      (tAfterMark - tAfterEarlyMarkAlive) div 1_000_000,
-      (tAfterScan - tAfterMark) div 1_000_000,
-      (tFinal - tAfterScan) div 1_000_000,
-      decrefs,
-      increfs,
-      maybedeads,
-      collected
-
-  deinit(gch.cycleRoots)
-  init(gch.cycleRoots)
-
-  deinit(gch.freeStack)
-  init(gch.freeStack)
-
-  when MarkingSkipsAcyclicObjects:
-    # Collect the acyclic objects that became unreachable due to collected
-    # cyclic objects.
-    discard collectZCT(gch)
-    # collectZCT may add new cycle candidates and we may decide to loop here
-    # if gch.cycleRoots.len > 0: repeat
-
-var gcDebugging* = false
-
-var seqdbg* : proc (s: PGenericSeq) {.cdecl.}
+    cycleCheckA: array[100, PCell]
+    cycleCheckALen = 0
+
+  proc alreadySeen(c: PCell): bool =
+    for i in 0 .. <cycleCheckALen:
+      if cycleCheckA[i] == c: return true
+    if cycleCheckALen == len(cycleCheckA):
+      gcAssert(false, "cycle detection overflow")
+      quit 1
+    cycleCheckA[cycleCheckALen] = c
+    inc cycleCheckALen
+
+  proc debugGraph(s: PCell) =
+    if alreadySeen(s):
+      writeCell("child cell (already seen) ", s)
+    else:
+      writeCell("cell {", s)
+      forAllChildren(s, waDebug)
+      c_fprintf(c_stdout, "}\n")
+
+proc doOperation(p: pointer, op: WalkOp) =
+  if p == nil: return
+  var c: PCell = usrToCell(p)
+  gcAssert(c != nil, "doOperation: 1")
+  # the 'case' should be faster than function pointers because of easy
+  # prediction:
+  case op
+  of waZctDecRef:
+    #if not isAllocatedPtr(gch.region, c):
+    #  c_fprintf(c_stdout, "[GC] decref bug: %p", c)
+    gcAssert(isAllocatedPtr(gch.region, c), "decRef: waZctDecRef")
+    gcAssert(c.refcount >=% rcIncrement, "doOperation 2")
+    #c.refcount = c.refcount -% rcIncrement
+    when logGC: writeCell("decref (from doOperation)", c)
+    decRef(c)
+    #if c.refcount <% rcIncrement: addZCT(gch.zct, c)
+  of waMarkGlobal:
+    when hasThreadSupport:
+      # could point to a cell which we don't own and don't want to touch/trace
+      if isAllocatedPtr(gch.region, c):
+        markRoot(gch, c)
+    else:
+      markRoot(gch, c)
+  of waMarkGrey:
+    if c.color == 1-gch.black:
+      c.setColor(rcGrey)
+      add(gch.greyStack, c)
+  #of waDebug: debugGraph(c)
+
+proc nimGCvisit(d: pointer, op: int) {.compilerRtl.} =
+  doOperation(d, WalkOp(op))
+
+proc collectZCT(gch: var GcHeap): bool {.benign.}
+
+proc collectCycles(gch: var GcHeap): bool =
+  # ensure the ZCT 'color' is not used:
+  while gch.zct.len > 0: discard collectZCT(gch)
+  case gch.phase
+  of Phase.None, Phase.Marking:
+    #if gch.phase == Phase.None:
+    gch.phase = Phase.Marking
+    markGlobals(gch)
+    markLocals(gch)
+    if markIncremental(gch):
+      gch.phase = Phase.Sweeping
+  of Phase.Sweeping:
+    gcAssert gch.greyStack.len == 0, "greystack not empty"
+    if sweep(gch):
+      gch.phase = Phase.None
+      # flip black/white meanings:
+      gch.black = 1 - gch.black
+      result = true
 
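collectCycles is thus a small state machine: None/Marking shade the global and stack roots grey and drain the grey stack; Sweeping walks the entire heap; only when a full mark-plus-sweep has finished does it return true and flip the colors. A caller has to keep invoking it until it reports completion, roughly (sketch; the real driver sits behind collectCT, outside this diff):

  while not collectCycles(gch):
    discard   # each call performs one time-budgeted slice of the collection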
 proc gcMark(gch: var GcHeap, p: pointer) {.inline.} =
   # the addresses are not as cells on the stack, so turn them to cells:
@@ -1005,235 +739,33 @@ proc gcMark(gch: var GcHeap, p: pointer) {.inline.} =
     var objStart = cast[PCell](interiorAllocatedPtr(gch.region, cell))
     if objStart != nil:
       # mark the cell:
-      if objStart.color != rcReallyDead:
-        if gcDebugging:
-          # writeCell("marking ", objStart)
-          discard
-        else:
-          inc objStart.refcount, rcIncrement
-          gch.decStack.add objStart
-      else:
-        # With incremental clean-up, objects spend some time
-        # in various lists before being deallocated.
-        # We just found a reference on the stack to an object,
-        # which we have previously labeled as unreachable.
-        # This is either a bug in the GC or a pure accidental
-        # coincidence due to the conservative stack marking.
-        when debugGC:
-          # writeCell("marking dead object", objStart)
-          discard
-    when false:
-      if isAllocatedPtr(gch.region, cell):
-        sysAssert false, "allocated pointer but not interior?"
-        # mark the cell:
-        inc cell.refcount, rcIncrement
-        add(gch.decStack, cell)
+      objStart.refcount = objStart.refcount +% rcIncrement
+      add(gch.decStack, objStart)
   sysAssert(allocInv(gch.region), "gcMark end")
 
-proc markThreadStacks(gch: var GcHeap) =
-  when hasThreadSupport and hasSharedHeap:
-    {.error: "not fully implemented".}
-    var it = threadList
-    while it != nil:
-      # mark registers:
-      for i in 0 .. high(it.registers): gcMark(gch, it.registers[i])
-      var sp = cast[ByteAddress](it.stackBottom)
-      var max = cast[ByteAddress](it.stackTop)
-      # XXX stack direction?
-      # XXX unroll this loop:
-      while sp <=% max:
-        gcMark(gch, cast[PPointer](sp)[])
-        sp = sp +% sizeof(pointer)
-      it = it.next
-
-# ----------------- stack management --------------------------------------
-#  inspired from Smart Eiffel
-
-when defined(sparc):
-  const stackIncreases = false
-elif defined(hppa) or defined(hp9000) or defined(hp9000s300) or
-     defined(hp9000s700) or defined(hp9000s800) or defined(hp9000s820):
-  const stackIncreases = true
-else:
-  const stackIncreases = false
-
-when not defined(useNimRtl):
-  {.push stack_trace: off.}
-  proc setStackBottom(theStackBottom: pointer) =
-    #c_fprintf(c_stdout, "stack bottom: %p;\n", theStackBottom)
-    # the first init must be the one that defines the stack bottom:
-    if gch.stackBottom == nil: gch.stackBottom = theStackBottom
-    else:
-      var a = cast[ByteAddress](theStackBottom) # and not PageMask - PageSize*2
-      var b = cast[ByteAddress](gch.stackBottom)
-      #c_fprintf(c_stdout, "old: %p new: %p;\n",gch.stackBottom,theStackBottom)
-      when stackIncreases:
-        gch.stackBottom = cast[pointer](min(a, b))
-      else:
-        gch.stackBottom = cast[pointer](max(a, b))
-  {.pop.}
-
-proc stackSize(): int {.noinline.} =
-  var stackTop {.volatile.}: pointer
-  result = abs(cast[int](addr(stackTop)) - cast[int](gch.stackBottom))
+include gc_common
 
-var
-  jmpbufSize {.importc: "sizeof(jmp_buf)", nodecl.}: int
-    # a little hack to get the size of a JmpBuf in the generated C code
-    # in a platform independent way
-
-when defined(sparc): # For SPARC architecture.
-  proc isOnStack(p: pointer): bool =
-    var stackTop {.volatile.}: pointer
-    stackTop = addr(stackTop)
-    var b = cast[ByteAddress](gch.stackBottom)
-    var a = cast[ByteAddress](stackTop)
-    var x = cast[ByteAddress](p)
-    result = a <=% x and x <=% b
-
-  proc markStackAndRegisters(gch: var GcHeap) {.noinline, cdecl.} =
-    when defined(sparcv9):
-      asm  """"flushw \n" """
-    else:
-      asm  """"ta      0x3   ! ST_FLUSH_WINDOWS\n" """
-
-    var
-      max = gch.stackBottom
-      sp: PPointer
-      stackTop: array[0..1, pointer]
-    sp = addr(stackTop[0])
-    # Addresses decrease as the stack grows.
-    while sp <= max:
-      gcMark(gch, sp[])
-      sp = cast[PPointer](cast[ByteAddress](sp) +% sizeof(pointer))
-
-elif defined(ELATE):
-  {.error: "stack marking code is to be written for this architecture".}
-
-elif stackIncreases:
-  # ---------------------------------------------------------------------------
-  # Generic code for architectures where addresses increase as the stack grows.
-  # ---------------------------------------------------------------------------
-  proc isOnStack(p: pointer): bool =
-    var stackTop {.volatile.}: pointer
-    stackTop = addr(stackTop)
-    var a = cast[ByteAddress](gch.stackBottom)
-    var b = cast[ByteAddress](stackTop)
-    var x = cast[ByteAddress](p)
-    result = a <=% x and x <=% b
-
-  proc markStackAndRegisters(gch: var GcHeap) {.noinline, cdecl.} =
-    var registers: C_JmpBuf
-    if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
-      var max = cast[ByteAddress](gch.stackBottom)
-      var sp = cast[ByteAddress](addr(registers)) +% jmpbufSize -% sizeof(pointer)
-      # sp will traverse the JMP_BUF as well (jmp_buf size is added,
-      # otherwise sp would be below the registers structure).
-      while sp >=% max:
-        gcMark(gch, cast[PPointer](sp)[])
-        sp = sp -% sizeof(pointer)
-
-else:
-  # ---------------------------------------------------------------------------
-  # Generic code for architectures where addresses decrease as the stack grows.
-  # ---------------------------------------------------------------------------
-  proc isOnStack(p: pointer): bool =
-    var stackTop {.volatile.}: pointer
-    stackTop = addr(stackTop)
-    var b = cast[ByteAddress](gch.stackBottom)
-    var a = cast[ByteAddress](stackTop)
-    var x = cast[ByteAddress](p)
-    result = a <=% x and x <=% b
-
-  proc markStackAndRegisters(gch: var GcHeap) {.noinline, cdecl.} =
-    # We use a jmp_buf buffer that is in the C stack.
-    # Used to traverse the stack and registers assuming
-    # that 'setjmp' will save registers in the C stack.
-    type PStackSlice = ptr array [0..7, pointer]
-    var registers: C_JmpBuf
-    if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
-      when MinimumStackMarking:
-        # mark the registers
-        var jmpbufPtr = cast[ByteAddress](addr(registers))
-        var jmpbufEnd = jmpbufPtr +% jmpbufSize
-
-        while jmpbufPtr <=% jmpbufEnd:
-          gcMark(gch, cast[PPointer](jmpbufPtr)[])
-          jmpbufPtr = jmpbufPtr +% sizeof(pointer)
-
-        var sp = cast[ByteAddress](gch.stackTop)
-      else:
-        var sp = cast[ByteAddress](addr(registers))
-      # mark the user stack
-      var max = cast[ByteAddress](gch.stackBottom)
-      # loop unrolled:
-      while sp <% max - 8*sizeof(pointer):
-        gcMark(gch, cast[PStackSlice](sp)[0])
-        gcMark(gch, cast[PStackSlice](sp)[1])
-        gcMark(gch, cast[PStackSlice](sp)[2])
-        gcMark(gch, cast[PStackSlice](sp)[3])
-        gcMark(gch, cast[PStackSlice](sp)[4])
-        gcMark(gch, cast[PStackSlice](sp)[5])
-        gcMark(gch, cast[PStackSlice](sp)[6])
-        gcMark(gch, cast[PStackSlice](sp)[7])
-        sp = sp +% sizeof(pointer)*8
-      # last few entries:
-      while sp <=% max:
-        gcMark(gch, cast[PPointer](sp)[])
-        sp = sp +% sizeof(pointer)
-
-# ----------------------------------------------------------------------------
-# end of non-portable code
-# ----------------------------------------------------------------------------
-
-proc releaseCell(gch: var GcHeap, cell: PCell) =
-  if cell.color != rcReallyDead:
-    prepareDealloc(cell)
-    cell.setColor rcReallyDead
-
-    let l1 = gch.tempStack.len
-    cell.forAllChildren waPush
-    let l2 = gch.tempStack.len
-    for i in l1 .. <l2:
-      var cc = gch.tempStack.d[i]
-      if cc.refcount--(LocalHeap):
-        releaseCell(gch, cc)
-      else:
-        if canbeCycleRoot(cc):
-          addCycleRoot(gch.cycleRoots, cc)
-
-    gch.tempStack.len = l1
-
-  if cell.isBitDown(rcBufferedAnywhere):
-    freeCell(gch, cell)
-  # else:
-  # This object is either buffered in the cycleRoots list and we'll leave
-  # it there to be collected in the next collectCycles or it's pending in
-  # the ZCT:
-  # (e.g. we are now cleaning the 15th object, but this one is 18th in the
-  #  list. Note that this can happen only if we reached this point by the
-  #  recursion).
-  # We can ignore it now as the ZCT cleaner will reach it soon.
+proc markStackAndRegisters(gch: var GcHeap) {.noinline, cdecl.} =
+  forEachStackSlot(gch, gcMark)
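+  # forEachStackSlot comes from gc_common: it spills the registers with
+  # setjmp and then conservatively applies gcMark to every word of every
+  # registered stack (descriptive note).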
 
 proc collectZCT(gch: var GcHeap): bool =
-  const workPackage = 100
+  # Note: Freeing may add child objects to the ZCT! So essentially we do
+  # deep freeing, which is bad for incremental operation. In order to
+  # avoid a deep recursion stack, child objects are moved onto the ZCT
+  # instead of being freed recursively. This is performance critical!
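+  # Example: freeing the head of a linked list drops the last reference to
+  # its successor; the successor then enters the ZCT and is freed by a
+  # later iteration of this loop rather than by recursion.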
   var L = addr(gch.zct.len)
-
-  when withRealtime:
-    var steps = workPackage
-    var t0: Ticks
-    if gch.maxPause > 0: t0 = getticks()
+  takeStartTime(100)
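+  # takeStartTime/takeTime/checkTime come from gc_common: they count work
+  # steps (100 per package here) and make this proc return false early
+  # once gch.maxPause is about to be exceeded (descriptive note).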
 
   while L[] > 0:
     var c = gch.zct.d[0]
-    sysAssert c.isBitUp(rcZct), "collectZCT: rcZct missing!"
-    sysAssert(isAllocatedPtr(gch.region, c), "collectZCT: isAllocatedPtr")
-
+    sysAssert(isAllocatedPtr(gch.region, c), "collectZCT: isAllocatedPtr")
     # remove from ZCT:
-    c.clearBit(rcZct)
+    gcAssert((c.refcount and ZctFlag) == ZctFlag, "collectZCT: ZctFlag missing")
+
+    c.refcount = c.refcount and not ZctFlag
     gch.zct.d[0] = gch.zct.d[L[] - 1]
     dec(L[])
-    when withRealtime: dec steps
+    takeTime()
     if c.refcount <% rcIncrement:
       # It may have an RC > 0, if it is in the hardware stack or
       # it has not been removed yet from the ZCT. This is because
@@ -1241,92 +773,78 @@ proc collectZCT(gch: var GcHeap): bool =
       # as this might be too slow.
       # In any case, it should be removed from the ZCT. But not
       # freed. **KEEP THIS IN MIND WHEN MAKING THIS INCREMENTAL!**
-      if c.color == rcRetiredBuffer:
-        if c.isBitDown(rcInCycleRoots):
-          freeCell(gch, c)
+      when logGC: writeCell("zct dealloc cell", c)
+      gcTrace(c, csZctFreed)
+      # We are about to free the object, call the finalizer BEFORE its
+      # children are deleted as well, because otherwise the finalizer may
+      # access invalid memory. This is done by prepareDealloc():
+      prepareDealloc(c)
+      forAllChildren(c, waZctDecRef)
+      when reallyDealloc:
+        sysAssert(allocInv(gch.region), "collectZCT: rawDealloc")
+        rawDealloc(gch.region, c)
       else:
-        # if c.color == rcReallyDead: writeCell("ReallyDead in ZCT?", c)
-        releaseCell(gch, c)
-    when withRealtime:
-      if steps == 0:
-        steps = workPackage
-        if gch.maxPause > 0:
-          let duration = getticks() - t0
-          # the GC's measuring is not accurate and needs some cleanup actions
-          # (stack unmarking), so subtract some short amount of time in to
-          # order to miss deadlines less often:
-          if duration >= gch.maxPause - 50_000:
-            return false
+        sysAssert(c.typ != nil, "collectZCT 2")
+        zeroMem(c, sizeof(Cell))
+    checkTime()
   result = true
-  gch.trimCycleRoots
-  #deInit(gch.zct)
-  #init(gch.zct)
 
 proc unmarkStackAndRegisters(gch: var GcHeap) =
   var d = gch.decStack.d
-  for i in 0 .. <gch.decStack.len:
+  for i in 0..gch.decStack.len-1:
     sysAssert isAllocatedPtr(gch.region, d[i]), "unmarkStackAndRegisters"
-    # XXX: just call doDecRef?
-    var c = d[i]
-    sysAssert c.typ != nil, "unmarkStackAndRegisters 2"
-
-    if c.color == rcRetiredBuffer:
-      continue
-
-    # XXX no need for an atomic dec here:
-    if c.refcount--(LocalHeap):
-      # the object survived only because of a stack reference
-      # it still doesn't have heap references
-      addZCT(gch.zct, c)
-
-    if canbeCycleRoot(c):
-      # any cyclic object reachable from the stack can be turned into
-      # a leak if it's orphaned through the stack reference
-      # that's because the write-barrier won't be executed for stack
-      # locations
-      addCycleRoot(gch.cycleRoots, c)
-
+    decRef(d[i])
   gch.decStack.len = 0
 
 proc collectCTBody(gch: var GcHeap) =
-  when withRealtime:
+  when withRealTime:
     let t0 = getticks()
-  when debugGC: inc gcCollectionIdx
   sysAssert(allocInv(gch.region), "collectCT: begin")
 
-  gch.stat.maxStackSize = max(gch.stat.maxStackSize, stackSize())
+  when not defined(nimCoroutines):
+    gch.stat.maxStackSize = max(gch.stat.maxStackSize, stackSize())
   sysAssert(gch.decStack.len == 0, "collectCT")
   prepareForInteriorPointerChecking(gch.region)
   markStackAndRegisters(gch)
-  markThreadStacks(gch)
   gch.stat.maxStackCells = max(gch.stat.maxStackCells, gch.decStack.len)
   inc(gch.stat.stackScans)
   if collectZCT(gch):
     when cycleGC:
       if getOccupiedMem(gch.region) >= gch.cycleThreshold or alwaysCycleGC:
-        collectCycles(gch)
-        sysAssert gch.zct.len == 0, "zct is not null after collect cycles"
-        inc(gch.stat.cycleCollections)
-        gch.cycleThreshold = max(InitialCycleThreshold, getOccupiedMem() *
-                                 CycleIncrease)
-        gch.stat.maxThreshold = max(gch.stat.maxThreshold, gch.cycleThreshold)
+        if collectCycles(gch):
+          inc(gch.stat.cycleCollections)
+          gch.cycleThreshold = max(InitialCycleThreshold, getOccupiedMem() *
+                                   CycleIncrease)
+          gch.stat.maxThreshold = max(gch.stat.maxThreshold, gch.cycleThreshold)
   unmarkStackAndRegisters(gch)
   sysAssert(allocInv(gch.region), "collectCT: end")
 
-  when withRealtime:
+  when withRealTime:
     let duration = getticks() - t0
     gch.stat.maxPause = max(gch.stat.maxPause, duration)
     when defined(reportMissedDeadlines):
       if gch.maxPause > 0 and duration > gch.maxPause:
         c_fprintf(c_stdout, "[GC] missed deadline: %ld\n", duration)
 
+when defined(nimCoroutines):
+  proc currentStackSizes(): int =
+    for stack in items(gch.stack):
+      result = result + stackSize(stack.starts, stack.pos)
+
 proc collectCT(gch: var GcHeap) =
-  if (gch.zct.len >= ZctThreshold or (cycleGC and
+  # stackMarkCosts prevents some pathological behaviour: Stack marking
+  # becomes more expensive with large stacks and large stacks mean that
+  # cells with RC=0 are more likely to be kept alive by the stack.
+  when defined(nimCoroutines):
+    let stackMarkCosts = max(currentStackSizes() div (16*sizeof(int)), ZctThreshold)
+  else:
+    let stackMarkCosts = max(stackSize() div (16*sizeof(int)), ZctThreshold)
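+  # Illustrative numbers only: a 512 KiB stack with 8-byte words yields
+  # 524_288 div 128 == 4096, so up to 4096 ZCT entries may accumulate
+  # before a collection triggers (but never fewer than ZctThreshold).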
+  if (gch.zct.len >= stackMarkCosts or (cycleGC and
       getOccupiedMem(gch.region)>=gch.cycleThreshold) or alwaysGC) and
       gch.recGcLock == 0:
     collectCTBody(gch)
 
-when withRealtime:
+when withRealTime:
   proc toNano(x: int): Nanos {.inline.} =
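+    ## converts a microsecond count into nanoseconds.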
     result = x * 1000
 
@@ -1334,13 +852,11 @@ when withRealtime:
     gch.maxPause = MaxPauseInUs.toNano
 
   proc GC_step(gch: var GcHeap, us: int, strongAdvice: bool) =
-    acquire(gch)
     gch.maxPause = us.toNano
     if (gch.zct.len >= ZctThreshold or (cycleGC and
         getOccupiedMem(gch.region)>=gch.cycleThreshold) or alwaysGC) or
         strongAdvice:
       collectCTBody(gch)
-    release(gch)
 
   proc GC_step*(us: int, strongAdvice = false) = GC_step(gch, us, strongAdvice)
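+  # Usage sketch: a soft real-time main loop might call `GC_step(2_000)`
+  # once per frame, granting the collector roughly a 2 ms budget;
+  # `strongAdvice = true` forces a step even when no threshold is hit.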
 
@@ -1358,11 +874,7 @@ when not defined(useNimRtl):
         dec(gch.recGcLock)
 
   proc GC_setStrategy(strategy: GC_Strategy) =
-    case strategy
-    of gcThroughput: discard
-    of gcResponsiveness: discard
-    of gcOptimizeSpace: discard
-    of gcOptimizeTime: discard
+    discard # strategy hints are not supported by this GC; the proc is a no-op
 
   proc GC_enableMarkAndSweep() =
     gch.cycleThreshold = InitialCycleThreshold
@@ -1372,13 +884,10 @@ when not defined(useNimRtl):
     # set to the max value to suppress the cycle detector
 
   proc GC_fullCollect() =
-    setStackTop(gch)
-    acquire(gch)
     var oldThreshold = gch.cycleThreshold
     gch.cycleThreshold = 0 # forces cycle collection
     collectCT(gch)
     gch.cycleThreshold = oldThreshold
-    release(gch)
 
   proc GC_getStatistics(): string =
     GC_disable()
@@ -1390,9 +899,13 @@ when not defined(useNimRtl):
              "[GC] max threshold: " & $gch.stat.maxThreshold & "\n" &
              "[GC] zct capacity: " & $gch.zct.cap & "\n" &
              "[GC] max cycle table size: " & $gch.stat.cycleTableSize & "\n" &
-             "[GC] max stack size: " & $gch.stat.maxStackSize & "\n" &
              "[GC] max pause time [ms]: " & $(gch.stat.maxPause div 1000_000)
-    when traceGC: writeLeakage(true)
+    when defined(nimCoroutines):
+      result = result & "\n[GC] number of stacks: " & $gch.stack.len & "\n"
+      for stack in items(gch.stack):
+        result = result & "[GC]   stack " & stack.starts.repr & "\n" &
+                 "[GC]     max stack size: " & $stack.maxStackSize & "\n"
+    else:
+      result = result & "\n[GC] max stack size: " & $gch.stat.maxStackSize & "\n"
     GC_enable()
 
 {.pop.}
diff --git a/lib/system/jssys.nim b/lib/system/jssys.nim
index 5bcddc5e6..5bac54772 100644
--- a/lib/system/jssys.nim
+++ b/lib/system/jssys.nim
@@ -33,6 +33,9 @@ type
     lineNumber {.importc.}: int
     message {.importc.}: cstring
     stack {.importc.}: cstring
+
+  JSRef = ref RootObj # Fake type.
+
 {.deprecated: [TSafePoint: SafePoint, TCallFrame: CallFrame].}
 
 var
@@ -282,61 +285,6 @@ proc eqStrings(a, b: string): bool {.asmNoStackFrame, compilerProc.} =
     return true;
   """
 
-type
-  Document {.importc.} = object of RootObj
-    write: proc (text: cstring) {.nimcall.}
-    writeln: proc (text: cstring) {.nimcall.}
-    createAttribute: proc (identifier: cstring): ref Node {.nimcall.}
-    createElement: proc (identifier: cstring): ref Node {.nimcall.}
-    createTextNode: proc (identifier: cstring): ref Node {.nimcall.}
-    getElementById: proc (id: cstring): ref Node {.nimcall.}
-    getElementsByName: proc (name: cstring): seq[ref Node] {.nimcall.}
-    getElementsByTagName: proc (name: cstring): seq[ref Node] {.nimcall.}
-
-  NodeType* = enum
-    ElementNode = 1,
-    AttributeNode,
-    TextNode,
-    CDATANode,
-    EntityRefNode,
-    EntityNode,
-    ProcessingInstructionNode,
-    CommentNode,
-    DocumentNode,
-    DocumentTypeNode,
-    DocumentFragmentNode,
-    NotationNode
-  Node* {.importc.} = object of RootObj
-    attributes*: seq[ref Node]
-    childNodes*: seq[ref Node]
-    data*: cstring
-    firstChild*: ref Node
-    lastChild*: ref Node
-    nextSibling*: ref Node
-    nodeName*: cstring
-    nodeType*: NodeType
-    nodeValue*: cstring
-    parentNode*: ref Node
-    previousSibling*: ref Node
-    appendChild*: proc (child: ref Node) {.nimcall.}
-    appendData*: proc (data: cstring) {.nimcall.}
-    cloneNode*: proc (copyContent: bool) {.nimcall.}
-    deleteData*: proc (start, len: int) {.nimcall.}
-    getAttribute*: proc (attr: cstring): cstring {.nimcall.}
-    getAttributeNode*: proc (attr: cstring): ref Node {.nimcall.}
-    getElementsByTagName*: proc (): seq[ref Node] {.nimcall.}
-    hasChildNodes*: proc (): bool {.nimcall.}
-    insertBefore*: proc (newNode, before: ref Node) {.nimcall.}
-    insertData*: proc (position: int, data: cstring) {.nimcall.}
-    removeAttribute*: proc (attr: cstring) {.nimcall.}
-    removeAttributeNode*: proc (attr: ref Node) {.nimcall.}
-    removeChild*: proc (child: ref Node) {.nimcall.}
-    replaceChild*: proc (newNode, oldNode: ref Node) {.nimcall.}
-    replaceData*: proc (start, len: int, text: cstring) {.nimcall.}
-    setAttribute*: proc (name, value: cstring) {.nimcall.}
-    setAttributeNode*: proc (attr: ref Node) {.nimcall.}
-{.deprecated: [TNode: Node, TNodeType: NodeType, TDocument: Document].}
-
 when defined(kwin):
   proc rawEcho {.compilerproc, asmNoStackFrame.} =
     asm """
@@ -360,28 +308,28 @@ elif defined(nodejs):
     """
 
 else:
-  var
-    document {.importc, nodecl.}: ref Document
-
   proc ewriteln(x: cstring) =
-    var node = document.getElementsByTagName("body")[0]
-    if node != nil:
-      node.appendChild(document.createTextNode(x))
-      node.appendChild(document.createElement("br"))
-    else:
+    var node: JSRef
+    {.emit: "`node` = document.getElementsByTagName('body')[0];".}
+    if node.isNil:
       raise newException(ValueError, "<body> element does not exist yet!")
+    {.emit: """
+    `node`.appendChild(document.createTextNode(`x`));
+    `node`.appendChild(document.createElement("br"));
+    """.}
 
   proc rawEcho {.compilerproc.} =
-    var node = document.getElementsByTagName("body")[0]
-    if node == nil:
+    var node: JSRef
+    {.emit: "`node` = document.getElementsByTagName('body')[0];".}
+    if node.isNil:
       raise newException(IOError, "<body> element does not exist yet!")
-    asm """
-      for (var i = 0; i < arguments.length; ++i) {
-        var x = `toJSStr`(arguments[i]);
-        `node`.appendChild(document.createTextNode(x))
-      }
-    """
-    node.appendChild(document.createElement("br"))
+    {.emit: """
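+    // rawEcho is variadic at the JS level: each element of arguments is a
+    // Nim string that toJSStr converts before it is appended to the body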
+    for (var i = 0; i < arguments.length; ++i) {
+      var x = `toJSStr`(arguments[i]);
+      `node`.appendChild(document.createTextNode(x));
+    }
+    `node`.appendChild(document.createElement("br"));
+    """.}
 
 # Arithmetic:
 proc addInt(a, b: int): int {.asmNoStackFrame, compilerproc.} =
@@ -532,8 +480,6 @@ proc nimMax(a, b: int): int {.compilerproc.} = return if a >= b: a else: b
 type NimString = string # hack for hti.nim
 include "system/hti"
 
-type JSRef = ref RootObj # Fake type.
-
 proc isFatPointer(ti: PNimType): bool =
   # This has to be consistent with the code generator!
   return ti.base.kind notin {tyObject,
@@ -587,15 +533,20 @@ proc nimCopy(dest, src: JSRef, ti: PNimType): JSRef =
     nimCopyAux(result, src, ti.node)
   of tySequence, tyArrayConstr, tyOpenArray, tyArray:
     asm """
-      if (`dest` === null || `dest` === undefined) {
-        `dest` = new Array(`src`.length);
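+      // a nil seq must copy to nil, so the null check wraps the old
+      // allocate-and-copy path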
+      if (`src` === null) {
+        `result` = null;
       }
       else {
-        `dest`.length = `src`.length;
-      }
-      `result` = `dest`;
-      for (var i = 0; i < `src`.length; ++i) {
-        `result`[i] = nimCopy(`result`[i], `src`[i], `ti`.base);
+        if (`dest` === null || `dest` === undefined) {
+          `dest` = new Array(`src`.length);
+        }
+        else {
+          `dest`.length = `src`.length;
+        }
+        `result` = `dest`;
+        for (var i = 0; i < `src`.length; ++i) {
+          `result`[i] = nimCopy(`result`[i], `src`[i], `ti`.base);
+        }
       }
     """
   of tyString:
diff --git a/lib/system/nimscript.nim b/lib/system/nimscript.nim
index aaba11324..772d25343 100644
--- a/lib/system/nimscript.nim
+++ b/lib/system/nimscript.nim
@@ -242,7 +242,7 @@ template task*(name: untyped; description: string; body: untyped): untyped =
   ## .. code-block:: nim
   ##  task build, "default build is via the C backend":
   ##    setCommand "c"
-  proc `name Task`() = body
+  proc `name Task`*() = body
 
   let cmd = getCommand()
   if cmd.len == 0 or cmd ==? "help":
diff --git a/lib/system/profiler.nim b/lib/system/profiler.nim
index 4f600417e..ae8ff4e19 100644
--- a/lib/system/profiler.nim
+++ b/lib/system/profiler.nim
@@ -50,10 +50,15 @@ proc captureStackTrace(f: PFrame, st: var StackTrace) =
     inc(i)
     b = b.prev
 
+var
+  profilingRequestedHook*: proc (): bool {.nimcall, benign.}
+    ## set this variable to provide a procedure that implements a profiler in
+    ## user space. See the `nimprof` module for a reference implementation.
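+    ##
+    ## A minimal sketch (hypothetical policy; the 50_000 interval mirrors
+    ## the built-in `SamplingInterval` that this patch removes):
+    ##
+    ## .. code-block:: nim
+    ##  var ticker {.threadvar.}: int
+    ##  proc requested(): bool {.nimcall.} =
+    ##    inc ticker
+    ##    if ticker >= 50_000:
+    ##      ticker = 0
+    ##      result = true
+    ##  profilingRequestedHook = requested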
+
 when defined(memProfiler):
   type
     MemProfilerHook* = proc (st: StackTrace, requestedSize: int) {.nimcall, benign.}
-  {.deprecated: [TMemProfilerHook: MemProfilerHook].}
+
   var
     profilerHook*: MemProfilerHook
       ## set this variable to provide a procedure that implements a profiler in
@@ -65,17 +70,13 @@ when defined(memProfiler):
     hook(st, requestedSize)
 
   proc nimProfile(requestedSize: int) =
-    if not isNil(profilerHook):
+    if not isNil(profilingRequestedHook) and profilingRequestedHook():
       callProfilerHook(profilerHook, requestedSize)
 else:
-  const
-    SamplingInterval = 50_000
-      # set this to change the default sampling interval
   var
     profilerHook*: ProfilerHook
       ## set this variable to provide a procedure that implements a profiler in
       ## user space. See the `nimprof` module for a reference implementation.
-    gTicker {.threadvar.}: int
 
   proc callProfilerHook(hook: ProfilerHook) {.noinline.} =
     # 'noinline' so that 'nimProfile' does not perform the stack allocation
@@ -86,16 +87,7 @@ else:
 
   proc nimProfile() =
     ## This is invoked by the compiler in every loop and on every proc entry!
-    if gTicker == 0:
-      gTicker = -1
-      if not isNil(profilerHook):
-        # disable recursive calls: XXX should use try..finally,
-        # but that's too expensive!
-        let oldHook = profilerHook
-        profilerHook = nil
-        callProfilerHook(oldHook)
-        profilerHook = oldHook
-      gTicker = SamplingInterval
-    dec gTicker
+    if not isNil(profilingRequestedHook) and profilingRequestedHook():
+      callProfilerHook(profilerHook)
 
 {.pop.}
diff --git a/lib/system/repr.nim b/lib/system/repr.nim
index 1f81a0813..986994203 100644
--- a/lib/system/repr.nim
+++ b/lib/system/repr.nim
@@ -259,8 +259,10 @@ when not defined(useNimRtl):
     of tyInt16: add result, $int(cast[ptr int16](p)[])
     of tyInt32: add result, $int(cast[ptr int32](p)[])
     of tyInt64: add result, $(cast[ptr int64](p)[])
-    of tyUInt8: add result, $ze(cast[ptr int8](p)[])
-    of tyUInt16: add result, $ze(cast[ptr int16](p)[])
+    of tyUInt8: add result, $(cast[ptr uint8](p)[])
+    of tyUInt16: add result, $(cast[ptr uint16](p)[])
+    of tyUInt32: add result, $(cast[ptr uint32](p)[])
+    of tyUInt64: add result, $(cast[ptr uint64](p)[])
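+    # reading through the matching unsigned pointer type renders the full
+    # magnitude directly; the tyUInt32/tyUInt64 cases were missing before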
 
     of tyFloat: add result, $(cast[ptr float](p)[])
     of tyFloat32: add result, $(cast[ptr float32](p)[])
diff --git a/lib/system/sysstr.nim b/lib/system/sysstr.nim
index 326c601bd..e2137e8f4 100644
--- a/lib/system/sysstr.nim
+++ b/lib/system/sysstr.nim
@@ -229,7 +229,7 @@ proc setLengthSeq(seq: PGenericSeq, elemSize, newLen: int): PGenericSeq {.
     # we need to decref here, otherwise the GC leaks!
     when not defined(boehmGC) and not defined(nogc) and
          not defined(gcMarkAndSweep) and not defined(gogc):
-      when compileOption("gc", "v2"):
+      when false: # compileOption("gc", "v2"):
         for i in newLen..result.len-1:
           let len0 = gch.tempStack.len
           forAllChildrenAux(cast[pointer](cast[ByteAddress](result) +%