Diffstat (limited to 'lib/system')
 lib/system/assign.nim    |  33
 lib/system/channels.nim  |   2
 lib/system/deepcopy.nim  |   2
 lib/system/gc.nim        |  18
 lib/system/gc2.nim       | 510
 lib/system/gc_common.nim |  20
 lib/system/gc_ms.nim     |  14
 lib/system/hti.nim       |  15
 lib/system/mmdisp.nim    |   6
 lib/system/nimscript.nim |   9
 lib/system/sysstr.nim    |  45
 lib/system/threads.nim   |   9
 12 files changed, 257 insertions(+), 426 deletions(-)
diff --git a/lib/system/assign.nim b/lib/system/assign.nim
index 61c33e51b..115df61a7 100644
--- a/lib/system/assign.nim
+++ b/lib/system/assign.nim
@@ -63,12 +63,17 @@ proc genericAssignAux(dest, src: pointer, mt: PNimType, shallow: bool) =
     sysAssert(dest != nil, "genericAssignAux 3")
     unsureAsgnRef(x, newSeq(mt, seq.len))
     var dst = cast[ByteAddress](cast[PPointer](dest)[])
-    for i in 0..seq.len-1:
-      genericAssignAux(
-        cast[pointer](dst +% i*% mt.base.size +% GenericSeqSize),
-        cast[pointer](cast[ByteAddress](s2) +% i *% mt.base.size +%
-                     GenericSeqSize),
-        mt.base, shallow)
+    if ntfNoRefs in mt.base.flags:
+      copyMem(cast[pointer](dst +% GenericSeqSize),
+              cast[pointer](cast[ByteAddress](s2) +% GenericSeqSize),
+              seq.len * mt.base.size)
+    else:
+      for i in 0..seq.len-1:
+        genericAssignAux(
+          cast[pointer](dst +% i*% mt.base.size +% GenericSeqSize),
+          cast[pointer](cast[ByteAddress](s2) +% i *% mt.base.size +%
+                       GenericSeqSize),
+          mt.base, shallow)
   of tyObject:
     if mt.base != nil:
       genericAssignAux(dest, src, mt.base, shallow)
@@ -89,6 +94,19 @@ proc genericAssignAux(dest, src: pointer, mt: PNimType, shallow: bool) =
         cast[pointer](s +% i*% mt.base.size), mt.base, shallow)
   of tyRef:
     unsureAsgnRef(cast[PPointer](dest), cast[PPointer](s)[])
+  of tyOptAsRef:
+    let s2 = cast[PPointer](src)[]
+    let d = cast[PPointer](dest)
+    if s2 == nil:
+      unsureAsgnRef(d, s2)
+    else:
+      when declared(usrToCell):
+        let realType = usrToCell(s2).typ
+      else:
+        let realType = if mt.base.kind == tyObject: cast[ptr PNimType](s2)[]
+                       else: mt.base
+      var z = newObj(realType, realType.base.size)
+      genericAssignAux(d, addr z, mt.base, shallow)
   else:
     copyMem(dest, src, mt.size) # copy raw bits

@@ -115,6 +133,7 @@ when false:
     of tyPtr: k = "ptr"
     of tyRef: k = "ref"
     of tyVar: k = "var"
+    of tyOptAsRef: k = "optref"
     of tySequence: k = "seq"
     of tyProc: k = "proc"
     of tyPointer: k = "range"
@@ -195,7 +214,7 @@ proc genericReset(dest: pointer, mt: PNimType) =
   var d = cast[ByteAddress](dest)
   sysAssert(mt != nil, "genericReset 2")
   case mt.kind
-  of tyString, tyRef, tySequence:
+  of tyString, tyRef, tyOptAsRef, tySequence:
     unsureAsgnRef(cast[PPointer](dest), nil)
   of tyTuple:
     genericResetAux(dest, mt.node)
diff --git a/lib/system/channels.nim b/lib/system/channels.nim
index 1b90e245f..df6c6d41e 100644
--- a/lib/system/channels.nim
+++ b/lib/system/channels.nim
@@ -144,7 +144,7 @@ proc storeAux(dest, src: pointer, mt: PNimType, t: PRawChannel,
     for i in 0..(mt.size div mt.base.size)-1:
       storeAux(cast[pointer](d +% i*% mt.base.size),
                cast[pointer](s +% i*% mt.base.size), mt.base, t, mode)
-  of tyRef:
+  of tyRef, tyOptAsRef:
     var s = cast[PPointer](src)[]
     var x = cast[PPointer](dest)
     if s == nil:
diff --git a/lib/system/deepcopy.nim b/lib/system/deepcopy.nim
index 65ba2278c..51e138e5e 100644
--- a/lib/system/deepcopy.nim
+++ b/lib/system/deepcopy.nim
@@ -124,7 +124,7 @@ proc genericDeepCopyAux(dest, src: pointer, mt: PNimType; tab: var PtrTable) =
     for i in 0..(mt.size div mt.base.size)-1:
       genericDeepCopyAux(cast[pointer](d +% i*% mt.base.size),
                          cast[pointer](s +% i*% mt.base.size), mt.base, tab)
-  of tyRef:
+  of tyRef, tyOptAsRef:
     let s2 = cast[PPointer](src)[]
     if s2 == nil:
       unsureAsgnRef(cast[PPointer](dest), s2)
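
The interesting part of the assign.nim hunk above is the new seq fast path: when the element type carries no GC'ed references (the ntfNoRefs flag), per-element generic assignment collapses into a single copyMem. A minimal standalone sketch of the same idea in Nim; copyElems and its parameters are illustrative, not part of the runtime:

proc copyElems[T](dst, src: ptr UncheckedArray[T]; n: int;
                  mayContainRefs: static bool) =
  # Sketch only: mirrors the ntfNoRefs branch in genericAssignAux.
  when mayContainRefs:
    for i in 0 ..< n:
      dst[i] = src[i]                 # per-element assignment, runs write barriers
  else:
    copyMem(dst, src, n * sizeof(T))  # plain data: one flat copy suffices
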
diff --git a/lib/system/gc.nim b/lib/system/gc.nim
index 80aa5cf1b..a2ff72a30 100644
--- a/lib/system/gc.nim
+++ b/lib/system/gc.nim
@@ -349,7 +349,7 @@ proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: WalkOp) {.benign.} =
     for i in 0..n.len-1:
       # inlined for speed
       if n.sons[i].kind == nkSlot:
-        if n.sons[i].typ.kind in {tyRef, tyString, tySequence}:
+        if n.sons[i].typ.kind in {tyRef, tyOptAsRef, tyString, tySequence}:
           doOperation(cast[PPointer](d +% n.sons[i].offset)[], op)
         else:
           forAllChildrenAux(cast[pointer](d +% n.sons[i].offset),
@@ -366,7 +366,7 @@ proc forAllChildrenAux(dest: pointer, mt: PNimType, op: WalkOp) =
   if dest == nil: return # nothing to do
   if ntfNoRefs notin mt.flags:
     case mt.kind
-    of tyRef, tyString, tySequence: # leaf:
+    of tyRef, tyOptAsRef, tyString, tySequence: # leaf:
       doOperation(cast[PPointer](d)[], op)
     of tyObject, tyTuple:
       forAllSlotsAux(dest, mt.node, op)
@@ -379,13 +379,13 @@ proc forAllChildren(cell: PCell, op: WalkOp) =
   gcAssert(cell != nil, "forAllChildren: 1")
   gcAssert(isAllocatedPtr(gch.region, cell), "forAllChildren: 2")
   gcAssert(cell.typ != nil, "forAllChildren: 3")
-  gcAssert cell.typ.kind in {tyRef, tySequence, tyString}, "forAllChildren: 4"
+  gcAssert cell.typ.kind in {tyRef, tyOptAsRef, tySequence, tyString}, "forAllChildren: 4"
   let marker = cell.typ.marker
   if marker != nil:
     marker(cellToUsr(cell), op.int)
   else:
     case cell.typ.kind
-    of tyRef: # common case
+    of tyRef, tyOptAsRef: # common case
       forAllChildrenAux(cellToUsr(cell), cell.typ.base, op)
     of tySequence:
       var d = cast[ByteAddress](cellToUsr(cell))
@@ -461,7 +461,7 @@ proc rawNewObj(typ: PNimType, size: int, gch: var GcHeap): pointer =
   incTypeSize typ, size
   sysAssert(allocInv(gch.region), "rawNewObj begin")
   acquire(gch)
-  gcAssert(typ.kind in {tyRef, tyString, tySequence}, "newObj: 1")
+  gcAssert(typ.kind in {tyRef, tyOptAsRef, tyString, tySequence}, "newObj: 1")
   collectCT(gch)
   var res = cast[PCell](rawAlloc(gch.region, size + sizeof(Cell)))
   #gcAssert typ.kind in {tyString, tySequence} or size >= typ.base.size, "size too small"
@@ -509,7 +509,7 @@ proc newObjRC1(typ: PNimType, size: int): pointer {.compilerRtl.} =
   incTypeSize typ, size
   sysAssert(allocInv(gch.region), "newObjRC1 begin")
   acquire(gch)
-  gcAssert(typ.kind in {tyRef, tyString, tySequence}, "newObj: 1")
+  gcAssert(typ.kind in {tyRef, tyOptAsRef, tyString, tySequence}, "newObj: 1")
   collectCT(gch)
   sysAssert(allocInv(gch.region), "newObjRC1 after collectCT")
@@ -945,10 +945,10 @@ when not defined(useNimRtl):
       "[GC] max cycle table size: " & $gch.stat.cycleTableSize & "\n" &
       "[GC] max pause time [ms]: " & $(gch.stat.maxPause div 1000_000) & "\n"
     when nimCoroutines:
-      result = result & "[GC] number of stacks: " & $gch.stack.len & "\n"
+      result.add "[GC] number of stacks: " & $gch.stack.len & "\n"
       for stack in items(gch.stack):
-        result = result & "[GC] stack " & stack.bottom.repr & "[GC] max stack size " & cast[pointer](stack.maxStackSize).repr & "\n"
+        result.add "[GC] stack " & stack.bottom.repr & "[GC] max stack size " & cast[pointer](stack.maxStackSize).repr & "\n"
     else:
-      result = result & "[GC] max stack size: " & $gch.stat.maxStackSize & "\n"
+      result.add "[GC] max stack size: " & $gch.stat.maxStackSize & "\n"

 {.pop.} # profiler: off, stackTrace: off
diff --git a/lib/system/gc2.nim b/lib/system/gc2.nim
index 083c06fe3..6dffc323e 100644
--- a/lib/system/gc2.nim
+++ b/lib/system/gc2.nim
@@ -1,7 +1,7 @@
 #
 #
 #            Nim's Runtime Library
-#        (c) Copyright 2015 Andreas Rumpf
+#        (c) Copyright 2017 Andreas Rumpf
 #
 #    See the file "copying.txt", included in this
 #    distribution, for details about the copyright.
@@ -9,17 +9,19 @@
 #            Garbage Collector
 #
-# The basic algorithm is *Deferred Reference Counting* with an incremental mark
+# The basic algorithm is an incremental mark
 # and sweep GC to free cycles. It is hard realtime in that if you play
 # according to its rules, no deadline will ever be missed.
-
-# XXX Ensure by smart color masking that the object is not in the ZCT.
+# Since this kind of collector is very bad at recycling dead objects
+# early, Nim's codegen emits ``nimEscape`` calls at strategic
+# places. For this to work even 'unsureAsgnRef' needs to mark things
+# so that only return values need to be considered in ``nimEscape``.

 {.push profiler:off.}

 const
   CycleIncrease = 2 # is a multiplicative increase
-  InitialCycleThreshold = 4*1024*1024 # X MB because cycle checking is slow
+  InitialCycleThreshold = 512*1024 # start collecting after 500KB
   ZctThreshold = 500  # we collect garbage if the ZCT's size
                       # reaches this threshold
                       # this seems to be a good value
@@ -40,13 +42,11 @@ type
 iterToProc(allObjects, ptr ObjectSpaceIter, allObjectsAsProc)

 const
-  rcIncrement = 0b1000 # so that lowest 3 bits are not touched
+  escapedBit = 0b1000 # so that lowest 3 bits are not touched
   rcBlackOrig = 0b000
   rcWhiteOrig = 0b001
   rcGrey = 0b010 # traditional color for incremental mark&sweep
   rcUnused = 0b011
-  ZctFlag = 0b100 # in ZCT
-  rcShift = 3 # shift by rcShift to get the reference counter
   colorMask = 0b011
 type
   WalkOp = enum
@@ -63,13 +63,13 @@ type
   GcStat = object
     stackScans: int          # number of performed stack scans (for statistics)
-    cycleCollections: int    # number of performed full collections
+    completedCollections: int # number of performed full collections
     maxThreshold: int        # max threshold that has been set
     maxStackSize: int        # max stack size
     maxStackCells: int       # max stack cells in ``decStack``
     cycleTableSize: int      # max entries in cycle table
     maxPause: int64          # max measured GC pause in nanoseconds
-
+
   GcStack {.final, pure.} = object
     when nimCoroutines:
       prev: ptr GcStack
@@ -93,15 +93,13 @@ type
     cycleThreshold: int
     when useCellIds:
       idGenerator: int
-    zct: CellSeq             # the zero count table
-    decStack: CellSeq        # cells in the stack that are to decref again
     greyStack: CellSeq
     recGcLock: int           # prevent recursion via finalizers; no thread lock
     when withRealTime:
       maxPause: Nanos        # max allowed pause in nanoseconds; active if > 0
     region: MemRegion        # garbage collected region
     stat: GcStat
-    additionalRoots: CellSeq # dummy roots for GC_ref/unref
+    additionalRoots: CellSeq # explicit roots for GC_ref/unref
     spaceIter: ObjectSpaceIter
     pDumpHeapFile: pointer # File that is used for GC_dumpHeap
     when hasThreadSupport:
@@ -113,19 +111,25 @@ var
 when not defined(useNimRtl):
   instantiateForRegion(gch.region)

+template acquire(gch: GcHeap) =
+  when hasThreadSupport and hasSharedHeap:
+    acquireSys(HeapLock)
+
+template release(gch: GcHeap) =
+  when hasThreadSupport and hasSharedHeap:
+    releaseSys(HeapLock)
+
 proc initGC() =
   when not defined(useNimRtl):
     gch.red = (1-gch.black)
     gch.cycleThreshold = InitialCycleThreshold
     gch.stat.stackScans = 0
-    gch.stat.cycleCollections = 0
+    gch.stat.completedCollections = 0
     gch.stat.maxThreshold = 0
     gch.stat.maxStackSize = 0
     gch.stat.maxStackCells = 0
     gch.stat.cycleTableSize = 0
     # init the rt
-    init(gch.zct)
-    init(gch.decStack)
     init(gch.additionalRoots)
     init(gch.greyStack)
     when hasThreadSupport:
@@ -147,11 +151,6 @@ template gcAssert(cond: bool, msg: string) =
       writeStackTrace()
       quit 1

-proc addZCT(s: var CellSeq, c: PCell) {.noinline.} =
-  if (c.refcount and ZctFlag) == 0:
-    c.refcount = c.refcount or ZctFlag
-    add(s, c)
-
 proc cellToUsr(cell: PCell): pointer {.inline.} =
   # convert object (=pointer to refcount) to pointer to userdata
   result = cast[pointer](cast[ByteAddress](cell)+%ByteAddress(sizeof(Cell)))
@@ -168,7 +167,7 @@ proc extGetCellType(c: pointer): PNimType {.compilerproc.} =
   result = usrToCell(c).typ

 proc internRefcount(p: pointer): int {.exportc: "getRefcount".} =
-  result = int(usrToCell(p).refcount) shr rcShift
+  result = 0

 # this that has to equals zero, otherwise we have to round up UnitsPerPage:
 when BitsPerPage mod (sizeof(int)*8) != 0:
@@ -178,6 +177,12 @@ template color(c): expr = c.refCount and colorMask
 template setColor(c, col) =
   c.refcount = c.refcount and not colorMask or col

+template markAsEscaped(c: PCell) =
+  c.refcount = c.refcount or escapedBit
+
+template didEscape(c: PCell): bool =
+  (c.refCount and escapedBit) != 0
+
 proc writeCell(file: File; msg: cstring, c: PCell) =
   var kind = -1
   if c.typ != nil: kind = ord(c.typ.kind)
@@ -189,18 +194,18 @@ proc writeCell(file: File; msg: cstring, c: PCell) =
   else:
     let id = c
   when leakDetector:
-    c_fprintf(file, "%s %p %d rc=%ld color=%c from %s(%ld)\n",
-              msg, id, kind, c.refcount shr rcShift, col, c.filename, c.line)
+    c_fprintf(file, "%s %p %d escaped=%ld color=%c from %s(%ld)\n",
+              msg, id, kind, didEscape(c), col, c.filename, c.line)
   else:
-    c_fprintf(file, "%s %p %d rc=%ld color=%c\n",
-              msg, id, kind, c.refcount shr rcShift, col)
+    c_fprintf(file, "%s %p %d escaped=%ld color=%c\n",
+              msg, id, kind, didEscape(c), col)

 proc writeCell(msg: cstring, c: PCell) =
   stdout.writeCell(msg, c)

 proc myastToStr[T](x: T): string {.magic: "AstToStr", noSideEffect.}

-template gcTrace(cell, state: expr): stmt {.immediate.} =
+template gcTrace(cell, state: untyped) =
   when traceGC: writeCell(myastToStr(state), cell)

 # forward declarations:
@@ -211,52 +216,17 @@ proc doOperation(p: pointer, op: WalkOp) {.benign.}
 proc forAllChildrenAux(dest: pointer, mt: PNimType, op: WalkOp) {.benign.}
 # we need the prototype here for debugging purposes

-when hasThreadSupport and hasSharedHeap:
-  template `--`(x: expr): expr = atomicDec(x, rcIncrement) <% rcIncrement
-  template `++`(x: expr): stmt = discard atomicInc(x, rcIncrement)
-else:
-  template `--`(x: expr): expr =
-    dec(x, rcIncrement)
-    x <% rcIncrement
-  template `++`(x: expr): stmt = inc(x, rcIncrement)
-
-proc prepareDealloc(cell: PCell) =
-  if cell.typ.finalizer != nil:
-    # the finalizer could invoke something that
-    # allocates memory; this could trigger a garbage
-    # collection. Since we are already collecting we
-    # prevend recursive entering here by a lock.
-    # XXX: we should set the cell's children to nil!
-    inc(gch.recGcLock)
-    (cast[Finalizer](cell.typ.finalizer))(cellToUsr(cell))
-    dec(gch.recGcLock)
-
 proc rtlAddCycleRoot(c: PCell) {.rtl, inl.} =
   # we MUST access gch as a global here, because this crosses DLL boundaries!
   discard

-proc rtlAddZCT(c: PCell) {.rtl, inl.} =
-  # we MUST access gch as a global here, because this crosses DLL boundaries!
-  addZCT(gch.zct, c)
-
-proc decRef(c: PCell) {.inline.} =
-  gcAssert(isAllocatedPtr(gch.region, c), "decRef: interiorPtr")
-  gcAssert(c.refcount >=% rcIncrement, "decRef")
-  if --c.refcount:
-    rtlAddZCT(c)
-
-proc incRef(c: PCell) {.inline.} =
-  gcAssert(isAllocatedPtr(gch.region, c), "incRef: interiorPtr")
-  c.refcount = c.refcount +% rcIncrement
-
 proc nimGCref(p: pointer) {.compilerProc.} =
   let cell = usrToCell(p)
-  incRef(cell)
+  markAsEscaped(cell)
   add(gch.additionalRoots, cell)

 proc nimGCunref(p: pointer) {.compilerProc.} =
   let cell = usrToCell(p)
-  decRef(cell)
   var L = gch.additionalRoots.len-1
   var i = L
   let d = gch.additionalRoots.d
@@ -267,6 +237,12 @@ proc nimGCunref(p: pointer) {.compilerProc.} =
       break
     dec(i)

+proc nimGCunrefNoCycle(p: pointer) {.compilerProc, inline.} =
+  discard "can we do some freeing here?"
+
+proc nimGCunrefRC1(p: pointer) {.compilerProc, inline.} =
+  discard "can we do some freeing here?"
+
 template markGrey(x: PCell) =
   if x.color != 1-gch.black and gch.phase == Phase.Marking:
     if not isAllocatedPtr(gch.region, x):
@@ -280,59 +256,32 @@ proc GC_addCycleRoot*[T](p: ref T) {.inline.} =
   ## adds 'p' to the cycle candidate set for the cycle collector. It is
   ## necessary if you used the 'acyclic' pragma for optimization
   ## purposes and need to break cycles manually.
-  rtlAddCycleRoot(usrToCell(cast[pointer](p)))
-
-proc nimGCunrefNoCycle(p: pointer) {.compilerProc, inline.} =
-  sysAssert(allocInv(gch.region), "begin nimGCunrefNoCycle")
-  var c = usrToCell(p)
-  gcAssert(isAllocatedPtr(gch.region, c), "nimGCunrefNoCycle: isAllocatedPtr")
-  if --c.refcount:
-    rtlAddZCT(c)
-    sysAssert(allocInv(gch.region), "end nimGCunrefNoCycle 2")
-  sysAssert(allocInv(gch.region), "end nimGCunrefNoCycle 5")
+  discard

-proc asgnRef(dest: PPointer, src: pointer) {.compilerProc, inline.} =
-  # the code generator calls this proc!
+template asgnRefImpl =
   gcAssert(not isOnStack(dest), "asgnRef")
   # BUGFIX: first incRef then decRef!
   if src != nil:
     let s = usrToCell(src)
-    incRef(s)
+    markAsEscaped(s)
     markGrey(s)
-  if dest[] != nil: decRef(usrToCell(dest[]))
   dest[] = src

+proc asgnRef(dest: PPointer, src: pointer) {.compilerProc, inline.} =
+  # the code generator calls this proc!
+  asgnRefImpl()
+
 proc asgnRefNoCycle(dest: PPointer, src: pointer) {.compilerProc, inline.} =
-  # the code generator calls this proc if it is known at compile time that no
-  # cycle is possible.
-  gcAssert(not isOnStack(dest), "asgnRefNoCycle")
-  if src != nil:
-    var c = usrToCell(src)
-    ++c.refcount
-    markGrey(c)
-  if dest[] != nil:
-    var c = usrToCell(dest[])
-    if --c.refcount:
-      rtlAddZCT(c)
-  dest[] = src
+  asgnRefImpl()

 proc unsureAsgnRef(dest: PPointer, src: pointer) {.compilerProc.} =
-  # unsureAsgnRef updates the reference counters only if dest is not on the
+  # unsureAsgnRef marks 'src' as grey only if dest is not on the
   # stack. It is used by the code generator if it cannot decide wether a
   # reference is in the stack or not (this can happen for var parameters).
-  if not isOnStack(dest):
-    if src != nil:
-      let s = usrToCell(src)
-      incRef(s)
-      markGrey(s)
-    # XXX finally use assembler for the stack checking instead!
-    # the test for '!= nil' is correct, but I got tired of the segfaults
-    # resulting from the crappy stack checking:
-    if cast[int](dest[]) >=% PageSize: decRef(usrToCell(dest[]))
-  else:
-    # can't be an interior pointer if it's a stack location!
-    gcAssert(interiorAllocatedPtr(gch.region, dest) == nil,
-             "stack loc AND interior pointer")
+  if src != nil:
+    let s = usrToCell(src)
+    markAsEscaped(s)
+    if not isOnStack(dest): markGrey(s)
   dest[] = src

 type
@@ -366,7 +315,7 @@ proc forAllChildrenAux(dest: pointer, mt: PNimType, op: WalkOp) =
   if dest == nil: return # nothing to do
   if ntfNoRefs notin mt.flags:
     case mt.kind
-    of tyRef, tyString, tySequence: # leaf:
+    of tyRef, tyOptAsRef, tyString, tySequence: # leaf:
       doOperation(cast[PPointer](d)[], op)
     of tyObject, tyTuple:
       forAllSlotsAux(dest, mt.node, op)
@@ -379,13 +328,13 @@ proc forAllChildren(cell: PCell, op: WalkOp) =
   gcAssert(cell != nil, "forAllChildren: 1")
   gcAssert(isAllocatedPtr(gch.region, cell), "forAllChildren: 2")
   gcAssert(cell.typ != nil, "forAllChildren: 3")
-  gcAssert cell.typ.kind in {tyRef, tySequence, tyString}, "forAllChildren: 4"
+  gcAssert cell.typ.kind in {tyRef, tyOptAsRef, tySequence, tyString}, "forAllChildren: 4"
   let marker = cell.typ.marker
   if marker != nil:
     marker(cellToUsr(cell), op.int)
   else:
     case cell.typ.kind
-    of tyRef: # common case
+    of tyRef, tyOptAsRef: # common case
       forAllChildrenAux(cellToUsr(cell), cell.typ.base, op)
     of tySequence:
       var d = cast[ByteAddress](cellToUsr(cell))
@@ -396,50 +345,6 @@ proc forAllChildren(cell: PCell, op: WalkOp) =
                           GenericSeqSize), cell.typ.base, op)
     else: discard

-proc addNewObjToZCT(res: PCell, gch: var GcHeap) {.inline.} =
-  # we check the last 8 entries (cache line) for a slot that could be reused.
-  # In 63% of all cases we succeed here! But we have to optimize the heck
-  # out of this small linear search so that ``newObj`` is not slowed down.
-  #
-  # Slots to try    cache hit
-  # 1               32%
-  # 4               59%
-  # 8               63%
-  # 16              66%
-  # all slots       68%
-  var L = gch.zct.len
-  var d = gch.zct.d
-  when true:
-    # loop unrolled for performance:
-    template replaceZctEntry(i: expr) =
-      c = d[i]
-      if c.refcount >=% rcIncrement:
-        c.refcount = c.refcount and not ZctFlag
-        d[i] = res
-        return
-    if L > 8:
-      var c: PCell
-      replaceZctEntry(L-1)
-      replaceZctEntry(L-2)
-      replaceZctEntry(L-3)
-      replaceZctEntry(L-4)
-      replaceZctEntry(L-5)
-      replaceZctEntry(L-6)
-      replaceZctEntry(L-7)
-      replaceZctEntry(L-8)
-      add(gch.zct, res)
-    else:
-      d[L] = res
-      inc(gch.zct.len)
-  else:
-    for i in countdown(L-1, max(0, L-8)):
-      var c = d[i]
-      if c.refcount >=% rcIncrement:
-        c.refcount = c.refcount and not ZctFlag
-        d[i] = res
-        return
-    add(gch.zct, res)
-
 {.push stackTrace: off, profiler:off.}
 proc gcInvariant*() =
   sysAssert(allocInv(gch.region), "injected")
   markForDebug(gch)
 {.pop.}

+include gc_common
+
 proc rawNewObj(typ: PNimType, size: int, gch: var GcHeap): pointer =
   # generates a new object and sets its reference counter to 0
   sysAssert(allocInv(gch.region), "rawNewObj begin")
-  gcAssert(typ.kind in {tyRef, tyString, tySequence}, "newObj: 1")
+  gcAssert(typ.kind in {tyRef, tyOptAsRef, tyString, tySequence}, "newObj: 1")
   collectCT(gch)
   var res = cast[PCell](rawAlloc(gch.region, size + sizeof(Cell)))
   gcAssert((cast[ByteAddress](res) and (MemAlign-1)) == 0, "newObj: 2")
@@ -461,10 +368,8 @@ proc rawNewObj(typ: PNimType, size: int, gch: var GcHeap): pointer =
     res.filename = framePtr.prev.filename
     res.line = framePtr.prev.line
   # refcount is zero, color is black, but mark it to be in the ZCT
-  res.refcount = ZctFlag or allocColor()
+  res.refcount = allocColor()
   sysAssert(isAllocatedPtr(gch.region, res), "newObj: 3")
-  # its refcount is zero, so add it to the ZCT:
-  addNewObjToZCT(res, gch)
   when logGC: writeCell("new cell", res)
   gcTrace(res, csAllocated)
   when useCellIds:
@@ -493,95 +398,38 @@ proc newSeq(typ: PNimType, len: int): pointer {.compilerRtl.} =
   when defined(memProfiler): nimProfile(size)

 proc newObjRC1(typ: PNimType, size: int): pointer {.compilerRtl.} =
-  # generates a new object and sets its reference counter to 1
-  sysAssert(allocInv(gch.region), "newObjRC1 begin")
-  gcAssert(typ.kind in {tyRef, tyString, tySequence}, "newObj: 1")
-  collectCT(gch)
-  sysAssert(allocInv(gch.region), "newObjRC1 after collectCT")
-
-  var res = cast[PCell](rawAlloc(gch.region, size + sizeof(Cell)))
-  sysAssert(allocInv(gch.region), "newObjRC1 after rawAlloc")
-  sysAssert((cast[ByteAddress](res) and (MemAlign-1)) == 0, "newObj: 2")
-  # now it is buffered in the ZCT
-  res.typ = typ
-  when leakDetector:
-    if framePtr != nil and framePtr.prev != nil:
-      res.filename = framePtr.prev.filename
-      res.line = framePtr.prev.line
-  res.refcount = rcIncrement or allocColor() # refcount is 1
-  sysAssert(isAllocatedPtr(gch.region, res), "newObj: 3")
-  when logGC: writeCell("new cell", res)
-  gcTrace(res, csAllocated)
-  when useCellIds:
-    inc gch.idGenerator
-    res.id = gch.idGenerator
-  result = cellToUsr(res)
-  zeroMem(result, size)
-  sysAssert(allocInv(gch.region), "newObjRC1 end")
-  when defined(memProfiler): nimProfile(size)
+  result = newObj(typ, size)

 proc newSeqRC1(typ: PNimType, len: int): pointer {.compilerRtl.} =
-  let size = addInt(mulInt(len, typ.base.size), GenericSeqSize)
-  result = newObjRC1(typ, size)
-  cast[PGenericSeq](result).len = len
-  cast[PGenericSeq](result).reserved = len
-  when defined(memProfiler): nimProfile(size)
+  result = newSeq(typ, len)

 proc growObj(old: pointer, newsize: int, gch: var GcHeap): pointer =
+  acquire(gch)
   collectCT(gch)
   var ol = usrToCell(old)
-  gcAssert(isAllocatedPtr(gch.region, ol), "growObj: freed pointer?")
-  sysAssert(ol.typ != nil, "growObj: 1")
   gcAssert(ol.typ.kind in {tyString, tySequence}, "growObj: 2")
-  sysAssert(allocInv(gch.region), "growObj begin")

   var res = cast[PCell](rawAlloc(gch.region, newsize + sizeof(Cell)))
   var elemSize = 1
   if ol.typ.kind != tyString: elemSize = ol.typ.base.size
+  incTypeSize ol.typ, newsize
-  let oldsize = cast[PGenericSeq](old).len*elemSize + GenericSeqSize
+  var oldsize = cast[PGenericSeq](old).len*elemSize + GenericSeqSize
   copyMem(res, ol, oldsize + sizeof(Cell))
-  zeroMem(cast[pointer](cast[ByteAddress](res) +% oldsize +% sizeof(Cell)),
+  zeroMem(cast[pointer](cast[ByteAddress](res)+% oldsize +% sizeof(Cell)),
           newsize-oldsize)
   sysAssert((cast[ByteAddress](res) and (MemAlign-1)) == 0, "growObj: 3")
-  # This can be wrong for intermediate temps that are nevertheless on the
-  # heap because of lambda lifting:
-  #gcAssert(res.refcount shr rcShift <=% 1, "growObj: 4")
-  when logGC:
-    writeCell("growObj old cell", ol)
-    writeCell("growObj new cell", res)
-  gcTrace(ol, csZctFreed)
-  gcTrace(res, csAllocated)
-  when reallyDealloc:
-    sysAssert(allocInv(gch.region), "growObj before dealloc")
-    if ol.refcount shr rcShift <=% 1:
-      # free immediately to save space:
-      if (ol.refcount and ZctFlag) != 0:
-        var j = gch.zct.len-1
-        var d = gch.zct.d
-        while j >= 0:
-          if d[j] == ol:
-            d[j] = res
-            break
-          dec(j)
-      rawDealloc(gch.region, ol)
+  when false:
+    # this is wrong since seqs can be shared via 'shallow':
+    when reallyDealloc: rawDealloc(gch.region, ol)
     else:
-      # we split the old refcount in 2 parts. XXX This is still not entirely
-      # correct if the pointer that receives growObj's result is on the stack.
-      # A better fix would be to emit the location specific write barrier for
-      # 'growObj', but this is lots of more work and who knows what new problems
-      # this would create.
-      res.refcount = rcIncrement or allocColor()
-      decRef(ol)
-  else:
-    sysAssert(ol.typ != nil, "growObj: 5")
-    zeroMem(ol, sizeof(Cell))
+      zeroMem(ol, sizeof(Cell))
   when useCellIds:
     inc gch.idGenerator
     res.id = gch.idGenerator
+  release(gch)
   result = cellToUsr(res)
-  sysAssert(allocInv(gch.region), "growObj end")
   when defined(memProfiler): nimProfile(newsize-oldsize)

 proc growObj(old: pointer, newsize: int): pointer {.rtl.} =
@@ -637,12 +485,13 @@ proc GC_dumpHeap*(file: File) =
   ## can be translated into "dot" syntax via the "heapdump2dot" tool.
   gch.pDumpHeapFile = file
   var spaceIter: ObjectSpaceIter
-  var d = gch.decStack.d
-  for i in 0 .. <gch.decStack.len:
-    if isAllocatedPtr(gch.region, d[i]):
-      c_fprintf(file, "onstack %p\n", d[i])
-    else:
-      c_fprintf(file, "onstack_invalid %p\n", d[i])
+  when false:
+    var d = gch.decStack.d
+    for i in 0 .. <gch.decStack.len:
+      if isAllocatedPtr(gch.region, d[i]):
+        c_fprintf(file, "onstack %p\n", d[i])
+      else:
+        c_fprintf(file, "onstack_invalid %p\n", d[i])
   for i in 0 .. <globalMarkersLen: globalMarkers[i]()
   while true:
     let x = allObjectsAsProc(gch.region, addr spaceIter)
@@ -667,14 +516,6 @@ proc GC_dumpHeap() =

 proc freeCyclicCell(gch: var GcHeap, c: PCell) =
   gcAssert(isAllocatedPtr(gch.region, c), "freeCyclicCell: freed pointer?")
-
-  var d = gch.decStack.d
-  for i in 0..gch.decStack.len-1:
-    if d[i] == c:
-      writeCell("freeing ", c)
-      GC_dumpHeap()
-    gcAssert d[i] != c, "wtf man, freeing obviously alive stuff?!!"
-
   prepareDealloc(c)
   gcTrace(c, csCycFreed)
   when logGC: writeCell("cycle collector dealloc cell", c)
@@ -713,15 +554,6 @@ proc markRoot(gch: var GcHeap, c: PCell) {.inline.} =
   if c.color == 1-gch.black:
     c.setColor(rcGrey)
     add(gch.greyStack, c)
-  elif c.color == rcGrey:
-    var isGrey = false
-    var d = gch.decStack.d
-    for i in 0..gch.decStack.len-1:
-      if d[i] == c:
-        isGrey = true
-        break
-    if not isGrey:
-      gcAssert false, "markRoot: root is already grey?!"

 proc markIncremental(gch: var GcHeap): bool =
   var L = addr(gch.greyStack.len)
@@ -741,30 +573,14 @@ proc markIncremental(gch: var GcHeap): bool =
       c.setColor(gch.black)
       forAllChildren(c, waMarkGrey)
     elif c.color == (1-gch.black):
-      gcAssert false, "wtf why are there white object in the greystack?"
+      gcAssert false, "wtf why are there white objects in the greystack?"
     checkTime()
   gcAssert gch.greyStack.len == 0, "markIncremental: greystack not empty "
-
-  # assert that all local roots are black by now:
-  var d = gch.decStack.d
-  var errors = false
-  for i in 0..gch.decStack.len-1:
-    gcAssert(isAllocatedPtr(gch.region, d[i]), "markIncremental: isAllocatedPtr 2")
-    if d[i].color != gch.black:
-      writeCell("not black ", d[i])
-      errors = true
-  gcAssert(not errors, "wtf something wrong hre")
   result = true

 proc markGlobals(gch: var GcHeap) =
   for i in 0 .. <globalMarkersLen: globalMarkers[i]()

-proc markLocals(gch: var GcHeap) =
-  var d = gch.decStack.d
-  for i in 0 .. <gch.decStack.len:
-    sysAssert isAllocatedPtr(gch.region, d[i]), "markLocals"
-    markRoot(gch, d[i])
-
 proc doOperation(p: pointer, op: WalkOp) =
   if p == nil: return
   var c: PCell = usrToCell(p)
@@ -776,11 +592,7 @@ proc doOperation(p: pointer, op: WalkOp) =
     #if not isAllocatedPtr(gch.region, c):
     #  c_fprintf(stdout, "[GC] decref bug: %p", c)
     gcAssert(isAllocatedPtr(gch.region, c), "decRef: waZctDecRef")
-    gcAssert(c.refcount >=% rcIncrement, "doOperation 2")
-    #c.refcount = c.refcount -% rcIncrement
-    when logGC: writeCell("decref (from doOperation)", c)
-    decRef(c)
-    #if c.refcount <% rcIncrement: addZCT(gch.zct, c)
+    discard "use me for nimEscape?"
   of waMarkGlobal:
     template handleRoot =
       if gch.dumpHeapFile.isNil:
@@ -811,107 +623,54 @@ proc doOperation(p: pointer, op: WalkOp) =
 proc nimGCvisit(d: pointer, op: int) {.compilerRtl.} =
   doOperation(d, WalkOp(op))

-proc collectZCT(gch: var GcHeap): bool {.benign.}
-
-proc collectCycles(gch: var GcHeap): bool =
-  when hasThreadSupport:
-    for c in gch.toDispose:
-      nimGCunref(c)
+proc gcMark(gch: var GcHeap, p: pointer) {.inline.} =
+  # the addresses are not as cells on the stack, so turn them to cells:
+  sysAssert(allocInv(gch.region), "gcMark begin")
+  var cell = usrToCell(p)
+  var c = cast[ByteAddress](cell)
+  if c >% PageSize:
+    # fast check: does it look like a cell?
+    var objStart = cast[PCell](interiorAllocatedPtr(gch.region, cell))
+    if objStart != nil:
+      # mark the cell:
+      markRoot(gch, objStart)
+  sysAssert(allocInv(gch.region), "gcMark end")

-  # ensure the ZCT 'color' is not used:
-  while gch.zct.len > 0: discard collectZCT(gch)
+proc markStackAndRegisters(gch: var GcHeap) {.noinline, cdecl.} =
+  forEachStackSlot(gch, gcMark)

+proc collectALittle(gch: var GcHeap): bool =
   case gch.phase
   of Phase.None:
-    gch.phase = Phase.Marking
-    markGlobals(gch)
-
-    c_fprintf(stdout, "collectCycles: introduced bug E %ld\n", gch.phase)
-    discard allocInv(gch.region)
+    if getOccupiedMem(gch.region) >= gch.cycleThreshold:
+      gch.phase = Phase.Marking
+      markGlobals(gch)
+      result = collectALittle(gch)
+      #when false: c_fprintf(stdout, "collectALittle: introduced bug E %ld\n", gch.phase)
+      #discard allocInv(gch.region)
   of Phase.Marking:
-    # since locals do not have a write barrier, we need
-    # to keep re-scanning them :-( but there is really nothing we can
-    # do about that.
-    markLocals(gch)
+    when hasThreadSupport:
+      for c in gch.toDispose:
+        nimGCunref(c)
+    prepareForInteriorPointerChecking(gch.region)
+    markStackAndRegisters(gch)
+    inc(gch.stat.stackScans)
     if markIncremental(gch):
       gch.phase = Phase.Sweeping
       gch.red = 1 - gch.red
   of Phase.Sweeping:
     gcAssert gch.greyStack.len == 0, "greystack not empty"
+    when hasThreadSupport:
+      for c in gch.toDispose:
+        nimGCunref(c)
     if sweep(gch):
       gch.phase = Phase.None
       # flip black/white meanings:
       gch.black = 1 - gch.black
       gcAssert gch.red == 1 - gch.black, "red color is wrong"
+      inc(gch.stat.completedCollections)
       result = true

-proc gcMark(gch: var GcHeap, p: pointer) {.inline.} =
-  # the addresses are not as cells on the stack, so turn them to cells:
-  sysAssert(allocInv(gch.region), "gcMark begin")
-  var cell = usrToCell(p)
-  var c = cast[ByteAddress](cell)
-  if c >% PageSize:
-    # fast check: does it look like a cell?
-    var objStart = cast[PCell](interiorAllocatedPtr(gch.region, cell))
-    if objStart != nil:
-      # mark the cell:
-      objStart.refcount = objStart.refcount +% rcIncrement
-      add(gch.decStack, objStart)
-  sysAssert(allocInv(gch.region), "gcMark end")
-
-include gc_common
-
-proc markStackAndRegisters(gch: var GcHeap) {.noinline, cdecl.} =
-  forEachStackSlot(gch, gcMark)
-
-proc collectZCT(gch: var GcHeap): bool =
-  # Note: Freeing may add child objects to the ZCT! So essentially we do
-  # deep freeing, which is bad for incremental operation. In order to
-  # avoid a deep stack, we move objects to keep the ZCT small.
-  # This is performance critical!
-  var L = addr(gch.zct.len)
-  takeStartTime(100)
-
-  while L[] > 0:
-    var c = gch.zct.d[0]
-    sysAssert(isAllocatedPtr(gch.region, c), "CollectZCT: isAllocatedPtr")
-    # remove from ZCT:
-    gcAssert((c.refcount and ZctFlag) == ZctFlag, "collectZCT")
-
-    c.refcount = c.refcount and not ZctFlag
-    gch.zct.d[0] = gch.zct.d[L[] - 1]
-    dec(L[])
-    takeTime()
-    if c.refcount <% rcIncrement and c.color != rcGrey:
-      # It may have a RC > 0, if it is in the hardware stack or
-      # it has not been removed yet from the ZCT. This is because
-      # ``incref`` does not bother to remove the cell from the ZCT
-      # as this might be too slow.
-      # In any case, it should be removed from the ZCT. But not
-      # freed. **KEEP THIS IN MIND WHEN MAKING THIS INCREMENTAL!**
-      when logGC: writeCell("zct dealloc cell", c)
-      gcTrace(c, csZctFreed)
-      # We are about to free the object, call the finalizer BEFORE its
-      # children are deleted as well, because otherwise the finalizer may
-      # access invalid memory. This is done by prepareDealloc():
-      prepareDealloc(c)
-      forAllChildren(c, waZctDecRef)
-      when reallyDealloc:
-        sysAssert(allocInv(gch.region), "collectZCT: rawDealloc")
-        rawDealloc(gch.region, c)
-      else:
-        sysAssert(c.typ != nil, "collectZCT 2")
-        zeroMem(c, sizeof(Cell))
-    checkTime()
-  result = true
-
-proc unmarkStackAndRegisters(gch: var GcHeap) =
-  var d = gch.decStack.d
-  for i in 0..gch.decStack.len-1:
-    sysAssert isAllocatedPtr(gch.region, d[i]), "unmarkStackAndRegisters"
-    decRef(d[i])
-  gch.decStack.len = 0
-
 proc collectCTBody(gch: var GcHeap) =
   when withRealTime:
     let t0 = getticks()
@@ -919,22 +678,12 @@ proc collectCTBody(gch: var GcHeap) =
   when not nimCoroutines:
     gch.stat.maxStackSize = max(gch.stat.maxStackSize, stackSize())

-  sysAssert(gch.decStack.len == 0, "collectCT")
-  prepareForInteriorPointerChecking(gch.region)
-  markStackAndRegisters(gch)
-  gch.stat.maxStackCells = max(gch.stat.maxStackCells, gch.decStack.len)
-  inc(gch.stat.stackScans)
-  if collectZCT(gch):
-    when cycleGC:
-      if getOccupiedMem(gch.region) >= gch.cycleThreshold or alwaysCycleGC:
-        if collectCycles(gch):
-          inc(gch.stat.cycleCollections)
-          gch.cycleThreshold = max(InitialCycleThreshold, getOccupiedMem() *
-                                   CycleIncrease)
-          gch.stat.maxThreshold = max(gch.stat.maxThreshold, gch.cycleThreshold)
-  unmarkStackAndRegisters(gch)
+  #gch.stat.maxStackCells = max(gch.stat.maxStackCells, gch.decStack.len)
+  if collectALittle(gch):
+    gch.cycleThreshold = max(InitialCycleThreshold, getOccupiedMem() *
+                             CycleIncrease)
+    gch.stat.maxThreshold = max(gch.stat.maxThreshold, gch.cycleThreshold)
   sysAssert(allocInv(gch.region), "collectCT: end")
-
   when withRealTime:
     let duration = getticks() - t0
     gch.stat.maxPause = max(gch.stat.maxPause, duration)
@@ -955,7 +704,7 @@ proc collectCT(gch: var GcHeap) =
     let stackMarkCosts = max(currentStackSizes() div (16*sizeof(int)), ZctThreshold)
   else:
     let stackMarkCosts = max(stackSize() div (16*sizeof(int)), ZctThreshold)
-  if (gch.zct.len >= stackMarkCosts or (cycleGC and
+  if (gch.greyStack.len >= stackMarkCosts or (cycleGC and
       getOccupiedMem(gch.region)>=gch.cycleThreshold) or alwaysGC) and
       gch.recGcLock == 0:
     collectCTBody(gch)
@@ -969,10 +718,9 @@ when withRealTime:

   proc GC_step(gch: var GcHeap, us: int, strongAdvice: bool) =
     gch.maxPause = us.toNano
-    if (gch.zct.len >= ZctThreshold or (cycleGC and
-        getOccupiedMem(gch.region)>=gch.cycleThreshold) or alwaysGC) or
-        strongAdvice:
-      collectCTBody(gch)
+    #if (getOccupiedMem(gch.region)>=gch.cycleThreshold) or
+    #    alwaysGC or strongAdvice:
+    collectCTBody(gch)

   proc GC_step*(us: int, strongAdvice = false, stackSize = -1) {.noinline.} =
     if stackSize >= 0:
@@ -1010,12 +758,8 @@ when not defined(useNimRtl):
   proc GC_setStrategy(strategy: GC_Strategy) = discard

-  proc GC_enableMarkAndSweep() =
-    gch.cycleThreshold = InitialCycleThreshold
-
-  proc GC_disableMarkAndSweep() =
-    gch.cycleThreshold = high(gch.cycleThreshold)-1
-    # set to the max value to suppress the cycle detector
+  proc GC_enableMarkAndSweep() = discard
+  proc GC_disableMarkAndSweep() = discard

   proc GC_fullCollect() =
     var oldThreshold = gch.cycleThreshold
@@ -1029,17 +773,17 @@ when not defined(useNimRtl):
       "[GC] occupied memory: " & $(getOccupiedMem()) & "\n" &
       "[GC] stack scans: " & $gch.stat.stackScans & "\n" &
       "[GC] stack cells: " & $gch.stat.maxStackCells & "\n" &
-      "[GC] cycle collections: " & $gch.stat.cycleCollections & "\n" &
+      "[GC] completed collections: " & $gch.stat.completedCollections & "\n" &
       "[GC] max threshold: " & $gch.stat.maxThreshold & "\n" &
-      "[GC] zct capacity: " & $gch.zct.cap & "\n" &
+      "[GC] grey stack capacity: " & $gch.greyStack.cap & "\n" &
       "[GC] max cycle table size: " & $gch.stat.cycleTableSize & "\n" &
-      "[GC] max pause time [ms]: " & $(gch.stat.maxPause div 1000_000)
+      "[GC] max pause time [ms]: " & $(gch.stat.maxPause div 1000_000) & "\n"
     when nimCoroutines:
-      result = result & "[GC] number of stacks: " & $gch.stack.len & "\n"
+      result.add "[GC] number of stacks: " & $gch.stack.len & "\n"
       for stack in items(gch.stack):
-        result = result & "[GC] stack " & stack.bottom.repr & "[GC] max stack size " & $stack.maxStackSize & "\n"
+        result.add "[GC] stack " & stack.bottom.repr & "[GC] max stack size " & $stack.maxStackSize & "\n"
     else:
-      result = result & "[GC] max stack size: " & $gch.stat.maxStackSize & "\n"
+      result.add "[GC] max stack size: " & $gch.stat.maxStackSize & "\n"
     GC_enable()

 {.pop.}
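
The renamed constants near the top of this file are the heart of the rewrite: a cell's former reference-count word now carries only a two-bit color plus the escaped flag, and no count at all (internRefcount returning 0 above makes that explicit). A self-contained toy of the masking performed by the color, setColor and markAsEscaped templates; the two constants are copied from the diff, the rest is illustrative:

const
  colorMask  = 0b011
  escapedBit = 0b1000

var flags = 0b001                          # say, white
flags = flags and not colorMask or 0b010   # setColor(rcGrey)
flags = flags or escapedBit                # markAsEscaped
assert (flags and colorMask) == 0b010      # color survives the escape bit
assert (flags and escapedBit) != 0
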
diff --git a/lib/system/gc_common.nim b/lib/system/gc_common.nim
index 220331e96..484a4db9a 100644
--- a/lib/system/gc_common.nim
+++ b/lib/system/gc_common.nim
@@ -373,12 +373,22 @@ proc deallocHeap*(runFinalizers = true; allowGcAfterwards = true) =
   ## is true. If ``allowGcAfterwards`` is true, a minimal amount of allocation
   ## happens to ensure the GC can continue to work after the call
   ## to ``deallocHeap``.
+  template deallocCell(x) =
+    if isCell(x):
+      # cast to PCell is correct here:
+      prepareDealloc(cast[PCell](x))
+
   if runFinalizers:
-    for x in allObjects(gch.region):
-      if isCell(x):
-        # cast to PCell is correct here:
-        var c = cast[PCell](x)
-        prepareDealloc(c)
+    when not declared(allObjectsAsProc):
+      for x in allObjects(gch.region):
+        deallocCell(x)
+    else:
+      var spaceIter: ObjectSpaceIter
+      while true:
+        let x = allObjectsAsProc(gch.region, addr spaceIter)
+        if spaceIter.state < 0: break
+        deallocCell(x)
+
   deallocOsPages(gch.region)
   zeroMem(addr gch.region, sizeof(gch.region))
   if allowGcAfterwards:
diff --git a/lib/system/gc_ms.nim b/lib/system/gc_ms.nim
index e03140d05..cfc0dfa8a 100644
--- a/lib/system/gc_ms.nim
+++ b/lib/system/gc_ms.nim
@@ -252,7 +252,7 @@ proc forAllChildrenAux(dest: pointer, mt: PNimType, op: WalkOp) =
   if dest == nil: return # nothing to do
   if ntfNoRefs notin mt.flags:
     case mt.kind
-    of tyRef, tyString, tySequence: # leaf:
+    of tyRef, tyOptAsRef, tyString, tySequence: # leaf:
       doOperation(cast[PPointer](d)[], op)
     of tyObject, tyTuple:
       forAllSlotsAux(dest, mt.node, op)
@@ -264,13 +264,13 @@ proc forAllChildrenAux(dest: pointer, mt: PNimType, op: WalkOp) =
 proc forAllChildren(cell: PCell, op: WalkOp) =
   gcAssert(cell != nil, "forAllChildren: 1")
   gcAssert(cell.typ != nil, "forAllChildren: 2")
-  gcAssert cell.typ.kind in {tyRef, tySequence, tyString}, "forAllChildren: 3"
+  gcAssert cell.typ.kind in {tyRef, tyOptAsRef, tySequence, tyString}, "forAllChildren: 3"
   let marker = cell.typ.marker
   if marker != nil:
     marker(cellToUsr(cell), op.int)
   else:
     case cell.typ.kind
-    of tyRef: # common case
+    of tyRef, tyOptAsRef: # common case
       forAllChildrenAux(cellToUsr(cell), cell.typ.base, op)
     of tySequence:
       var d = cast[ByteAddress](cellToUsr(cell))
@@ -285,7 +285,7 @@ proc rawNewObj(typ: PNimType, size: int, gch: var GcHeap): pointer =
   # generates a new object and sets its reference counter to 0
   incTypeSize typ, size
   acquire(gch)
-  gcAssert(typ.kind in {tyRef, tyString, tySequence}, "newObj: 1")
+  gcAssert(typ.kind in {tyRef, tyOptAsRef, tyString, tySequence}, "newObj: 1")
   collectCT(gch)
   var res = cast[PCell](rawAlloc(gch.region, size + sizeof(Cell)))
   gcAssert((cast[ByteAddress](res) and (MemAlign-1)) == 0, "newObj: 2")
@@ -526,10 +526,10 @@ when not defined(useNimRtl):
       "[GC] max threshold: " & $gch.stat.maxThreshold & "\n" &
       "[GC] freed objects: " & $gch.stat.freedObjects & "\n"
     when nimCoroutines:
-      result = result & "[GC] number of stacks: " & $gch.stack.len & "\n"
+      result.add "[GC] number of stacks: " & $gch.stack.len & "\n"
      for stack in items(gch.stack):
-        result = result & "[GC] stack " & stack.bottom.repr & "[GC] max stack size " & $stack.maxStackSize & "\n"
+        result.add "[GC] stack " & stack.bottom.repr & "[GC] max stack size " & $stack.maxStackSize & "\n"
     else:
-      result = result & "[GC] max stack size: " & $gch.stat.maxStackSize & "\n"
+      result.add "[GC] max stack size: " & $gch.stat.maxStackSize & "\n"

 {.pop.}
diff --git a/lib/system/hti.nim b/lib/system/hti.nim
index 69f4f9508..45b1d1cd3 100644
--- a/lib/system/hti.nim
+++ b/lib/system/hti.nim
@@ -62,6 +62,21 @@ type
     tyUInt16,
     tyUInt32,
     tyUInt64,
+    tyOptAsRef, tyUnused1, tyUnused2,
+    tyVarargsHidden,
+    tyUnusedHidden,
+    tyProxyHidden,
+    tyBuiltInTypeClassHidden,
+    tyUserTypeClassHidden,
+    tyUserTypeClassInstHidden,
+    tyCompositeTypeClassHidden,
+    tyInferredHidden,
+    tyAndHidden, tyOrHidden, tyNotHidden,
+    tyAnythingHidden,
+    tyStaticHidden,
+    tyFromExprHidden,
+    tyOpt,
+    tyVoidHidden

   TNimNodeKind = enum nkNone, nkSlot, nkList, nkCase
   TNimNode {.codegenType.} = object
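
hti.nim mirrors the compiler's internal type-kind enum, so most of the names added here are padding that keeps the runtime's ordinal values aligned with compiler-side kinds the runtime never inspects (hence the *Hidden suffixes); only tyOpt and tyOptAsRef are genuinely new. Judging from the assign.nim and channels.nim hunks, tyOptAsRef is the representation where an optional over a ref type collapses into a nullable reference. Roughly, at the user level (illustrative code, not from this commit):

type
  Person = ref object
    name: string

# An optional needs a value plus a "has value" flag, but a ref never uses
# nil for 'some', so nil itself can encode 'none': the whole optional fits
# in one pointer, which the GC then walks exactly like a tyRef.
var maybePerson: Person = nil       # none
maybePerson = Person(name: "Ada")   # some
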
diff --git a/lib/system/mmdisp.nim b/lib/system/mmdisp.nim
index d2160fdac..824934966 100644
--- a/lib/system/mmdisp.nim
+++ b/lib/system/mmdisp.nim
@@ -564,7 +564,11 @@ else:

   when not declared(nimNewSeqOfCap):
     proc nimNewSeqOfCap(typ: PNimType, cap: int): pointer {.compilerproc.} =
-      result = newObj(typ, addInt(mulInt(cap, typ.base.size), GenericSeqSize))
+      let s = addInt(mulInt(cap, typ.base.size), GenericSeqSize)
+      when declared(newObjNoInit):
+        result = if ntfNoRefs in typ.base.flags: newObjNoInit(typ, s) else: newObj(typ, s)
+      else:
+        result = newObj(typ, s)
       cast[PGenericSeq](result).len = 0
       cast[PGenericSeq](result).reserved = cap
diff --git a/lib/system/nimscript.nim b/lib/system/nimscript.nim
index 73bb91fef..f5b9cf3ed 100644
--- a/lib/system/nimscript.nim
+++ b/lib/system/nimscript.nim
@@ -11,6 +11,15 @@
 # Nim's configuration system now uses Nim for scripting. This module provides
 # a few things that are required for this to work.

+const
+  buildOS* {.magic: "BuildOS".}: string = ""
+    ## The OS this build is running on. Can be different from ``system.hostOS``
+    ## for cross compilations.
+
+  buildCPU* {.magic: "BuildCPU".}: string = ""
+    ## The CPU this build is running on. Can be different from ``system.hostCPU``
+    ## for cross compilations.
+
 template builtin = discard

 # We know the effects better than the compiler:
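
buildOS and buildCPU are compile-time constants describing the machine the compiler itself runs on, as opposed to hostOS/hostCPU, which describe the compilation target. A hypothetical config.nims fragment showing the intended cross-compilation use; the toolchain names are only an example:

# config.nims (illustrative): pick a cross toolchain when building
# Windows binaries on a Linux build machine.
when buildOS == "linux":
  if defined(windows):
    switch("gcc.exe", "x86_64-w64-mingw32-gcc")
    switch("gcc.linkerexe", "x86_64-w64-mingw32-gcc")
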
diff --git a/lib/system/sysstr.nim b/lib/system/sysstr.nim
index c3150cb07..90201202c 100644
--- a/lib/system/sysstr.nim
+++ b/lib/system/sysstr.nim
@@ -95,6 +95,9 @@ proc cstrToNimstr(str: cstring): NimString {.compilerRtl.} =
   if str == nil: NimString(nil)
   else: toNimStr(str, str.len)

+template wasMoved(x: NimString): bool = false
+# (x.reserved and seqShallowFlag) != 0
+
 proc copyString(src: NimString): NimString {.compilerRtl.} =
   if src != nil:
     if (src.reserved and seqShallowFlag) != 0:
@@ -103,6 +106,16 @@ proc copyString(src: NimString): NimString {.compilerRtl.} =
       result = rawNewStringNoInit(src.len)
       result.len = src.len
       copyMem(addr(result.data), addr(src.data), src.len + 1)
+      sysAssert((seqShallowFlag and result.reserved) == 0, "copyString")
+      when defined(nimShallowStrings):
+        if (src.reserved and strlitFlag) != 0:
+          result.reserved = (result.reserved and not strlitFlag) or seqShallowFlag
+
+proc newOwnedString(src: NimString; n: int): NimString =
+  result = rawNewStringNoInit(n)
+  result.len = n
+  copyMem(addr(result.data), addr(src.data), n)
+  result.data[n] = '\0'

 proc copyStringRC1(src: NimString): NimString {.compilerRtl.} =
   if src != nil:
@@ -116,6 +129,10 @@ proc copyStringRC1(src: NimString): NimString {.compilerRtl.} =
     result = rawNewStringNoInit(src.len)
     result.len = src.len
     copyMem(addr(result.data), addr(src.data), src.len + 1)
+    sysAssert((seqShallowFlag and result.reserved) == 0, "copyStringRC1")
+    when defined(nimShallowStrings):
+      if (src.reserved and strlitFlag) != 0:
+        result.reserved = (result.reserved and not strlitFlag) or seqShallowFlag

 proc copyDeepString(src: NimString): NimString {.inline.} =
   if src != nil:
@@ -140,9 +157,12 @@ proc addChar(s: NimString, c: char): NimString =
   # is compilerproc!
   result = s
   if result.len >= result.space:
-    result.reserved = resize(result.space)
+    let r = resize(result.space)
     result = cast[NimString](growObj(result,
-      sizeof(TGenericSeq) + result.reserved + 1))
+      sizeof(TGenericSeq) + r + 1))
+    result.reserved = r
+  elif wasMoved(s):
+    result = newOwnedString(s, s.len)
   result.data[result.len] = c
   result.data[result.len+1] = '\0'
   inc(result.len)
@@ -179,7 +199,7 @@ proc addChar(s: NimString, c: char): NimString =
 #   s = rawNewString(0);

 proc resizeString(dest: NimString, addlen: int): NimString {.compilerRtl.} =
-  if dest.len + addlen <= dest.space:
+  if dest.len + addlen <= dest.space and not wasMoved(dest):
     result = dest
   else: # slow path:
     var sp = max(resize(dest.space), dest.len + addlen)
@@ -200,7 +220,9 @@ proc appendChar(dest: NimString, c: char) {.compilerproc, inline.} =

 proc setLengthStr(s: NimString, newLen: int): NimString {.compilerRtl.} =
   var n = max(newLen, 0)
-  if n <= s.space:
+  if wasMoved(s):
+    result = newOwnedString(s, n)
+  elif n <= s.space:
     result = s
   else:
     result = resizeString(s, n)
@@ -218,26 +240,29 @@ proc incrSeq(seq: PGenericSeq, elemSize: int): PGenericSeq {.compilerProc.} =
   #  seq[seq->len-1] = x;
   result = seq
   if result.len >= result.space:
-    result.reserved = resize(result.space)
-    result = cast[PGenericSeq](growObj(result, elemSize * result.reserved +
+    let r = resize(result.space)
+    result = cast[PGenericSeq](growObj(result, elemSize * r +
                                GenericSeqSize))
+    result.reserved = r
   inc(result.len)

 proc incrSeqV2(seq: PGenericSeq, elemSize: int): PGenericSeq {.compilerProc.} =
   # incrSeq version 2
   result = seq
   if result.len >= result.space:
-    result.reserved = resize(result.space)
-    result = cast[PGenericSeq](growObj(result, elemSize * result.reserved +
+    let r = resize(result.space)
+    result = cast[PGenericSeq](growObj(result, elemSize * r +
                                GenericSeqSize))
+    result.reserved = r

 proc setLengthSeq(seq: PGenericSeq, elemSize, newLen: int): PGenericSeq {.
     compilerRtl.} =
   result = seq
   if result.space < newLen:
-    result.reserved = max(resize(result.space), newLen)
-    result = cast[PGenericSeq](growObj(result, elemSize * result.reserved +
+    let r = max(resize(result.space), newLen)
+    result = cast[PGenericSeq](growObj(result, elemSize * r +
                                GenericSeqSize))
+    result.reserved = r
   elif newLen < result.len:
     # we need to decref here, otherwise the GC leaks!
     when not defined(boehmGC) and not defined(nogc) and
diff --git a/lib/system/threads.nim b/lib/system/threads.nim
index a7a811844..96c045e6b 100644
--- a/lib/system/threads.nim
+++ b/lib/system/threads.nim
@@ -127,7 +127,8 @@ elif defined(genode):
   proc initThread(s: var SysThread,
                   stackSize: culonglong,
                   entry: GenodeThreadProc,
-                  arg: pointer) {.
+                  arg: pointer,
+                  affinity: cuint) {.
     importcpp: "#.initThread(genodeEnv, @)".}

   proc threadVarAlloc(): ThreadVarSlot = 0
@@ -567,6 +568,9 @@ when hostOS == "windows":
       setThreadAffinityMask(t.sys, uint(1 shl cpu))

 elif defined(genode):
+  var affinityOffset: cuint = 1
+  # CPU affinity offset for next thread, safe to roll-over
+
   proc createThread*[TArg](t: var Thread[TArg],
                            tp: proc (arg: TArg) {.thread, nimcall.},
                            param: TArg) =
@@ -577,7 +581,8 @@ elif defined(genode):
     when hasSharedHeap: t.stackSize = ThreadStackSize
     t.sys.initThread(
       ThreadStackSize.culonglong,
-      threadProcWrapper[TArg], addr(t))
+      threadProcWrapper[TArg], addr(t), affinityOffset)
+    inc affinityOffset

   proc pinToCpu*[Arg](t: var Thread[Arg]; cpu: Natural) =
     {.hint: "cannot change Genode thread CPU affinity after initialization".}
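
A closing note on the threads.nim change: the Genode backend now hands every new thread an ever-increasing affinity hint instead of pinning it afterwards, and the "safe to roll-over" comment suggests wrap-around of the unsigned counter is harmless, presumably because the platform side can reduce the hint modulo the actual CPU count. A toy model of that placement policy; cpuCount is hypothetical and not part of the binding shown above:

var affinityOffset: cuint = 1

proc nextAffinity(cpuCount: cuint): cuint =
  # Round-robin placement; unsigned wrap-around is deliberate and harmless.
  result = affinityOffset mod cpuCount
  inc affinityOffset
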