summary refs log tree commit diff stats
path: root/lib/system
diff options
authorAraq <>2013-02-07 01:57:10 +0100
committerAraq <>2013-02-07 01:57:10 +0100
commitab6f793408c10935bad98071bdae4009f6873d5c (patch)
tree99cec4dd7cd520e040c45927d0eab16b27c4270c /lib/system
parentf96d612e980af065217cddaf0a8521189977a37b (diff)
first version of a simple mark&sweep GC; activate with --gc:markAndSweep
Diffstat (limited to 'lib/system')
5 files changed, 781 insertions, 69 deletions
diff --git a/lib/system/cellsets.nim b/lib/system/cellsets.nim
index 5de4ca811..d3886dfda 100755
--- a/lib/system/cellsets.nim
+++ b/lib/system/cellsets.nim
@@ -1,7 +1,7 @@
 #            Nimrod's Runtime Library
-#        (c) Copyright 2012 Andreas Rumpf
+#        (c) Copyright 2013 Andreas Rumpf
 #    See the file "copying.txt", included in this
 #    distribution, for details about the copyright.
@@ -39,7 +39,7 @@ type
     len, cap: int
     d: PCellArray
-# ------------------- cell set handling ---------------------------------------
+# ------------------- cell seq handling ---------------------------------------
 proc contains(s: TCellSeq, c: PCell): bool {.inline.} =
   for i in 0 .. s.len-1:
@@ -68,6 +68,40 @@ proc deinit(s: var TCellSeq) =
   s.len = 0
   s.cap = 0
+# ------------------- cyclic cell temporary data structure --------------------
+  TCycleCell = object
+    cell: PCell
+    oldRefcount, newRefcount: TRefCount
+  PCycleCellArray = ptr array[0..100_000_000, TCycleCell]
+  TCycleCellSeq {.final, pure.} = object
+    len, cap: int
+    d: PCycleCellArray
+proc reserveSlot(s: var TCycleCellSeq): int =
+  # Reserves one more entry at the end of `s` and returns its index; the
+  # caller is expected to fill in `s.d[result]` afterwards.
+  if s.len >= s.cap:
+    # grow by a factor of 1.5, but always by at least one slot: for a
+    # capacity of 0 or 1 the plain `cap * 3 div 2` does not grow at all and
+    # the returned index would lie outside of the buffer.
+    s.cap = max(s.cap * 3 div 2, s.len + 1)
+    var d = cast[PCycleCellArray](Alloc(s.cap * sizeof(TCycleCell)))
+    copyMem(d, s.d, s.len * sizeof(TCycleCell))
+    Dealloc(s.d)
+    s.d = d
+  result = s.len
+  inc(s.len)
+proc init(s: var TCycleCellSeq, cap: int = 1024) =
+  # Sets up `s` as an empty sequence backed by a freshly allocated buffer
+  # with room for `cap` entries.
+  s.d = cast[PCycleCellArray](Alloc(cap * sizeof(TCycleCell)))
+  s.cap = cap
+  s.len = 0
+proc deinit(s: var TCycleCellSeq) = 
+  # Releases the buffer of `s` and resets `s` to the empty state.
+  s.len = 0
+  s.cap = 0
+  Dealloc(s.d)
+  s.d = nil
+# ------------------- cell set handling ---------------------------------------
   InitCellSetSize = 1024 # must be a power of two!
@@ -196,3 +230,21 @@ iterator elements(t: TCellSet): PCell {.inline.} =
     r =
+iterator elementsWithout(t, s: TCellSet): PCell {.inline.} =
+  # Yields every cell that is contained in `t` but not in `s`.
+  var r = t.head
+  while r != nil:
+    # look up the bit vector of `s` for the same key; 'nil' means that `s`
+    # has no entry for this key, so nothing is masked out
+    let ss = CellSetGet(s, r.key)
+    var i = 0
+    while i <= high(r.bits):
+      var w = r.bits[i]
+      if ss != nil:
+        w = w and not ss.bits[i]
+      var j = 0
+      while w != 0:
+        if (w and 1) != 0:
+          yield cast[PCell]((r.key shl PageShift) or
+                              (i shl IntShift +% j) *% MemAlign)
+        inc(j)
+        w = w shr 1
+      inc(i)
+    # advance to the next entry of `t`
+    r = r.next
diff --git a/lib/system/gc.nim b/lib/system/gc.nim
index ec656e0ef..d864cf78e 100644
--- a/lib/system/gc.nim
+++ b/lib/system/gc.nim
@@ -1,7 +1,7 @@
 #            Nimrod's Runtime Library
-#        (c) Copyright 2012 Andreas Rumpf
+#        (c) Copyright 2013 Andreas Rumpf
 #    See the file "copying.txt", included in this
 #    distribution, for details about the copyright.
@@ -88,6 +88,12 @@ template release(gch: TGcHeap) =
   when hasThreadSupport and hasSharedHeap:
+template gcAssert(cond: bool, msg: string) =
+  # GC-specific assertion. Only active when the symbol `useGcAssert` is
+  # defined at compile time; it then prints `msg` and quits with exit code 1
+  # if `cond` is false. Otherwise it expands to nothing.
+  when defined(useGcAssert):
+    if not cond:
+      echo "[GCASSERT] ", msg
+      quit 1
 proc addZCT(s: var TCellSeq, c: PCell) {.noinline.} =
   if (c.refcount and rcZct) == 0:
     c.refcount = c.refcount and not colorMask or rcZct
@@ -115,16 +121,15 @@ proc internRefcount(p: pointer): int {.exportc: "getRefcount".} =
 when BitsPerPage mod (sizeof(int)*8) != 0:
   {.error: "(BitsPerPage mod BitsPerUnit) should be zero!".}
-when debugGC:
-  proc writeCell(msg: CString, c: PCell) =
-    var kind = -1
-    if c.typ != nil: kind = ord(c.typ.kind)
-    when leakDetector:
-      c_fprintf(c_stdout, "[GC] %s: %p %d rc=%ld from %s(%ld)\n",
-                msg, c, kind, c.refcount shr rcShift, c.filename, c.line)
-    else:
-      c_fprintf(c_stdout, "[GC] %s: %p %d rc=%ld\n",
-                msg, c, kind, c.refcount shr rcShift)
+proc writeCell(msg: CString, c: PCell) =
+  # Debug helper: prints `msg` followed by the address, the type kind and the
+  # reference count (shifted by `rcShift`) of `c` to stdout. With
+  # `leakDetector` the file name and line stored in the cell are printed too.
+  var kind = -1 # stays -1 if the cell carries no type information
+  if c.typ != nil: kind = ord(c.typ.kind)
+  when leakDetector:
+    c_fprintf(c_stdout, "[GC] %s: %p %d rc=%ld from %s(%ld)\n",
+              msg, c, kind, c.refcount shr rcShift, c.filename, c.line)
+  else:
+    c_fprintf(c_stdout, "[GC] %s: %p %d rc=%ld\n",
+              msg, c, kind, c.refcount shr rcShift)
 when traceGC:
   # traceGC is a special switch to enable extensive debugging
@@ -226,8 +231,8 @@ proc rtlAddZCT(c: PCell) {.rtl, inl.} =
 proc decRef(c: PCell) {.inline.} =
-  sysAssert(isAllocatedPtr(gch.region, c), "decRef: interiorPtr")
-  sysAssert(c.refcount >=% rcIncrement, "decRef")
+  gcAssert(isAllocatedPtr(gch.region, c), "decRef: interiorPtr")
+  gcAssert(c.refcount >=% rcIncrement, "decRef")
   if --c.refcount:
   elif canBeCycleRoot(c):
@@ -236,7 +241,7 @@ proc decRef(c: PCell) {.inline.} =
 proc incRef(c: PCell) {.inline.} = 
-  sysAssert(isAllocatedPtr(gch.region, c), "incRef: interiorPtr")
+  gcAssert(isAllocatedPtr(gch.region, c), "incRef: interiorPtr")
   if canBeCycleRoot(c):
@@ -247,7 +252,7 @@ proc nimGCunref(p: pointer) {.compilerProc, inline.} = decRef(usrToCell(p))
 proc nimGCunrefNoCycle(p: pointer) {.compilerProc, inline.} =
   sysAssert(allocInv(gch.region), "begin nimGCunrefNoCycle")
   var c = usrToCell(p)
-  sysAssert(isAllocatedPtr(gch.region, c), "nimGCunrefNoCycle: isAllocatedPtr")
+  gcAssert(isAllocatedPtr(gch.region, c), "nimGCunrefNoCycle: isAllocatedPtr")
   if --c.refcount:
     sysAssert(allocInv(gch.region), "end nimGCunrefNoCycle 2")
@@ -255,7 +260,7 @@ proc nimGCunrefNoCycle(p: pointer) {.compilerProc, inline.} =
 proc asgnRef(dest: ppointer, src: pointer) {.compilerProc, inline.} =
   # the code generator calls this proc!
-  sysAssert(not isOnStack(dest), "asgnRef")
+  gcAssert(not isOnStack(dest), "asgnRef")
   # BUGFIX: first incRef then decRef!
   if src != nil: incRef(usrToCell(src))
   if dest[] != nil: decRef(usrToCell(dest[]))
@@ -285,8 +290,8 @@ proc unsureAsgnRef(dest: ppointer, src: pointer) {.compilerProc.} =
     if cast[int](dest[]) >=% PageSize: decRef(usrToCell(dest[]))
     # can't be an interior pointer if it's a stack location!
-    sysAssert(interiorAllocatedPtr(gch.region, dest)==nil, 
-              "stack loc AND interior pointer")
+    gcAssert(interiorAllocatedPtr(gch.region, dest)==nil, 
+             "stack loc AND interior pointer")
   dest[] = src
 proc initGC() =
@@ -341,9 +346,9 @@ proc forAllChildrenAux(dest: Pointer, mt: PNimType, op: TWalkOp) =
     else: nil
 proc forAllChildren(cell: PCell, op: TWalkOp) =
-  sysAssert(cell != nil, "forAllChildren: 1")
-  sysAssert(cell.typ != nil, "forAllChildren: 2")
-  sysAssert cell.typ.kind in {tyRef, tySequence, tyString}, "forAllChildren: 3"
+  gcAssert(cell != nil, "forAllChildren: 1")
+  gcAssert(cell.typ != nil, "forAllChildren: 2")
+  gcAssert cell.typ.kind in {tyRef, tySequence, tyString}, "forAllChildren: 3"
   let marker = cell.typ.marker
   if marker != nil:
@@ -407,11 +412,11 @@ proc addNewObjToZCT(res: PCell, gch: var TGcHeap) {.inline.} =
 proc rawNewObj(typ: PNimType, size: int, gch: var TGcHeap): pointer =
   # generates a new object and sets its reference counter to 0
-  sysAssert(typ.kind in {tyRef, tyString, tySequence}, "newObj: 1")
+  gcAssert(typ.kind in {tyRef, tyString, tySequence}, "newObj: 1")
   sysAssert(allocInv(gch.region), "rawNewObj begin")
   var res = cast[PCell](rawAlloc(gch.region, size + sizeof(TCell)))
-  sysAssert((cast[TAddress](res) and (MemAlign-1)) == 0, "newObj: 2")
+  gcAssert((cast[TAddress](res) and (MemAlign-1)) == 0, "newObj: 2")
   # now it is buffered in the ZCT
   res.typ = typ
   when leakDetector and not hasThreadSupport:
@@ -447,7 +452,7 @@ proc newObjRC1(typ: PNimType, size: int): pointer {.compilerRtl.} =
   # generates a new object and sets its reference counter to 1
   sysAssert(allocInv(gch.region), "newObjRC1 begin")
-  sysAssert(typ.kind in {tyRef, tyString, tySequence}, "newObj: 1")
+  gcAssert(typ.kind in {tyRef, tyString, tySequence}, "newObj: 1")
   sysAssert(allocInv(gch.region), "newObjRC1 after collectCT")
@@ -482,7 +487,7 @@ proc growObj(old: pointer, newsize: int, gch: var TGcHeap): pointer =
   var ol = usrToCell(old)
   sysAssert(ol.typ != nil, "growObj: 1")
-  sysAssert(ol.typ.kind in {tyString, tySequence}, "growObj: 2")
+  gcAssert(ol.typ.kind in {tyString, tySequence}, "growObj: 2")
   sysAssert(allocInv(gch.region), "growObj begin")
   var res = cast[PCell](rawAlloc(gch.region, newsize + sizeof(TCell)))
@@ -532,70 +537,197 @@ proc growObj(old: pointer, newsize: int): pointer {.rtl.} =
 proc doOperation(p: pointer, op: TWalkOp) =
   if p == nil: return
   var c: PCell = usrToCell(p)
-  sysAssert(c != nil, "doOperation: 1")
+  gcAssert(c != nil, "doOperation: 1")
   case op # faster than function pointers because of easy prediction
   of waZctDecRef:
     #if not isAllocatedPtr(gch.region, c):
     #  return
     #  c_fprintf(c_stdout, "[GC] decref bug: %p", c) 
-    sysAssert(isAllocatedPtr(gch.region, c), "decRef: waZctDecRef")
-    sysAssert(c.refcount >=% rcIncrement, "doOperation 2")
+    gcAssert(isAllocatedPtr(gch.region, c), "decRef: waZctDecRef")
+    gcAssert(c.refcount >=% rcIncrement, "doOperation 2")
     c.refcount = c.refcount -% rcIncrement
     when logGC: writeCell("decref (from doOperation)", c)
     if c.refcount <% rcIncrement: addZCT(gch.zct, c)
+    # XXX bug here: needs the full write barrier
   of waPush:
     add(gch.tempStack, c)
   of waCycleDecRef:
-    sysAssert(c.refcount >=% rcIncrement, "doOperation 3")
+    gcAssert(c.refcount >=% rcIncrement, "doOperation 3")
     c.refcount = c.refcount -% rcIncrement
 proc nimGCvisit(d: pointer, op: int) {.compilerRtl.} =
   doOperation(d, TWalkOp(op))
+proc freeCyclicCell(gch: var TGcHeap, c: PCell) =
+  # Disposes the single cell `c`: calls `prepareDealloc`, records the event
+  # for tracing/logging and gives the memory back to `gch.region`. The
+  # children of `c` are not touched here.
+  prepareDealloc(c)
+  gcTrace(c, csCycFreed)
+  when logGC: writeCell("cycle collector dealloc cell", c)
+  when reallyDealloc: rawDealloc(gch.region, c)
+  else:
+    # debugging mode: keep the memory but wipe the cell header
+    gcAssert(c.typ != nil, "freeCyclicCell")
+    zeroMem(c, sizeof(TCell))
 # we now use a much simpler and non-recursive algorithm for cycle removal
-proc collectCycles(gch: var TGcHeap) =
-  var tabSize = 0
-  for c in elements(gch.cycleRoots):
-    inc(tabSize)
-    forallChildren(c, waCycleDecRef)
-  if tabSize == 0: return
-  gch.stat.cycleTableSize = max(gch.stat.cycleTableSize, tabSize)
-  # restore reference counts (a depth-first traversal is needed):
-  var marker: TCellSet
-  Init(marker)
-  for c in elements(gch.cycleRoots):
-    if c.refcount >=% rcIncrement:
-      if not containsOrIncl(marker, c):
+proc CollectZCT(gch: var TGcHeap): bool
+when false:
+  template color(c): expr = c.refCount and colorMask
+  template setColor(c, col) =
+    # replace the color bits of the reference count by `col`; the bare
+    # expression without the assignment would leave the cell unchanged
+    c.refCount = c.refCount and not colorMask or col
+  proc markGray(s: PCell) =
+    if s.color != rcGray:
+      setColor(s, rcGray)
+      forAllChildren(s, waMarkGray)
+  proc scan(s: PCell) =
+    if s.color == rcGray:
+      scanBlack(s)
+    else:
+      s.setColor(rcWhite)
+      forAllChildren(s, waScan)
+  proc scanBlack(s: PCell) =
+    s.setColor(rcBlack)
+    forAllChildren(s, waScanBlack)
+  proc collectWhite(s: PCell) =
+    if s.color == rcWhite and not buffered(s):
+      s.setcolor(rcBlack)
+      forAllChildren(s, waCollectWhite)
+      freeCyclicCell(gch, s)
+  proc MarkRoots(gch: var TGcHeap) =
+    for s in elements(gch.cycleRoots):
+      if s.color == rcPurple and s.refCount >=% rcIncrement:
+        markGray(s)
+      else:
+        # since we cannot remove from 'cycleRoots' easily, we use the ZCT as
+        # a temporary buffer:
+        addZCT(gch.zct, s)
+    var freed = 0
+    for i in 0 .. < gch.zct.len:
+      let c = gch.zct.d[i]
+      # if black and rc == 0:
+      excl(gch.cycleRoots, c)
+      if c.refcount == 0:
+        freeCyclicCell(gch, c)
+        inc freed
+  proc collectRoots(gch: var TGcHeap) =
+    for s in elements(gch.cycleRoots):
+      collectWhite(s)
+  proc collectCycles(gch: var TGcHeap) =
+    while gch.zct.len > 0: discard collectZCT(gch)
+    markRoots(gch)
+    scanRoots(gch)
+    collectRoots(gch)
+    var tabSize = 0
+    # while RemoveInnerRCs, we misuse the ZCT as a "candidates to be freed"
+    # buffer; the ZCT is guaranteed to be empty here.
+    # However, since the RC is in flux in the following traversals, it can be
+    # that we store cells with RC > 0 in the ZCT. This needs to be checked for
+    # in the final loop over the ZCT.
+    var marker: TCellSet
+    Init(marker)
+    var 
+      decs = 0
+      incs = 0
+    for c in elements(gch.cycleRoots):
+      inc(tabSize)
+      if c.refcount >=% rcIncrement and not containsOrIncl(marker, c):
         gch.tempStack.len = 0
         forAllChildren(c, waPush)
         while gch.tempStack.len > 0:
           var d = gch.tempStack.d[gch.tempStack.len]
+          gcAssert d.refcount >=% rcIncrement, "child's RC corrupted!"
+          d.refcount = d.refcount -% rcIncrement
+          writeCell("decref (cycle)", d)
+          inc decs
+          if d.refcount <% rcIncrement:
+            addZCT(gch.zct, d)
+            if not containsOrIncl(marker, d):
+              forAllChildren(d, waPush)
+      #forallChildren(c, waCycleDecRef)
+    if tabSize == 0: return
+    gch.stat.cycleTableSize = max(gch.stat.cycleTableSize, tabSize)
+    # restore reference counts (a depth-first traversal is needed);
+    # We need to restore the cycle roots with RC > 0 plus the marked
+    for c in elements(gch.cycleRoots):
+      excl(marker, c)
+      if c.refcount >=% rcIncrement:
+        gch.tempStack.len = 0
+        var loopIter = 0
+        forAllChildren(c, waPush)
+        while gch.tempStack.len > 0:
+          dec(gch.tempStack.len)
+          var d = gch.tempStack.d[gch.tempStack.len]
           d.refcount = d.refcount +% rcIncrement
-          if d in gch.cycleRoots and not containsOrIncl(marker, d):
+          writeCell("incref (cycle)", d)
+          writeCell("from ", c)
+          cfprintf(cstdout, "depth: %ld\n", loopIter)
+          inc incs
+          if contains(marker, d):
+            excl(marker, d)
+            inc loopIter
             forAllChildren(d, waPush)
-  # remove cycles:
-  for c in elements(gch.cycleRoots):
-    if c.refcount <% rcIncrement:
+    gcAssert incs <= decs, "too many increments!"
+    Deinit(marker)
+    # remove cycles: free nodes with RC == 0, but do nothing with their children:
+    var freed = 0
+    for i in 0 .. < gch.zct.len:
+      let c = gch.zct.d[i]
+      if c.refcount <% rcIncrement:
+        freeCyclicCell(gch, c)
+        inc freed
+    cfprintf(cstdout, "freed cyclic objects: %ld; zct: %ld; decs: %ld; incs: %ld\n",
+      freed, gch.zct.len, decs, incs)
+    gch.zct.len = 0
+    if freed == 0:
+      gcAssert incs == decs, "graph corrupted!"
+    when false:
+      gcAssert gch.tempStack.len == 0, "tempStack not empty (A)"
       gch.tempStack.len = 0
-      forAllChildren(c, waPush)
-      while gch.tempStack.len > 0:
-        dec(gch.tempStack.len)
-        var d = gch.tempStack.d[gch.tempStack.len]
-        if d.refcount <% rcIncrement:
-          if d notin gch.cycleRoots: # d is leaf of c and not part of cycle
-            addZCT(gch.zct, d)
-            when logGC: writeCell("add to ZCT (from cycle collector)", d)
-      prepareDealloc(c)
-      gcTrace(c, csCycFreed)
-      when logGC: writeCell("cycle collector dealloc cell", c)
-      when reallyDealloc: rawDealloc(gch.region, c)
-      else:
-        sysAssert(c.typ != nil, "collectCycles")
-        zeroMem(c, sizeof(TCell))
-  Deinit(gch.cycleRoots)
-  Init(gch.cycleRoots)
+      for c in elements(gch.cycleRoots):
+        if c.refcount <% rcIncrement:
+          gcAssert gch.tempStack.len == 0, "tempStack not empty (B)"
+          forAllChildren(c, waPush)
+          while gch.tempStack.len > 0:
+            dec(gch.tempStack.len)
+            var d = gch.tempStack.d[gch.tempStack.len]
+            if d.refcount <% rcIncrement:
+              if d notin gch.cycleRoots: # d is leaf of c and not part of cycle
+                freeCyclicCell(gch, d)
+                when logGC: writeCell("add to ZCT (from cycle collector)", d)
+          freeCyclicCell(gch, c)
+    Deinit(gch.cycleRoots)
+    Init(gch.cycleRoots)
+    # alive cycles need to be kept in 'cycleRoots' if they are referenced
+    # from the stack; otherwise the write barrier will add the cycle root again
+    # anyway!
+    when false:
+      block addBackStackRoots:
+        var d = gch.decStack.d
+        var cycleRootsLen = 0
+        for i in 0..gch.decStack.len-1:
+          var c = d[i]
+          gcAssert isAllocatedPtr(gch.region, c), "addBackStackRoots"
+          gcAssert c.refcount >=% rcIncrement, "addBackStackRoots: dead cell"
+          if canBeCycleRoot(c):
+            if c notin gch.cycleRoots: inc cycleRootsLen
+            incl(gch.cycleRoots, c)
+          gcAssert c.typ != nil, "addBackStackRoots 2"
+        if cycleRootsLen != 0:
+          cfprintf(cstdout, "cycle roots: %ld\n", cycleRootsLen)
+proc collectCycles(gch: var TGcHeap) =
+  # Deliberately a no-op: the experimental implementations above are
+  # disabled via 'when false'.
+  # it's broken anyway
+  nil
 proc gcMark(gch: var TGcHeap, p: pointer) {.inline.} =
   # the addresses are not as cells on the stack, so turn them to cells:
@@ -808,7 +940,7 @@ proc CollectZCT(gch: var TGcHeap): bool =
         if gch.maxPause > 0:
           let duration = getticks() - t0
           # the GC's measuring is not accurate and needs some cleanup actions 
-          # (stack unmarking), so subtract some short amount of time in to
+          # (stack unmarking), so subtract some short amount of time in
           # order to miss deadlines less often:
           if duration >= gch.maxPause - 50_000:
             return false
@@ -842,7 +974,7 @@ proc collectCTBody(gch: var TGcHeap) =
     when cycleGC:
       if getOccupiedMem(gch.region) >= gch.cycleThreshold or alwaysCycleGC:
-        discard collectZCT(gch)
+        #discard collectZCT(gch)
         gch.cycleThreshold = max(InitialCycleThreshold, getOccupiedMem() *
diff --git a/lib/system/gc_ms.nim b/lib/system/gc_ms.nim
new file mode 100644
index 000000000..eaea76235
--- /dev/null
+++ b/lib/system/gc_ms.nim
@@ -0,0 +1,524 @@
+#            Nimrod's Runtime Library
+#        (c) Copyright 2013 Andreas Rumpf
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+# A simple mark&sweep garbage collector for Nimrod.
+{.push profiler:off.}
+  CycleIncrease = 2 # is a multiplicative increase
+  InitialCycleThreshold = 4*1024*1024 # X MB because cycle checking is slow
+when defined(memProfiler):
+  proc nimProfile(requestedSize: int)
+  TWalkOp = enum
+    waMarkGlobal,  # we need to mark conservatively for global marker procs
+                   # as these may refer to a global var and not to a thread
+                   # local 
+    waMarkPrecise  # fast precise marking
+  TFinalizer {.compilerproc.} = proc (self: pointer) {.nimcall.}
+    # A ref type can have a finalizer that is called before the object's
+    # storage is freed.
+  TGlobalMarkerProc = proc () {.nimcall.}
+  TGcStat = object
+    stackScans: int          # number of performed stack scans (for statistics)
+    collections: int         # number of performed full collections
+    maxThreshold: int        # max threshold that has been set
+    maxStackSize: int        # max stack size
+    maxStackCells: int       # max stack cells in ``decStack``
+    cycleTableSize: int      # max entries in cycle table  
+  TGcHeap = object           # this contains the zero count and
+                             # non-zero count table
+    stackBottom: pointer
+    cycleThreshold: int
+    allocated, marked: TCellSet
+    tempStack: TCellSeq      # temporary stack for recursion elimination
+    recGcLock: int           # prevent recursion via finalizers; no thread lock
+    region: TMemRegion       # garbage collected region
+    stat: TGcStat
+  gch {.rtlThreadVar.}: TGcHeap
+when not defined(useNimRtl):
+  InstantiateForRegion(gch.region)
+template acquire(gch: TGcHeap) = 
+  # takes `HeapLock`; expands to nothing unless both thread support and a
+  # shared heap are enabled
+  when hasThreadSupport and hasSharedHeap:
+    AcquireSys(HeapLock)
+template release(gch: TGcHeap) = 
+  # counterpart of `acquire`: releases `HeapLock` under the same conditions
+  when hasThreadSupport and hasSharedHeap:
+    releaseSys(HeapLock)
+template gcAssert(cond: bool, msg: string) =
+  # GC-specific assertion. Only active when the symbol `useGcAssert` is
+  # defined at compile time; it then prints `msg` and quits with exit code 1
+  # if `cond` is false. Otherwise it expands to nothing.
+  when defined(useGcAssert):
+    if not cond:
+      echo "[GCASSERT] ", msg
+      quit 1
+proc cellToUsr(cell: PCell): pointer {.inline.} =
+  # maps a cell header to the user data that starts `sizeof(TCell)` bytes
+  # behind it
+  let header = cast[TAddress](cell)
+  result = cast[pointer](header +% TAddress(sizeof(TCell)))
+proc usrToCell(usr: pointer): PCell {.inline.} =
+  # inverse of `cellToUsr`: steps back `sizeof(TCell)` bytes from the user
+  # data to reach the cell header
+  let payload = cast[TAddress](usr)
+  result = cast[PCell](payload -% TAddress(sizeof(TCell)))
+proc canbeCycleRoot(c: PCell): bool {.inline.} =
+  # true unless the type of `c` is flagged as `ntfAcyclic`
+  result = ntfAcyclic notin c.typ.flags
+proc extGetCellType(c: pointer): PNimType {.compilerproc.} =
+  # used for code generation concerning debugging
+  # `c` points to user data; the result is the type stored in its cell header
+  result = usrToCell(c).typ
+proc unsureAsgnRef(dest: ppointer, src: pointer) {.inline.} =
+  # plain pointer store; this collector does no bookkeeping on assignment
+  dest[] = src
+proc internRefcount(p: pointer): int {.exportc: "getRefcount".} =
+  # returns the raw `refcount` field of the cell that belongs to `p`
+  result = int(usrToCell(p).refcount)
+  globalMarkersLen: int
+  globalMarkers: array[0.. 10_000, TGlobalMarkerProc]
+proc nimRegisterGlobalMarker(markerProc: pointer) {.compilerProc.} =
+  # Appends `markerProc` to `globalMarkers` so that `markGlobals` calls it.
+  # `globalMarkers` has a fixed size; storing past its end must not happen,
+  # so the program is aborted with a message once the table is full.
+  if globalMarkersLen <= high(globalMarkers):
+    globalMarkers[globalMarkersLen] = cast[TGlobalMarkerProc](markerProc)
+    inc globalMarkersLen
+  else:
+    echo "[GC] cannot register global variable; too many global variables"
+    quit 1
+# this has to equal zero, otherwise we have to round up UnitsPerPage:
+when BitsPerPage mod (sizeof(int)*8) != 0:
+  {.error: "(BitsPerPage mod BitsPerUnit) should be zero!".}
+proc writeCell(msg: CString, c: PCell) =
+  # Debug helper: prints `msg` followed by the address, the type kind and the
+  # raw `refcount` field of `c` to stdout. With `leakDetector` the file name
+  # and line stored in the cell are printed too.
+  var kind = -1 # stays -1 if the cell carries no type information
+  if c.typ != nil: kind = ord(c.typ.kind)
+  when leakDetector:
+    c_fprintf(c_stdout, "[GC] %s: %p %d rc=%ld from %s(%ld)\n",
+              msg, c, kind, c.refcount, c.filename, c.line)
+  else:
+    c_fprintf(c_stdout, "[GC] %s: %p %d rc=%ld\n",
+              msg, c, kind, c.refcount)
+template gcTrace(cell, state: expr): stmt {.immediate.} =
+  # forwards to `traceCell` when `traceGC` is on; otherwise expands to nothing
+  when traceGC: traceCell(cell, state)
+# forward declarations:
+proc collectCT(gch: var TGcHeap)
+proc IsOnStack*(p: pointer): bool {.noinline.}
+proc forAllChildren(cell: PCell, op: TWalkOp)
+proc doOperation(p: pointer, op: TWalkOp)
+proc forAllChildrenAux(dest: Pointer, mt: PNimType, op: TWalkOp)
+# we need the prototype here for debugging purposes
+proc prepareDealloc(cell: PCell) =
+  # Runs the finalizer of `cell`'s type, if it has one. Does not free memory.
+  if cell.typ.finalizer != nil:
+    # the finalizer could invoke something that
+    # allocates memory; this could trigger a garbage
+    # collection. Since we are already collecting we
+    # prevent recursive entering here by a lock.
+    # XXX: we should set the cell's children to nil!
+    inc(gch.recGcLock)
+    (cast[TFinalizer](cell.typ.finalizer))(cellToUsr(cell))
+    dec(gch.recGcLock)
+# increments the `refCount` field of the cell that belongs to `p`
+proc nimGCref(p: pointer) {.compilerProc, inline.} = inc(usrToCell(p).refCount)
+# decrements the `refCount` field of the cell that belongs to `p`
+proc nimGCunref(p: pointer) {.compilerProc, inline.} = dec(usrToCell(p).refCount)
+proc initGC() =
+  # Initializes the heap state `gch`: threshold, statistics counters, the
+  # temporary stack and the `allocated`/`marked` cell sets. Does nothing
+  # when `useNimRtl` is defined.
+  when not defined(useNimRtl):
+    when traceGC:
+      for i in low(TCellState)..high(TCellState): Init(states[i])
+    gch.cycleThreshold = InitialCycleThreshold
+    gch.stat.stackScans = 0
+    gch.stat.collections = 0
+    gch.stat.maxThreshold = 0
+    gch.stat.maxStackSize = 0
+    gch.stat.maxStackCells = 0
+    # init the rt
+    init(gch.tempStack)
+    Init(gch.allocated)
+    init(gch.marked)
+proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: TWalkOp) =
+  # Walks the node tree `n` that describes the object at `dest` and applies
+  # `op` (via `forAllChildrenAux`) to every slot found.
+  var d = cast[TAddress](dest)
+  case n.kind
+  of nkSlot: forAllChildrenAux(cast[pointer](d +% n.offset), n.typ, op)
+  of nkList:
+    for i in 0..n.len-1:
+      forAllSlotsAux(dest, n.sons[i], op)
+  of nkCase:
+    # only the branch selected for this object is visited
+    var m = selectBranch(dest, n)
+    if m != nil: forAllSlotsAux(dest, m, op)
+  of nkNone: sysAssert(false, "forAllSlotsAux")
+proc forAllChildrenAux(dest: Pointer, mt: PNimType, op: TWalkOp) =
+  # Applies `op` to every ref/string/seq pointer stored in the value of type
+  # `mt` located at `dest`; types flagged `ntfNoRefs` are skipped entirely.
+  var d = cast[TAddress](dest)
+  if dest == nil: return # nothing to do
+  if ntfNoRefs notin mt.flags:
+    case mt.Kind
+    of tyRef, tyString, tySequence: # leaf:
+      doOperation(cast[ppointer](d)[], op)
+    of tyObject, tyTuple:
+      forAllSlotsAux(dest, mt.node, op)
+    of tyArray, tyArrayConstr, tyOpenArray:
+      # element count is derived from the total size and the element size
+      for i in 0..(mt.size div mt.base.size)-1:
+        forAllChildrenAux(cast[pointer](d +% i *% mt.base.size), mt.base, op)
+    else: nil
+proc forAllChildren(cell: PCell, op: TWalkOp) =
+  # Applies `op` to all children of `cell`. If the cell's type provides a
+  # marker proc it is used; otherwise the type information is walked.
+  gcAssert(cell != nil, "forAllChildren: 1")
+  gcAssert(cell.typ != nil, "forAllChildren: 2")
+  gcAssert cell.typ.kind in {tyRef, tySequence, tyString}, "forAllChildren: 3"
+  let marker = cell.typ.marker
+  if marker != nil:
+    # the marker reports each child through `nimGCvisit`, which takes the
+    # operation as a plain int
+    marker(cellToUsr(cell), op.int)
+  else:
+    case cell.typ.Kind
+    of tyRef: # common case
+      forAllChildrenAux(cellToUsr(cell), cell.typ.base, op)
+    of tySequence:
+      var d = cast[TAddress](cellToUsr(cell))
+      var s = cast[PGenericSeq](d)
+      if s != nil:
+        # the elements start `GenericSeqSize` bytes behind the seq header
+        for i in 0..s.len-1:
+          forAllChildrenAux(cast[pointer](d +% i *% cell.typ.base.size +%
+            GenericSeqSize), cell.typ.base, op)
+    else: nil
+proc rawNewObj(typ: PNimType, size: int, gch: var TGcHeap): pointer =
+  # Allocates a new cell with room for `size` bytes of user data, sets its
+  # type to `typ` and its reference counter to 0, registers it in
+  # `gch.allocated` and returns the pointer to the user data. A collection
+  # may be triggered (via `collectCT`) before the allocation.
+  acquire(gch)
+  gcAssert(typ.kind in {tyRef, tyString, tySequence}, "newObj: 1")
+  collectCT(gch)
+  var res = cast[PCell](rawAlloc(gch.region, size + sizeof(TCell)))
+  gcAssert((cast[TAddress](res) and (MemAlign-1)) == 0, "newObj: 2")
+  # fill in the cell header
+  res.typ = typ
+  when leakDetector and not hasThreadSupport:
+    if framePtr != nil and framePtr.prev != nil:
+      res.filename = framePtr.prev.filename
+      res.line = framePtr.prev.line
+  res.refcount = 0
+  release(gch)
+  # NOTE(review): `growObj` updates `gch.allocated` before `release`; here it
+  # happens after it -- confirm whether that matters for shared heaps
+  incl(gch.allocated, res)
+  result = cellToUsr(res)
+proc newObj(typ: PNimType, size: int): pointer {.compilerRtl.} =
+  # allocates a new object of type `typ` with `size` bytes of zeroed user data
+  result = rawNewObj(typ, size, gch)
+  zeroMem(result, size)
+  when defined(memProfiler): nimProfile(size)
+proc newSeq(typ: PNimType, len: int): pointer {.compilerRtl.} =
+  # Allocates a zeroed sequence of type `typ` with `len` elements; both the
+  # `len` and the `reserved` field are set to `len`.
+  # `newObj` already uses locks, so no need for them here.
+  let size = addInt(mulInt(len, typ.base.size), GenericSeqSize)
+  result = newObj(typ, size)
+  cast[PGenericSeq](result).len = len
+  cast[PGenericSeq](result).reserved = len
+  # NOTE(review): `newObj` has already called `nimProfile(size)` -- confirm
+  # that reporting the allocation a second time is intended
+  when defined(memProfiler): nimProfile(size)
+proc newObjRC1(typ: PNimType, size: int): pointer {.compilerRtl.} =
+  # performs exactly the same steps as `newObj` in this collector
+  result = rawNewObj(typ, size, gch)
+  zeroMem(result, size)
+  when defined(memProfiler): nimProfile(size)
+proc newSeqRC1(typ: PNimType, len: int): pointer {.compilerRtl.} =
+  # performs exactly the same steps as `newSeq` in this collector
+  let size = addInt(mulInt(len, typ.base.size), GenericSeqSize)
+  result = newObj(typ, size)
+  cast[PGenericSeq](result).len = len
+  cast[PGenericSeq](result).reserved = len
+  # NOTE(review): `newObj` has already called `nimProfile(size)` -- confirm
+  # that reporting the allocation a second time is intended
+  when defined(memProfiler): nimProfile(size)
+proc growObj(old: pointer, newsize: int, gch: var TGcHeap): pointer =
+  # Moves the string/seq `old` into a new cell with `newsize` bytes of user
+  # data: header and current contents are copied, the remaining bytes are
+  # zeroed, the old cell is removed from `gch.allocated` and released, and
+  # the new user pointer is returned.
+  acquire(gch)
+  collectCT(gch)
+  var ol = usrToCell(old)
+  sysAssert(ol.typ != nil, "growObj: 1")
+  gcAssert(ol.typ.kind in {tyString, tySequence}, "growObj: 2")
+  var res = cast[PCell](rawAlloc(gch.region, newsize + sizeof(TCell)))
+  var elemSize = 1 # strings: one byte per element
+  if ol.typ.kind != tyString: elemSize = ol.typ.base.size
+  # bytes of user data currently in use (payload plus seq header)
+  var oldsize = cast[PGenericSeq](old).len*elemSize + GenericSeqSize
+  copyMem(res, ol, oldsize + sizeof(TCell))
+  zeroMem(cast[pointer](cast[TAddress](res)+% oldsize +% sizeof(TCell)),
+          newsize-oldsize)
+  sysAssert((cast[TAddress](res) and (MemAlign-1)) == 0, "growObj: 3")
+  excl(gch.allocated, ol)
+  when reallyDealloc: rawDealloc(gch.region, ol)
+  else:
+    zeroMem(ol, sizeof(TCell))
+  incl(gch.allocated, res)
+  release(gch)
+  result = cellToUsr(res)
+  when defined(memProfiler): nimProfile(newsize-oldsize)
+proc growObj(old: pointer, newsize: int): pointer {.rtl.} =
+  # runtime entry point: same as the three-argument `growObj`, using `gch`
+  result = growObj(old, newsize, gch)
+{.push profiler:off.}
+# ----------------- collector -----------------------------------------------
+proc mark(gch: var TGcHeap, c: PCell) =
+  # Adds `c` and every cell reachable from it to `gch.marked`. Works
+  # iteratively: `waMarkPrecise` pushes children onto `gch.tempStack`, which
+  # is then drained here.
+  incl(gch.marked, c)
+  gcAssert gch.tempStack.len == 0, "stack not empty!"
+  forAllChildren(c, waMarkPrecise)
+  while gch.tempStack.len > 0:
+    dec gch.tempStack.len
+    var d = gch.tempStack.d[gch.tempStack.len]
+    # only descend into cells that have not been marked before
+    if not containsOrIncl(gch.marked, d):
+      forAllChildren(d, waMarkPrecise)
+proc doOperation(p: pointer, op: TWalkOp) =
+  # Applies the walk operation `op` to the cell that belongs to the user
+  # pointer `p`; 'nil' pointers are ignored.
+  if p == nil: return
+  var c: PCell = usrToCell(p)
+  gcAssert(c != nil, "doOperation: 1")
+  case op
+  of waMarkGlobal:
+    # only cells that `gch.region` reports as allocated are marked
+    if isAllocatedPtr(gch.region, c):
+      mark(gch, c)
+  of waMarkPrecise: add(gch.tempStack, c)
+proc nimGCvisit(d: pointer, op: int) {.compilerRtl.} =
+  # runtime entry point: converts `op` to `TWalkOp` and applies it to `d`
+  doOperation(d, TWalkOp(op))
+proc freeCyclicCell(gch: var TGcHeap, c: PCell) =
+  # Disposes the single cell `c`: calls `prepareDealloc`, records the event
+  # for tracing/logging and gives the memory back to `gch.region`.
+  prepareDealloc(c)
+  gcTrace(c, csCycFreed)
+  when logGC: writeCell("cycle collector dealloc cell", c)
+  when reallyDealloc: rawDealloc(gch.region, c)
+  else:
+    # debugging mode: keep the memory but wipe the cell header
+    gcAssert(c.typ != nil, "freeCyclicCell")
+    zeroMem(c, sizeof(TCell))
+proc sweep(gch: var TGcHeap) =
+  # Frees every cell that is in `gch.allocated` but not in `gch.marked` and
+  # removes it from `gch.allocated`. Both branches do the same; the active
+  # one uses the `elementsWithout` iterator.
+  when true:
+    # NOTE(review): `excl` modifies the set that is being iterated --
+    # confirm that `elementsWithout` tolerates this
+    for c in gch.allocated.elementsWithout(gch.marked):
+      gch.allocated.excl(c)
+      freeCyclicCell(gch, c)
+  else:
+    for c in gch.allocated.elements():
+      if not gch.marked.contains(c):
+        gch.allocated.excl(c)
+        freeCyclicCell(gch, c)
+proc markGlobals(gch: var TGcHeap) =
+  # calls every registered global marker proc in registration order
+  let count = globalMarkersLen
+  var idx = 0
+  while idx < count:
+    globalMarkers[idx]()
+    inc idx
+proc gcMark(gch: var TGcHeap, p: pointer) {.inline.} =
+  # Conservative marking: `p` may or may not point into the heap. If it lies
+  # inside an allocated object, that object is marked.
+  # the addresses are not as cells on the stack, so turn them to cells:
+  var cell = usrToCell(p)
+  var c = cast[TAddress](cell)
+  if c >% PageSize:
+    # fast check: does it look like a cell?
+    var objStart = cast[PCell](interiorAllocatedPtr(gch.region, cell))
+    if objStart != nil:
+      mark(gch, objStart)
+# ----------------- stack management --------------------------------------
+#  inspired from Smart Eiffel
+# `stackIncreases` tells whether stack addresses grow upwards on the target
+when defined(sparc):
+  const stackIncreases = false
+elif defined(hppa) or defined(hp9000) or defined(hp9000s300) or
+     defined(hp9000s700) or defined(hp9000s800) or defined(hp9000s820):
+  const stackIncreases = true
+else:
+  # every other target
+  const stackIncreases = false
+when not defined(useNimRtl):
+  {.push stack_trace: off.}
+  proc setStackBottom(theStackBottom: pointer) =
+    # Records the stack bottom in `gch.stackBottom`. On later calls the
+    # stored value is only moved outwards: towards lower addresses if the
+    # stack grows upwards, towards higher addresses otherwise.
+    #c_fprintf(c_stdout, "stack bottom: %p;\n", theStackBottom)
+    # the first init must be the one that defines the stack bottom:
+    if gch.stackBottom == nil: gch.stackBottom = theStackBottom
+    else:
+      var a = cast[TAddress](theStackBottom) # and not PageMask - PageSize*2
+      var b = cast[TAddress](gch.stackBottom)
+      #c_fprintf(c_stdout, "old: %p new: %p;\n",gch.stackBottom,theStackBottom)
+      when stackIncreases:
+        gch.stackBottom = cast[pointer](min(a, b))
+      else:
+        gch.stackBottom = cast[pointer](max(a, b))
+  {.pop.}
+proc stackSize(): int {.noinline.} =
+  # distance in bytes between a local variable of this proc and the recorded
+  # stack bottom
+  var stackTop {.volatile.}: pointer
+  result = abs(cast[int](addr(stackTop)) - cast[int](gch.stackBottom))
+when defined(sparc): # For SPARC architecture.
+  proc isOnStack(p: pointer): bool =
+    var stackTop {.volatile.}: pointer
+    stackTop = addr(stackTop)
+    var b = cast[TAddress](gch.stackBottom)
+    var a = cast[TAddress](stackTop)
+    var x = cast[TAddress](p)
+    result = a <=% x and x <=% b
+  proc markStackAndRegisters(gch: var TGcHeap) {.noinline, cdecl.} =
+    when defined(sparcv9):
+      asm  """"flushw \n" """
+    else:
+      asm  """"ta      0x3   ! ST_FLUSH_WINDOWS\n" """
+    var
+      max = gch.stackBottom
+      sp: PPointer
+      stackTop: array[0..1, pointer]
+    sp = addr(stackTop[0])
+    # Addresses decrease as the stack grows.
+    while sp <= max:
+      gcMark(gch, sp[])
+      sp = cast[ppointer](cast[TAddress](sp) +% sizeof(pointer))
+elif defined(ELATE):
+  {.error: "stack marking code is to be written for this architecture".}
+elif stackIncreases:
+  # ---------------------------------------------------------------------------
+  # Generic code for architectures where addresses increase as the stack grows.
+  # ---------------------------------------------------------------------------
+  proc isOnStack(p: pointer): bool =
+    # `p` counts as a stack address when it lies in the closed range from
+    # gch.stackBottom up to the address of a local of this proc (addresses
+    # compared as unsigned).
+    var probe {.volatile.}: pointer
+    probe = addr(probe)
+    let
+      bottom = cast[TAddress](gch.stackBottom)
+      top = cast[TAddress](probe)
+      target = cast[TAddress](p)
+    result = bottom <=% target and target <=% top
+  var
+    jmpbufSize {.importc: "sizeof(jmp_buf)", nodecl.}: int
+      # a little hack to get the size of a TJmpBuf in the generated C code
+      # in a platform independent way
+  proc markStackAndRegisters(gch: var TGcHeap) {.noinline, cdecl.} =
+    # Passes every pointer-sized word from the last slot of a local jmp_buf
+    # down to gch.stackBottom (inclusive) to gcMark.
+    var registers: C_JmpBuf
+    if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
+      var max = cast[TAddress](gch.stackBottom)
+      # start at the last pointer-sized slot inside `registers`
+      var sp = cast[TAddress](addr(registers)) +% jmpbufSize -% sizeof(pointer)
+      # sp will traverse the JMP_BUF as well (jmp_buf size is added,
+      # otherwise sp would be below the registers structure).
+      while sp >=% max:
+        gcMark(gch, cast[ppointer](sp)[])
+        sp = sp -% sizeof(pointer)
+else:
+  # `else:` is required here: without it the definitions below would belong
+  # to the `elif stackIncreases:` branch and clash with the procs of the same
+  # name declared there.
+  # ---------------------------------------------------------------------------
+  # Generic code for architectures where addresses decrease as the stack grows.
+  # ---------------------------------------------------------------------------
+  proc isOnStack(p: pointer): bool =
+    # True when `p` lies between the address of a local of this proc and
+    # gch.stackBottom, both ends inclusive (addresses compared as unsigned).
+    var stackTop {.volatile.}: pointer
+    stackTop = addr(stackTop)
+    var b = cast[TAddress](gch.stackBottom)
+    var a = cast[TAddress](stackTop)
+    var x = cast[TAddress](p)
+    result = a <=% x and x <=% b
+  proc markStackAndRegisters(gch: var TGcHeap) {.noinline, cdecl.} =
+    # Passes every pointer-sized word from the start of a local jmp_buf up to
+    # gch.stackBottom (inclusive) to gcMark.
+    # We use a jmp_buf buffer that is in the C stack.
+    # Used to traverse the stack and registers assuming
+    # that 'setjmp' will save registers in the C stack.
+    type PStackSlice = ptr array [0..7, pointer]
+    var registers: C_JmpBuf
+    if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
+      var max = cast[TAddress](gch.stackBottom)
+      var sp = cast[TAddress](addr(registers))
+      # loop unrolled: eight slots per iteration while more than eight
+      # pointer-sized slots remain below `max`
+      while sp <% max - 8*sizeof(pointer):
+        gcMark(gch, cast[PStackSlice](sp)[0])
+        gcMark(gch, cast[PStackSlice](sp)[1])
+        gcMark(gch, cast[PStackSlice](sp)[2])
+        gcMark(gch, cast[PStackSlice](sp)[3])
+        gcMark(gch, cast[PStackSlice](sp)[4])
+        gcMark(gch, cast[PStackSlice](sp)[5])
+        gcMark(gch, cast[PStackSlice](sp)[6])
+        gcMark(gch, cast[PStackSlice](sp)[7])
+        sp = sp +% sizeof(pointer)*8
+      # last few entries, one slot at a time, including the slot at `max`:
+      while sp <=% max:
+        gcMark(gch, cast[ppointer](sp)[])
+        sp = sp +% sizeof(pointer)
+# ----------------------------------------------------------------------------
+# end of non-portable code
+# ----------------------------------------------------------------------------
+proc collectCTBody(gch: var TGcHeap) =
+  # Runs one collection unconditionally: marks from the stack/registers and
+  # from the globals, sweeps, then updates the statistics and the threshold
+  # that collectCT checks.
+  gch.stat.maxStackSize = max(gch.stat.maxStackSize, stackSize())
+  prepareForInteriorPointerChecking(gch.region)
+  markStackAndRegisters(gch)
+  markGlobals(gch)
+  sweep(gch)
+  inc(gch.stat.stackScans)
+  inc(gch.stat.collections)
+  # re-create gch.marked for the next collection
+  deinit(gch.marked)
+  init(gch.marked)
+  # next threshold: occupied memory scaled by cycleIncrease, but never below
+  # InitialCycleThreshold
+  gch.cycleThreshold = max(InitialCycleThreshold, getOccupiedMem() *
+                           cycleIncrease)
+  gch.stat.maxThreshold = max(gch.stat.maxThreshold, gch.cycleThreshold)
+  sysAssert(allocInv(gch.region), "collectCT: end")
+proc collectCT(gch: var TGcHeap) =
+  # Collects only when the occupied memory of gch.region has reached
+  # gch.cycleThreshold and no GC lock is held (gch.recGcLock == 0).
+  if getOccupiedMem(gch.region) < gch.cycleThreshold: return
+  if gch.recGcLock != 0: return
+  collectCTBody(gch)
+when not defined(useNimRtl):
+  proc GC_disable() =
+    # Increments gch.recGcLock; collectCT does not collect while that
+    # counter is non-zero. The increment is atomic when threads share a heap.
+    when not (hasThreadSupport and hasSharedHeap):
+      inc(gch.recGcLock)
+    else:
+      atomicInc(gch.recGcLock, 1)
+  proc GC_enable() =
+    # Decrements gch.recGcLock again; does nothing when the counter is not
+    # positive, so surplus calls cannot drive it below zero.
+    if gch.recGcLock <= 0: return
+    when hasThreadSupport and hasSharedHeap:
+      atomicDec(gch.recGcLock, 1)
+    else:
+      dec(gch.recGcLock)
+  proc GC_setStrategy(strategy: TGC_Strategy) =
+    # `strategy` is accepted and ignored: the body is empty.
+    nil
+  proc GC_enableMarkAndSweep() =
+    # Resets the threshold checked by collectCT to InitialCycleThreshold.
+    gch.cycleThreshold = InitialCycleThreshold
+  proc GC_disableMarkAndSweep() =
+    # Raises the threshold checked by collectCT to one below the largest
+    # value its type can hold.
+    gch.cycleThreshold = high(gch.cycleThreshold)-1
+    # set to (almost) the max value so that the occupied-memory check in
+    # collectCT practically never succeeds
+  proc GC_fullCollect() =
+    # Requests a collection regardless of the current threshold: the
+    # threshold is zeroed for the duration of the collectCT call and put back
+    # afterwards. collectCT still declines to run while gch.recGcLock is
+    # non-zero.
+    acquire(gch)
+    let savedThreshold = gch.cycleThreshold
+    gch.cycleThreshold = 0 # makes the threshold check in collectCT pass
+    collectCT(gch)
+    gch.cycleThreshold = savedThreshold
+    release(gch)
+  proc GC_getStatistics(): string =
+    # Returns a multi-line report: total and occupied memory plus the counters
+    # kept in gch.stat, one "[GC] ..." line each.
+    # GC_disable()/GC_enable() bracket the work so that building the report
+    # does not start a collection through collectCT.
+    GC_disable()
+    result = "[GC] total memory: " & $getTotalMem() & "\n" &
+             "[GC] occupied memory: " & $getOccupiedMem() & "\n" &
+             "[GC] stack scans: " & $gch.stat.stackScans & "\n" &
+             "[GC] stack cells: " & $gch.stat.maxStackCells & "\n" &
+             "[GC] collections: " & $gch.stat.collections & "\n" &
+             "[GC] max threshold: " & $gch.stat.maxThreshold & "\n" &
+             "[GC] max cycle table size: " & $gch.stat.cycleTableSize & "\n" &
+             "[GC] max stack size: " & $gch.stat.maxStackSize & "\n"
+    # only when compiled with traceGC
+    when traceGC: writeLeakage()
+    GC_enable()
diff --git a/lib/system/mmdisp.nim b/lib/system/mmdisp.nim
index 4b5509774..eee98fd52 100755
--- a/lib/system/mmdisp.nim
+++ b/lib/system/mmdisp.nim
@@ -309,6 +309,9 @@ else:
     sysAssert(sizeof(TCell) == sizeof(TFreeCell), "sizeof TFreeCell")
   when compileOption("gc", "v2"):
     include "system/gc2"
+  elif defined(gcMarkAndSweep):
+    # XXX use 'compileOption' here
+    include "system/gc_ms"
   else:
     include "system/gc"
diff --git a/lib/system/sysstr.nim b/lib/system/sysstr.nim
index bbb86d329..2e60c6153 100755
--- a/lib/system/sysstr.nim
+++ b/lib/system/sysstr.nim
@@ -202,7 +202,8 @@ proc setLengthSeq(seq: PGenericSeq, elemSize, newLen: int): PGenericSeq {.
   elif newLen < result.len:
     # we need to decref here, otherwise the GC leaks!
-    when not defined(boehmGC) and not defined(nogc):
+    when not defined(boehmGC) and not defined(nogc) and 
+         not defined(gcMarkAndSweep):
       when compileOption("gc", "v2"):
         for i in newLen..result.len-1:
           let len0 = gch.tempStack.len