Diffstat (limited to 'lib/system')
-rw-r--r--  lib/system/excpt.nim      |   4
-rw-r--r--  lib/system/gc.nim         | 184
-rw-r--r--  lib/system/gc_common.nim  | 275
-rw-r--r--  lib/system/gc_ms.nim      | 176
4 files changed, 331 insertions(+), 308 deletions(-)
diff --git a/lib/system/excpt.nim b/lib/system/excpt.nim
index 5d2faa2d6..df28c1493 100644
--- a/lib/system/excpt.nim
+++ b/lib/system/excpt.nim
@@ -44,10 +44,12 @@ var
     # a global variable for the root of all try blocks
   currException {.threadvar.}: ref Exception
 
+proc getFrame*(): PFrame {.compilerRtl, inl.} = framePtr
+
 proc popFrame {.compilerRtl, inl.} =
   framePtr = framePtr.prev
 
-proc setFrame(s: PFrame) {.compilerRtl, inl.} =
+proc setFrame*(s: PFrame) {.compilerRtl, inl.} =
   framePtr = s
 
 proc pushSafePoint(s: PSafePoint) {.compilerRtl, inl.} =
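
The two procs exported above, getFrame and setFrame, give code outside the
system module read/write access to the stack-trace frame pointer. As a hedged
illustration of why a coroutine library would want that (this sketch is not
part of the patch; CoroContext, onSuspend and onResume are made-up names), the
frame chain can be saved and restored around a context switch so exceptions
raised after a switch still see the right traceback:

    type
      CoroContext = object
        frame: PFrame          # saved framePtr for this coroutine

    proc onSuspend(ctx: var CoroContext) =
      # remember where this coroutine's stack-trace chain currently ends
      ctx.frame = getFrame()

    proc onResume(ctx: CoroContext) =
      # restore it before the coroutine runs again
      setFrame(ctx.frame)
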
diff --git a/lib/system/gc.nim b/lib/system/gc.nim
index ae8bb724f..df7864cff 100644
--- a/lib/system/gc.nim
+++ b/lib/system/gc.nim
@@ -16,6 +16,9 @@
 # Special care has been taken to avoid recursion as far as possible to avoid
 # stack overflows when traversing deep datastructures. It is well-suited
 # for soft real time applications (like games).
+
+import arch
+
 {.push profiler:off.}
 
 const
@@ -64,8 +67,16 @@ type
     cycleTableSize: int      # max entries in cycle table
     maxPause: int64          # max measured GC pause in nanoseconds
 
+  GcStack {.final.} = object
+    prev: ptr GcStack
+    next: ptr GcStack
+    starts: pointer
+    pos: pointer
+    maxStackSize: int
+
   GcHeap {.final, pure.} = object # this contains the zero count and
                                    # non-zero count table
+    stack: ptr GcStack
     stackBottom: pointer
     cycleThreshold: int
     when useCellIds:
@@ -154,7 +165,7 @@ template gcTrace(cell, state: expr): stmt {.immediate.} =
 
 # forward declarations:
 proc collectCT(gch: var GcHeap) {.benign.}
-proc isOnStack*(p: pointer): bool {.noinline, benign.}
+proc isOnStack(p: pointer): bool {.noinline, benign.}
 proc forAllChildren(cell: PCell, op: WalkOp) {.benign.}
 proc doOperation(p: pointer, op: WalkOp) {.benign.}
 proc forAllChildrenAux(dest: pointer, mt: PNimType, op: WalkOp) {.benign.}
@@ -293,20 +304,6 @@ proc initGC() =
     when useMarkForDebug or useBackupGc:
       init(gch.marked)
 
-var
-  localGcInitialized {.rtlThreadVar.}: bool
-
-proc setupForeignThreadGc*() =
-  ## call this if you registered a callback that will be run from a thread not
-  ## under your control. This has a cheap thread-local guard, so the GC for
-  ## this thread will only be initialized once per thread, no matter how often
-  ## it is called.
-  if not localGcInitialized:
-    localGcInitialized = true
-    var stackTop {.volatile.}: pointer
-    setStackBottom(addr(stackTop))
-    initGC()
-
 when useMarkForDebug or useBackupGc:
   type
     GlobalMarkerProc = proc () {.nimcall, benign.}
@@ -816,138 +813,7 @@ proc markThreadStacks(gch: var GcHeap) =
         sp = sp +% sizeof(pointer)
       it = it.next
 
-# ----------------- stack management --------------------------------------
-#  inspired from Smart Eiffel
-
-when defined(sparc):
-  const stackIncreases = false
-elif defined(hppa) or defined(hp9000) or defined(hp9000s300) or
-     defined(hp9000s700) or defined(hp9000s800) or defined(hp9000s820):
-  const stackIncreases = true
-else:
-  const stackIncreases = false
-
-when not defined(useNimRtl):
-  {.push stack_trace: off.}
-  proc setStackBottom(theStackBottom: pointer) =
-    #c_fprintf(c_stdout, "stack bottom: %p;\n", theStackBottom)
-    # the first init must be the one that defines the stack bottom:
-    if gch.stackBottom == nil: gch.stackBottom = theStackBottom
-    else:
-      var a = cast[ByteAddress](theStackBottom) # and not PageMask - PageSize*2
-      var b = cast[ByteAddress](gch.stackBottom)
-      #c_fprintf(c_stdout, "old: %p new: %p;\n",gch.stackBottom,theStackBottom)
-      when stackIncreases:
-        gch.stackBottom = cast[pointer](min(a, b))
-      else:
-        gch.stackBottom = cast[pointer](max(a, b))
-  {.pop.}
-
-proc stackSize(): int {.noinline.} =
-  var stackTop {.volatile.}: pointer
-  result = abs(cast[int](addr(stackTop)) - cast[int](gch.stackBottom))
-
-when defined(sparc): # For SPARC architecture.
-  proc isOnStack(p: pointer): bool =
-    var stackTop {.volatile.}: pointer
-    stackTop = addr(stackTop)
-    var b = cast[TAddress](gch.stackBottom)
-    var a = cast[TAddress](stackTop)
-    var x = cast[TAddress](p)
-    result = a <=% x and x <=% b
-
-  template forEachStackSlot(gch, gcMark: expr) {.immediate, dirty.} =
-    when defined(sparcv9):
-      asm  """"flushw \n" """
-    else:
-      asm  """"ta      0x3   ! ST_FLUSH_WINDOWS\n" """
-
-    var
-      max = gch.stackBottom
-      sp: PPointer
-      stackTop: array[0..1, pointer]
-    sp = addr(stackTop[0])
-    # Addresses decrease as the stack grows.
-    while sp <= max:
-      gcMark(gch, sp[])
-      sp = cast[PPointer](cast[TAddress](sp) +% sizeof(pointer))
-
-elif defined(ELATE):
-  {.error: "stack marking code is to be written for this architecture".}
-
-elif stackIncreases:
-  # ---------------------------------------------------------------------------
-  # Generic code for architectures where addresses increase as the stack grows.
-  # ---------------------------------------------------------------------------
-  proc isOnStack(p: pointer): bool =
-    var stackTop {.volatile.}: pointer
-    stackTop = addr(stackTop)
-    var a = cast[TAddress](gch.stackBottom)
-    var b = cast[TAddress](stackTop)
-    var x = cast[TAddress](p)
-    result = a <=% x and x <=% b
-
-  var
-    jmpbufSize {.importc: "sizeof(jmp_buf)", nodecl.}: int
-      # a little hack to get the size of a JmpBuf in the generated C code
-      # in a platform independent way
-
-  template forEachStackSlot(gch, gcMark: expr) {.immediate, dirty.} =
-    var registers: C_JmpBuf
-    if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
-      var max = cast[TAddress](gch.stackBottom)
-      var sp = cast[TAddress](addr(registers)) +% jmpbufSize -% sizeof(pointer)
-      # sp will traverse the JMP_BUF as well (jmp_buf size is added,
-      # otherwise sp would be below the registers structure).
-      while sp >=% max:
-        gcMark(gch, cast[ppointer](sp)[])
-        sp = sp -% sizeof(pointer)
-
-else:
-  # ---------------------------------------------------------------------------
-  # Generic code for architectures where addresses decrease as the stack grows.
-  # ---------------------------------------------------------------------------
-  proc isOnStack(p: pointer): bool =
-    var stackTop {.volatile.}: pointer
-    stackTop = addr(stackTop)
-    var b = cast[ByteAddress](gch.stackBottom)
-    var a = cast[ByteAddress](stackTop)
-    var x = cast[ByteAddress](p)
-    result = a <=% x and x <=% b
-
-  template forEachStackSlot(gch, gcMark: expr) {.immediate, dirty.} =
-    # We use a jmp_buf buffer that is in the C stack.
-    # Used to traverse the stack and registers assuming
-    # that 'setjmp' will save registers in the C stack.
-    type PStackSlice = ptr array [0..7, pointer]
-    var registers {.noinit.}: C_JmpBuf
-    if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
-      var max = cast[ByteAddress](gch.stackBottom)
-      var sp = cast[ByteAddress](addr(registers))
-      when defined(amd64):
-        # words within the jmp_buf structure may not be properly aligned.
-        let regEnd = sp +% sizeof(registers)
-        while sp <% regEnd:
-          gcMark(gch, cast[PPointer](sp)[])
-          gcMark(gch, cast[PPointer](sp +% sizeof(pointer) div 2)[])
-          sp = sp +% sizeof(pointer)
-      # Make sure sp is word-aligned
-      sp = sp and not (sizeof(pointer) - 1)
-      # loop unrolled:
-      while sp <% max - 8*sizeof(pointer):
-        gcMark(gch, cast[PStackSlice](sp)[0])
-        gcMark(gch, cast[PStackSlice](sp)[1])
-        gcMark(gch, cast[PStackSlice](sp)[2])
-        gcMark(gch, cast[PStackSlice](sp)[3])
-        gcMark(gch, cast[PStackSlice](sp)[4])
-        gcMark(gch, cast[PStackSlice](sp)[5])
-        gcMark(gch, cast[PStackSlice](sp)[6])
-        gcMark(gch, cast[PStackSlice](sp)[7])
-        sp = sp +% sizeof(pointer)*8
-      # last few entries:
-      while sp <=% max:
-        gcMark(gch, cast[PPointer](sp)[])
-        sp = sp +% sizeof(pointer)
+include gc_common
 
 proc markStackAndRegisters(gch: var GcHeap) {.noinline, cdecl.} =
   forEachStackSlot(gch, gcMark)
@@ -956,10 +822,6 @@ when useMarkForDebug or useBackupGc:
   proc markStackAndRegistersForSweep(gch: var GcHeap) =
     forEachStackSlot(gch, stackMarkS)
 
-# ----------------------------------------------------------------------------
-# end of non-portable code
-# ----------------------------------------------------------------------------
-
 proc collectZCT(gch: var GcHeap): bool =
   # Note: Freeing may add child objects to the ZCT! So essentially we do
   # deep freeing, which is bad for incremental operation. In order to
@@ -1033,7 +895,8 @@ proc collectCTBody(gch: var GcHeap) =
     let t0 = getticks()
   sysAssert(allocInv(gch.region), "collectCT: begin")
 
-  gch.stat.maxStackSize = max(gch.stat.maxStackSize, stackSize())
+  when not defined(nimCoroutines):
+    gch.stat.maxStackSize = max(gch.stat.maxStackSize, stackSize())
   sysAssert(gch.decStack.len == 0, "collectCT")
   prepareForInteriorPointerChecking(gch.region)
   markStackAndRegisters(gch)
@@ -1064,11 +927,19 @@ when useMarkForDebug or useBackupGc:
     markStackAndRegistersForSweep(gch)
     markGlobals(gch)
 
+when defined(nimCoroutines):
+  proc currentStackSizes(): int =
+    for stack in items(gch.stack):
+      result = result + stackSize(stack.starts, stack.pos)
+
 proc collectCT(gch: var GcHeap) =
   # stackMarkCosts prevents some pathological behaviour: Stack marking
   # becomes more expensive with large stacks and large stacks mean that
   # cells with RC=0 are more likely to be kept alive by the stack.
-  let stackMarkCosts = max(stackSize() div (16*sizeof(int)), ZctThreshold)
+  when defined(nimCoroutines):
+    let stackMarkCosts = max(currentStackSizes() div (16*sizeof(int)), ZctThreshold)
+  else:
+    let stackMarkCosts = max(stackSize() div (16*sizeof(int)), ZctThreshold)
   if (gch.zct.len >= stackMarkCosts or (cycleGC and
       getOccupiedMem(gch.region)>=gch.cycleThreshold) or alwaysGC) and
       gch.recGcLock == 0:
@@ -1137,8 +1008,13 @@ when not defined(useNimRtl):
              "[GC] max threshold: " & $gch.stat.maxThreshold & "\n" &
              "[GC] zct capacity: " & $gch.zct.cap & "\n" &
              "[GC] max cycle table size: " & $gch.stat.cycleTableSize & "\n" &
-             "[GC] max stack size: " & $gch.stat.maxStackSize & "\n" &
              "[GC] max pause time [ms]: " & $(gch.stat.maxPause div 1000_000)
+    when defined(nimCoroutines):
+      result = result & "[GC] number of stacks: " & $gch.stack.len & "\n"
+      for stack in items(gch.stack):
+        result = result & "[GC]   stack " & stack.starts.repr & "[GC]     max stack size " & $stack.maxStackSize & "\n"
+    else:
+      result = result & "[GC] max stack size: " & $gch.stat.maxStackSize & "\n"
     GC_enable()
 
 {.pop.}
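
Note that every branch above guarded by `when defined(nimCoroutines)` is only
compiled when that symbol is defined, e.g. with `nim c -d:nimCoroutines
app.nim`; without it, collectCT and the statistics report keep the old
single-stack accounting. A minimal way to observe the changed report, assuming
a build where GC_getStatistics is available (i.e. not useNimRtl):

    # Prints the "[GC] ..." report assembled above.  With -d:nimCoroutines it
    # contains a "number of stacks" line plus one "stack ..." entry per
    # registered stack; otherwise a single "max stack size" line.
    echo GC_getStatistics()
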
diff --git a/lib/system/gc_common.nim b/lib/system/gc_common.nim
new file mode 100644
index 000000000..c7dd667e4
--- /dev/null
+++ b/lib/system/gc_common.nim
@@ -0,0 +1,275 @@
+#
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2015 Rokas Kupstys
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+proc len(stack: ptr GcStack): int =
+  if stack == nil:
+    return 0
+
+  var s = stack
+  result = 1
+  while s.next != nil:
+    inc(result)
+    s = s.next
+
+when defined(nimCoroutines):
+  proc stackSize(stackBottom: pointer, pos: pointer=nil): int {.noinline.} =
+    var sp: pointer
+    if pos == nil:
+      var stackTop {.volatile.}: pointer
+      sp = addr(stackTop)
+    else:
+      sp = pos
+    result = abs(cast[int](sp) - cast[int](stackBottom))
+
+  proc GC_addStack*(starts: pointer) {.cdecl, exportc.} =
+    var sp {.volatile.}: pointer
+    var stack = cast[ptr GcStack](alloc0(sizeof(GcStack)))
+    stack.starts = starts
+    stack.pos = addr sp
+    if gch.stack == nil:
+      gch.stack = stack
+    else:
+      stack.next = gch.stack
+      gch.stack.prev = stack
+      gch.stack = stack
+    # c_fprintf(c_stdout, "[GC] added stack 0x%016X\n", starts)
+
+  proc GC_removeStack*(starts: pointer) {.cdecl, exportc.} =
+    var stack = gch.stack
+    while stack != nil:
+      if stack.starts == starts:
+        if stack.prev == nil:
+          if stack.next != nil:
+            stack.next.prev = nil
+          gch.stack = stack.next
+        else:
+          stack.prev.next = stack.next
+          if stack.next != nil:
+              stack.next.prev = stack.prev
+        dealloc(stack)
+        # echo "[GC] removed stack ", starts.repr
+        break
+      else:
+        stack = stack.next
+
+  proc GC_setCurrentStack*(starts, pos: pointer) {.cdecl, exportc.} =
+    var stack = gch.stack
+    while stack != nil:
+      if stack.starts == starts:
+        stack.pos = pos
+        stack.maxStackSize = max(stack.maxStackSize, stackSize(stack.starts, pos))
+        return
+      stack = stack.next
+    gcAssert(false, "Current stack position does not belong to registered stack")
+else:
+  proc stackSize(): int {.noinline.} =
+    var stackTop {.volatile.}: pointer
+    result = abs(cast[int](addr(stackTop)) - cast[int](gch.stackBottom))
+
+iterator items(stack: ptr GcStack): ptr GcStack =
+  var s = stack
+  while not isNil(s):
+    yield s
+    s = s.next
+
+var
+  localGcInitialized {.rtlThreadVar.}: bool
+
+proc setupForeignThreadGc*() =
+  ## call this if you registered a callback that will be run from a thread not
+  ## under your control. This has a cheap thread-local guard, so the GC for
+  ## this thread will only be initialized once per thread, no matter how often
+  ## it is called.
+  if not localGcInitialized:
+    localGcInitialized = true
+    var stackTop {.volatile.}: pointer
+    setStackBottom(addr(stackTop))
+    initGC()
+
+# ----------------- stack management --------------------------------------
+#  inspired from Smart Eiffel
+
+when defined(sparc):
+  const stackIncreases = false
+elif defined(hppa) or defined(hp9000) or defined(hp9000s300) or
+     defined(hp9000s700) or defined(hp9000s800) or defined(hp9000s820):
+  const stackIncreases = true
+else:
+  const stackIncreases = false
+
+when not defined(useNimRtl):
+  {.push stack_trace: off.}
+  proc setStackBottom(theStackBottom: pointer) =
+    #c_fprintf(c_stdout, "stack bottom: %p;\n", theStackBottom)
+    # the first init must be the one that defines the stack bottom:
+    when defined(nimCoroutines):
+      GC_addStack(theStackBottom)
+    else:
+      if gch.stackBottom == nil: gch.stackBottom = theStackBottom
+      else:
+        var a = cast[ByteAddress](theStackBottom) # and not PageMask - PageSize*2
+        var b = cast[ByteAddress](gch.stackBottom)
+        #c_fprintf(c_stdout, "old: %p new: %p;\n",gch.stackBottom,theStackBottom)
+        when stackIncreases:
+          gch.stackBottom = cast[pointer](min(a, b))
+        else:
+          gch.stackBottom = cast[pointer](max(a, b))
+  {.pop.}
+
+when defined(sparc): # For SPARC architecture.
+  when defined(nimCoroutines):
+    {.error: "Nim coroutines are not supported on this platform."}
+
+  proc isOnStack(p: pointer): bool =
+    var stackTop {.volatile.}: pointer
+    stackTop = addr(stackTop)
+    var b = cast[TAddress](gch.stackBottom)
+    var a = cast[TAddress](stackTop)
+    var x = cast[TAddress](p)
+    result = a <=% x and x <=% b
+
+  template forEachStackSlot(gch, gcMark: expr) {.immediate, dirty.} =
+    when defined(sparcv9):
+      asm  """"flushw \n" """
+    else:
+      asm  """"ta      0x3   ! ST_FLUSH_WINDOWS\n" """
+
+    var
+      max = gch.stackBottom
+      sp: PPointer
+      stackTop: array[0..1, pointer]
+    sp = addr(stackTop[0])
+    # Addresses decrease as the stack grows.
+    while sp <= max:
+      gcMark(gch, sp[])
+      sp = cast[PPointer](cast[TAddress](sp) +% sizeof(pointer))
+
+elif defined(ELATE):
+  {.error: "stack marking code is to be written for this architecture".}
+
+elif stackIncreases:
+  # ---------------------------------------------------------------------------
+  # Generic code for architectures where addresses increase as the stack grows.
+  # ---------------------------------------------------------------------------
+  when defined(nimCoroutines):
+    {.error: "Nim coroutines are not supported on this platform."}
+  proc isOnStack(p: pointer): bool =
+    var stackTop {.volatile.}: pointer
+    stackTop = addr(stackTop)
+    var a = cast[TAddress](gch.stackBottom)
+    var b = cast[TAddress](stackTop)
+    var x = cast[TAddress](p)
+    result = a <=% x and x <=% b
+
+  var
+    jmpbufSize {.importc: "sizeof(jmp_buf)", nodecl.}: int
+      # a little hack to get the size of a JmpBuf in the generated C code
+      # in a platform independent way
+
+  template forEachStackSlot(gch, gcMark: expr) {.immediate, dirty.} =
+    var registers: C_JmpBuf
+    if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
+      var max = cast[TAddress](gch.stackBottom)
+      var sp = cast[TAddress](addr(registers)) +% jmpbufSize -% sizeof(pointer)
+      # sp will traverse the JMP_BUF as well (jmp_buf size is added,
+      # otherwise sp would be below the registers structure).
+      while sp >=% max:
+        gcMark(gch, cast[ppointer](sp)[])
+        sp = sp -% sizeof(pointer)
+
+else:
+  # ---------------------------------------------------------------------------
+  # Generic code for architectures where addresses decrease as the stack grows.
+  # ---------------------------------------------------------------------------
+  when defined(nimCoroutines):
+    proc isOnStack(p: pointer): bool =
+      var stackTop {.volatile.}: pointer
+      stackTop = addr(stackTop)
+      for stack in items(gch.stack):
+        var b = cast[ByteAddress](stack.starts)
+        var a = cast[ByteAddress](stack.starts) - stack.maxStackSize
+        var x = cast[ByteAddress](p)
+        if a <=% x and x <=% b:
+          return true
+  
+    template forEachStackSlot(gch, gcMark: expr) {.immediate, dirty.} =
+      # We use a jmp_buf buffer that is in the C stack.
+      # Used to traverse the stack and registers assuming
+      # that 'setjmp' will save registers in the C stack.
+      type PStackSlice = ptr array [0..7, pointer]
+      var registers {.noinit.}: Registers
+      getRegisters(registers)
+      for i in registers.low .. registers.high:
+        gcMark(gch, cast[PPointer](registers[i]))
+  
+      for stack in items(gch.stack):
+        stack.maxStackSize = max(stack.maxStackSize, stackSize(stack.starts))
+        var max = cast[ByteAddress](stack.starts)
+        var sp = cast[ByteAddress](stack.pos)
+        # loop unrolled:
+        while sp <% max - 8*sizeof(pointer):
+          gcMark(gch, cast[PStackSlice](sp)[0])
+          gcMark(gch, cast[PStackSlice](sp)[1])
+          gcMark(gch, cast[PStackSlice](sp)[2])
+          gcMark(gch, cast[PStackSlice](sp)[3])
+          gcMark(gch, cast[PStackSlice](sp)[4])
+          gcMark(gch, cast[PStackSlice](sp)[5])
+          gcMark(gch, cast[PStackSlice](sp)[6])
+          gcMark(gch, cast[PStackSlice](sp)[7])
+          sp = sp +% sizeof(pointer)*8
+        # last few entries:
+        while sp <=% max:
+          gcMark(gch, cast[PPointer](sp)[])
+          sp = sp +% sizeof(pointer)
+  else:
+    proc isOnStack(p: pointer): bool =
+      var stackTop {.volatile.}: pointer
+      stackTop = addr(stackTop)
+      var b = cast[ByteAddress](gch.stackBottom)
+      var a = cast[ByteAddress](stackTop)
+      var x = cast[ByteAddress](p)
+      result = a <=% x and x <=% b
+
+    template forEachStackSlot(gch, gcMark: expr) {.immediate, dirty.} =
+      # We use a jmp_buf buffer that is in the C stack.
+      # Used to traverse the stack and registers assuming
+      # that 'setjmp' will save registers in the C stack.
+      type PStackSlice = ptr array [0..7, pointer]
+      var registers {.noinit.}: C_JmpBuf
+      if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
+        var max = cast[ByteAddress](gch.stackBottom)
+        var sp = cast[ByteAddress](addr(registers))
+        when defined(amd64):
+          # words within the jmp_buf structure may not be properly aligned.
+          let regEnd = sp +% sizeof(registers)
+          while sp <% regEnd:
+            gcMark(gch, cast[PPointer](sp)[])
+            gcMark(gch, cast[PPointer](sp +% sizeof(pointer) div 2)[])
+            sp = sp +% sizeof(pointer)
+        # Make sure sp is word-aligned
+        sp = sp and not (sizeof(pointer) - 1)
+        # loop unrolled:
+        while sp <% max - 8*sizeof(pointer):
+          gcMark(gch, cast[PStackSlice](sp)[0])
+          gcMark(gch, cast[PStackSlice](sp)[1])
+          gcMark(gch, cast[PStackSlice](sp)[2])
+          gcMark(gch, cast[PStackSlice](sp)[3])
+          gcMark(gch, cast[PStackSlice](sp)[4])
+          gcMark(gch, cast[PStackSlice](sp)[5])
+          gcMark(gch, cast[PStackSlice](sp)[6])
+          gcMark(gch, cast[PStackSlice](sp)[7])
+          sp = sp +% sizeof(pointer)*8
+        # last few entries:
+        while sp <=% max:
+          gcMark(gch, cast[PPointer](sp)[])
+          sp = sp +% sizeof(pointer)
+
+# ----------------------------------------------------------------------------
+# end of non-portable code
+# ----------------------------------------------------------------------------
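
For context, a coroutine implementation would be expected to drive the three
exported entry points above roughly as follows. This is a hedged sketch, not
code from the patch: the Coro object and the surrounding procs are made-up
names, suspendCoro is assumed to run on the coroutine's own stack right before
switching away, and it assumes a -d:nimCoroutines build where GC_addStack,
GC_setCurrentStack and GC_removeStack are compiled into the system module.

    type
      Coro = object
        stackBottom: pointer   # the address passed to GC_addStack ("starts")
        sp: pointer            # stack position recorded at the last suspension

    proc initCoro(stackBottom: pointer): Coro =
      result.stackBottom = stackBottom
      GC_addStack(stackBottom)            # the collector now scans this stack

    proc suspendCoro(c: var Coro) =
      # called on the coroutine's own stack, just before switching away
      var marker {.volatile.}: pointer
      c.sp = addr(marker)
      # record how far the suspended stack is in use so forEachStackSlot
      # walks only the live range and maxStackSize stays accurate
      GC_setCurrentStack(c.stackBottom, c.sp)

    proc destroyCoro(c: Coro) =
      GC_removeStack(c.stackBottom)       # unregister before freeing the memory
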
diff --git a/lib/system/gc_ms.nim b/lib/system/gc_ms.nim
index ee80c61e9..2973d1b9a 100644
--- a/lib/system/gc_ms.nim
+++ b/lib/system/gc_ms.nim
@@ -9,6 +9,9 @@
 
 # A simple mark&sweep garbage collector for Nim. Define the
 # symbol ``gcUseBitvectors`` to generate a variant of this GC.
+
+import arch
+
 {.push profiler:off.}
 
 const
@@ -44,8 +47,16 @@ type
     maxStackSize: int        # max stack size
     freedObjects: int        # max entries in cycle table
 
+  GcStack {.final.} = object
+    prev: ptr GcStack
+    next: ptr GcStack
+    starts: pointer
+    pos: pointer
+    maxStackSize: int
+
   GcHeap = object            # this contains the zero count and
                              # non-zero count table
+    stack: ptr GcStack
     stackBottom: pointer
     cycleThreshold: int
     when useCellIds:
@@ -118,7 +129,6 @@ when BitsPerPage mod (sizeof(int)*8) != 0:
 
 # forward declarations:
 proc collectCT(gch: var GcHeap) {.benign.}
-proc isOnStack*(p: pointer): bool {.noinline, benign.}
 proc forAllChildren(cell: PCell, op: WalkOp) {.benign.}
 proc doOperation(p: pointer, op: WalkOp) {.benign.}
 proc forAllChildrenAux(dest: pointer, mt: PNimType, op: WalkOp) {.benign.}
@@ -169,20 +179,6 @@ proc initGC() =
       init(gch.allocated)
       init(gch.marked)
 
-var
-  localGcInitialized {.rtlThreadVar.}: bool
-
-proc setupForeignThreadGc*() =
-  ## call this if you registered a callback that will be run from a thread not
-  ## under your control. This has a cheap thread-local guard, so the GC for
-  ## this thread will only be initialized once per thread, no matter how often
-  ## it is called.
-  if not localGcInitialized:
-    localGcInitialized = true
-    var stackTop {.volatile.}: pointer
-    setStackBottom(addr(stackTop))
-    initGC()
-
 proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: WalkOp) {.benign.} =
   var d = cast[ByteAddress](dest)
   case n.kind
@@ -407,145 +403,14 @@ proc gcMark(gch: var GcHeap, p: pointer) {.inline.} =
     if objStart != nil:
       mark(gch, objStart)
 
-# ----------------- stack management --------------------------------------
-#  inspired from Smart Eiffel
+include gc_common
 
-when defined(sparc):
-  const stackIncreases = false
-elif defined(hppa) or defined(hp9000) or defined(hp9000s300) or
-     defined(hp9000s700) or defined(hp9000s800) or defined(hp9000s820):
-  const stackIncreases = true
-else:
-  const stackIncreases = false
-
-when not defined(useNimRtl):
-  {.push stack_trace: off.}
-  proc setStackBottom(theStackBottom: pointer) =
-    #c_fprintf(c_stdout, "stack bottom: %p;\n", theStackBottom)
-    # the first init must be the one that defines the stack bottom:
-    if gch.stackBottom == nil: gch.stackBottom = theStackBottom
-    else:
-      var a = cast[ByteAddress](theStackBottom) # and not PageMask - PageSize*2
-      var b = cast[ByteAddress](gch.stackBottom)
-      #c_fprintf(c_stdout, "old: %p new: %p;\n",gch.stackBottom,theStackBottom)
-      when stackIncreases:
-        gch.stackBottom = cast[pointer](min(a, b))
-      else:
-        gch.stackBottom = cast[pointer](max(a, b))
-  {.pop.}
-
-proc stackSize(): int {.noinline.} =
-  var stackTop {.volatile.}: pointer
-  result = abs(cast[int](addr(stackTop)) - cast[int](gch.stackBottom))
-
-when defined(sparc): # For SPARC architecture.
-  proc isOnStack(p: pointer): bool =
-    var stackTop {.volatile.}: pointer
-    stackTop = addr(stackTop)
-    var b = cast[ByteAddress](gch.stackBottom)
-    var a = cast[ByteAddress](stackTop)
-    var x = cast[ByteAddress](p)
-    result = a <=% x and x <=% b
-
-  proc markStackAndRegisters(gch: var GcHeap) {.noinline, cdecl.} =
-    when defined(sparcv9):
-      asm  """"flushw \n" """
-    else:
-      asm  """"ta      0x3   ! ST_FLUSH_WINDOWS\n" """
-
-    var
-      max = gch.stackBottom
-      sp: PPointer
-      stackTop: array[0..1, pointer]
-    sp = addr(stackTop[0])
-    # Addresses decrease as the stack grows.
-    while sp <= max:
-      gcMark(gch, sp[])
-      sp = cast[ppointer](cast[ByteAddress](sp) +% sizeof(pointer))
-
-elif defined(ELATE):
-  {.error: "stack marking code is to be written for this architecture".}
-
-elif stackIncreases:
-  # ---------------------------------------------------------------------------
-  # Generic code for architectures where addresses increase as the stack grows.
-  # ---------------------------------------------------------------------------
-  proc isOnStack(p: pointer): bool =
-    var stackTop {.volatile.}: pointer
-    stackTop = addr(stackTop)
-    var a = cast[ByteAddress](gch.stackBottom)
-    var b = cast[ByteAddress](stackTop)
-    var x = cast[ByteAddress](p)
-    result = a <=% x and x <=% b
-
-  var
-    jmpbufSize {.importc: "sizeof(jmp_buf)", nodecl.}: int
-      # a little hack to get the size of a JmpBuf in the generated C code
-      # in a platform independent way
-
-  proc markStackAndRegisters(gch: var GcHeap) {.noinline, cdecl.} =
-    var registers: C_JmpBuf
-    if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
-      var max = cast[ByteAddress](gch.stackBottom)
-      var sp = cast[ByteAddress](addr(registers)) +% jmpbufSize -% sizeof(pointer)
-      # sp will traverse the JMP_BUF as well (jmp_buf size is added,
-      # otherwise sp would be below the registers structure).
-      while sp >=% max:
-        gcMark(gch, cast[ppointer](sp)[])
-        sp = sp -% sizeof(pointer)
-
-else:
-  # ---------------------------------------------------------------------------
-  # Generic code for architectures where addresses decrease as the stack grows.
-  # ---------------------------------------------------------------------------
-  proc isOnStack(p: pointer): bool =
-    var stackTop {.volatile.}: pointer
-    stackTop = addr(stackTop)
-    var b = cast[ByteAddress](gch.stackBottom)
-    var a = cast[ByteAddress](stackTop)
-    var x = cast[ByteAddress](p)
-    result = a <=% x and x <=% b
-
-  proc markStackAndRegisters(gch: var GcHeap) {.noinline, cdecl.} =
-    # We use a jmp_buf buffer that is in the C stack.
-    # Used to traverse the stack and registers assuming
-    # that 'setjmp' will save registers in the C stack.
-    type PStackSlice = ptr array [0..7, pointer]
-    var registers {.noinit.}: C_JmpBuf
-    if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
-      var max = cast[ByteAddress](gch.stackBottom)
-      var sp = cast[ByteAddress](addr(registers))
-      when defined(amd64):
-        # words within the jmp_buf structure may not be properly aligned.
-        let regEnd = sp +% sizeof(registers)
-        while sp <% regEnd:
-          gcMark(gch, cast[PPointer](sp)[])
-          gcMark(gch, cast[PPointer](sp +% sizeof(pointer) div 2)[])
-          sp = sp +% sizeof(pointer)
-      # Make sure sp is word-aligned
-      sp = sp and not (sizeof(pointer) - 1)
-      # loop unrolled:
-      while sp <% max - 8*sizeof(pointer):
-        gcMark(gch, cast[PStackSlice](sp)[0])
-        gcMark(gch, cast[PStackSlice](sp)[1])
-        gcMark(gch, cast[PStackSlice](sp)[2])
-        gcMark(gch, cast[PStackSlice](sp)[3])
-        gcMark(gch, cast[PStackSlice](sp)[4])
-        gcMark(gch, cast[PStackSlice](sp)[5])
-        gcMark(gch, cast[PStackSlice](sp)[6])
-        gcMark(gch, cast[PStackSlice](sp)[7])
-        sp = sp +% sizeof(pointer)*8
-      # last few entries:
-      while sp <=% max:
-        gcMark(gch, cast[PPointer](sp)[])
-        sp = sp +% sizeof(pointer)
-
-# ----------------------------------------------------------------------------
-# end of non-portable code
-# ----------------------------------------------------------------------------
+proc markStackAndRegisters(gch: var GcHeap) {.noinline, cdecl.} =
+  forEachStackSlot(gch, gcMark)
 
 proc collectCTBody(gch: var GcHeap) =
-  gch.stat.maxStackSize = max(gch.stat.maxStackSize, stackSize())
+  when not defined(nimCoroutines):
+    gch.stat.maxStackSize = max(gch.stat.maxStackSize, stackSize())
   prepareForInteriorPointerChecking(gch.region)
   markStackAndRegisters(gch)
   markGlobals(gch)
@@ -599,8 +464,13 @@ when not defined(useNimRtl):
              "[GC] occupied memory: " & $getOccupiedMem() & "\n" &
              "[GC] collections: " & $gch.stat.collections & "\n" &
              "[GC] max threshold: " & $gch.stat.maxThreshold & "\n" &
-             "[GC] freed objects: " & $gch.stat.freedObjects & "\n" &
-             "[GC] max stack size: " & $gch.stat.maxStackSize & "\n"
+             "[GC] freed objects: " & $gch.stat.freedObjects & "\n"
+    when defined(nimCoroutines):
+      result = result & "[GC] number of stacks: " & $gch.stack.len & "\n"
+      for stack in items(gch.stack):
+        result = result & "[GC]   stack " & stack.starts.repr & "[GC]     max stack size " & $stack.maxStackSize & "\n"
+    else:
+      result = result & "[GC] max stack size: " & $gch.stat.maxStackSize & "\n"
     GC_enable()
 
 {.pop.}
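
Both collectors now share setupForeignThreadGc from gc_common.nim (the
per-file copies are deleted above). As a usage sketch, assuming a
threads-enabled build and a callback invoked from a thread the Nim runtime did
not create (the callback name and registration are hypothetical):

    proc onForeignCallback(arg: pointer) {.cdecl.} =
      # runs on a thread owned by a foreign library
      setupForeignThreadGc()        # cheap after the first call on this thread
      var buf = newSeq[int](16)     # now safe: GC-managed allocation
      discard buf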