summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--config/nim.cfg1
-rw-r--r--lib/arch/arch.nim59
-rw-r--r--lib/arch/i386.asm79
-rw-r--r--lib/arch/ms_amd64.asm90
-rw-r--r--lib/arch/ms_i386.asm12
-rw-r--r--lib/arch/unix_amd64.asm89
-rw-r--r--lib/arch/unix_i386.asm12
-rw-r--r--lib/nimbase.h37
-rw-r--r--lib/pure/coro.nim145
-rw-r--r--lib/system/excpt.nim4
-rw-r--r--lib/system/gc.nim184
-rw-r--r--lib/system/gc_common.nim275
-rw-r--r--lib/system/gc_ms.nim176
13 files changed, 843 insertions, 320 deletions
diff --git a/config/nim.cfg b/config/nim.cfg
index 09864e0e5..9a2a4ff71 100644
--- a/config/nim.cfg
+++ b/config/nim.cfg
@@ -48,6 +48,7 @@ path="$lib/windows"
 path="$lib/posix"
 path="$lib/js"
 path="$lib/pure/unidecode"
+path="$lib/arch"
 
 @if nimbabel:
   babelpath="$home/.babel/pkgs/"
diff --git a/lib/arch/arch.nim b/lib/arch/arch.nim
new file mode 100644
index 000000000..a11bfb21f
--- /dev/null
+++ b/lib/arch/arch.nim
@@ -0,0 +1,59 @@
+#
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2015 Rokas Kupstys
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+when defined(windows):
+  const
+    ABI* = "ms"
+elif defined(unix):
+  const
+    ABI* = "unix"
+else:
+  {.error: "Unsupported ABI".}
+
+when defined(amd64):
+  when defined(unix):
+    # unix (sysv) ABI
+    type
+      JmpBufReg* {.pure.} = enum
+        BX, BP, R12, R13, R14, R15, SP, IP, TOTAL
+  elif defined(windows):
+    # ms ABI
+    type
+      JmpBufReg* {.pure.} = enum
+        BX, BP, R12, R13, R14, R15, SP, IP, SI, DI, TOTAL
+  type
+    Reg* {.pure.} = enum
+      AX, BX, CX, DX, SI, DI, BP, SP, IP, R8, R9, R10, R11, R12, R13, R14, R15, TOTAL
+
+elif defined(i386):
+    # identical fastcall calling convention on all x86 OS
+    type
+      JmpBufReg* {.pure.} = enum
+        BX, SI, DI, BP, SP, IP, TOTAL
+
+      Reg* {.pure.} = enum
+        AX, BX, CX, BP, SP, DI, SI, TOTAL
+
+else:
+  {.error: "Unsupported architecture".}
+
+{.compile: "./" & ABI & "_" & hostCPU & ".asm"}
+
+type
+  JmpBuf* = array[JmpBufReg.TOTAL, pointer]
+  Registers* = array[Reg.TOTAL, pointer]
+
+
+proc getRegisters*(ctx: var Registers) {.importc: "narch_$1", fastcall.}
+
+proc setjmp*(ctx: var JmpBuf): int {.importc: "narch_$1", fastcall.}
+proc longjmp*(ctx: JmpBuf, ret=1) {.importc: "narch_$1", fastcall.}
+
+proc coroSwitchStack*(sp: pointer) {.importc: "narch_$1", fastcall.}
+proc coroRestoreStack*() {.importc: "narch_$1", fastcall.}
diff --git a/lib/arch/i386.asm b/lib/arch/i386.asm
new file mode 100644
index 000000000..61f6fdda7
--- /dev/null
+++ b/lib/arch/i386.asm
@@ -0,0 +1,79 @@
+;
+;
+;            Nim's Runtime Library
+;        (c) Copyright 2015 Rokas Kupstys
+;
+;    See the file "copying.txt", included in this
+;    distribution, for details about the copyright.
+;
+
+section ".text" executable
+public narch_getRegisters
+public @narch_getRegisters@4
+public narch_setjmp
+public @narch_setjmp@4
+public narch_longjmp
+public @narch_longjmp@8
+public narch_coroSwitchStack
+public @narch_coroSwitchStack@4
+public narch_coroRestoreStack
+public @narch_coroRestoreStack@0
+
+@narch_getRegisters@4:
+narch_getRegisters:
+    mov   [ecx], eax
+    mov   [ecx+4], ebx
+    mov   [ecx+8], ecx
+    mov   [ecx+0Ch], ebp
+    mov   [ecx+10h], esp
+    mov   [ecx+14h], edi
+    mov   [ecx+18h], esi
+    ret
+
+
+@narch_setjmp@4:
+narch_setjmp:
+    ; Based on code from musl libc Copyright © 2005-2014 Rich Felker, et al.
+    mov  [ecx], ebx
+    mov  [ecx+4], esi
+    mov  [ecx+8], edi
+    mov  [ecx+0Ch], ebp
+    lea  eax, [esp+4]
+    mov  [ecx+10h], eax
+    mov  eax, [esp]
+    mov  [ecx+14h], eax
+    xor  eax, eax
+    ret
+
+
+@narch_longjmp@8:
+narch_longjmp:
+    ; Based on code from musl libc Copyright © 2005-2014 Rich Felker, et al.
+    mov  eax, edx
+    test eax, eax
+    jnz  @F
+    inc  eax
+@@:
+    mov  ebx, [ecx]
+    mov  esi, [ecx+4]
+    mov  edi, [ecx+8]
+    mov  ebp, [ecx+0Ch]
+    mov  esp, [ecx+10h]
+    mov  edx, [ecx+14h]
+    jmp  edx
+
+
+@narch_coroSwitchStack@4:
+narch_coroSwitchStack:
+    pop eax                   ; return address
+    mov edx, esp              ; old esp for saving
+    mov esp, ecx              ; swap stack with one passed to func
+    push edx                  ; store old stack pointer on newly switched stack
+    jmp eax                   ; return
+
+
+@narch_coroRestoreStack@0:
+narch_coroRestoreStack:
+    pop eax                   ; return address
+    pop esp                   ; restore old stack pointer
+    jmp eax                   ; return
diff --git a/lib/arch/ms_amd64.asm b/lib/arch/ms_amd64.asm
new file mode 100644
index 000000000..0503b31c9
--- /dev/null
+++ b/lib/arch/ms_amd64.asm
@@ -0,0 +1,90 @@
+;
+;
+;            Nim's Runtime Library
+;        (c) Copyright 2015 Rokas Kupstys
+;
+;    See the file "copying.txt", included in this
+;    distribution, for details about the copyright.
+;
+
+format MS64 COFF
+
+section ".text" executable align 16
+public narch_getRegisters
+public narch_setjmp
+public narch_longjmp
+public narch_coroSwitchStack
+public narch_coroRestoreStack
+
+
+narch_getRegisters:
+    mov   [rcx], rax
+    mov   [rcx+8], rbx
+    mov   [rcx+10h], rcx
+    mov   [rcx+18h], rdx
+    mov   [rcx+20h], rsi
+    mov   [rcx+28h], rdi
+    mov   [rcx+30h], rbp
+    mov   [rcx+38h], rsp
+    mov   rax, [rsp]
+    mov   [rcx+40h], rax      ; rip
+    mov   [rcx+48h], r8
+    mov   [rcx+50h], r9
+    mov   [rcx+58h], r10
+    mov   [rcx+60h], r11
+    mov   [rcx+68h], r12
+    mov   [rcx+70h], r13
+    mov   [rcx+78h], r14
+    mov   [rcx+80h], r15
+    ret
+
+
+narch_setjmp:
+    ; Based on code from musl libc Copyright © 2005-2014 Rich Felker, et al.
+    mov   [rcx], rbx          ; rcx is jmp_buf, move registers onto it
+    mov   [rcx+8], rbp
+    mov   [rcx+10h], r12
+    mov   [rcx+18h], r13
+    mov   [rcx+20h], r14
+    mov   [rcx+28h], r15
+    lea   rdx, [rsp+8]        ; this is our rsp WITHOUT current ret addr
+    mov   [rcx+30h], rdx
+    mov   rdx, [rsp]          ; save return addr ptr for new rip
+    mov   [rcx+38h], rdx
+    mov   [rcx+40h], rsi
+    mov   [rcx+48h], rdi
+    xor   rax, rax            ; always return 0
+    ret
+
+narch_longjmp:
+    ; Based on code from musl libc Copyright © 2005-2014 Rich Felker, et al.
+    mov   rax, rdx            ; val will be longjmp return
+    test  rax, rax
+    jnz   @F
+    inc   rax                 ; if val==0, val=1 per longjmp semantics
+@@:
+    mov   rbx, [rcx]          ; rax is the jmp_buf, restore regs from it
+    mov   rbp, [rcx+8]
+    mov   r12, [rcx+10h]
+    mov   r13, [rcx+18h]
+    mov   r14, [rcx+20h]
+    mov   r15, [rcx+28h]
+    mov   rsp, [rcx+30h]      ; this ends up being the stack pointer
+    mov   rdx, [rcx+38h]      ; this is the instruction pointer
+    jmp   rdx                 ; goto saved address without altering rsp
+
+
+narch_coroSwitchStack:
+    pop rax                   ; return address
+    mov rdx, rsp              ; old rsp for saving
+    mov rsp, rcx              ; swap stack with one passed to func
+    push rdx                  ; store old stack pointer on newly switched stack
+    sub rsp, 28h              ; stack alignment + shadow space
+    jmp rax                   ; return
+
+
+narch_coroRestoreStack:
+    pop rax                   ; return address
+    add rsp, 28h              ; stack alignment + shadow space
+    pop rsp                   ; restore old stack pointer
+    jmp rax                   ; return
diff --git a/lib/arch/ms_i386.asm b/lib/arch/ms_i386.asm
new file mode 100644
index 000000000..a31a698d1
--- /dev/null
+++ b/lib/arch/ms_i386.asm
@@ -0,0 +1,12 @@
+;
+;
+;            Nim's Runtime Library
+;        (c) Copyright 2015 Rokas Kupstys
+;
+;    See the file "copying.txt", included in this
+;    distribution, for details about the copyright.
+;
+
+format MS COFF
+
+include 'i386.asm'
diff --git a/lib/arch/unix_amd64.asm b/lib/arch/unix_amd64.asm
new file mode 100644
index 000000000..3005c150c
--- /dev/null
+++ b/lib/arch/unix_amd64.asm
@@ -0,0 +1,89 @@
+;
+;
+;            Nim's Runtime Library
+;        (c) Copyright 2015 Rokas Kupstys
+;
+;    See the file "copying.txt", included in this
+;    distribution, for details about the copyright.
+;
+
+format ELF64
+
+section ".text" executable align 16
+public narch_getRegisters
+public narch_setjmp
+public narch_longjmp
+public narch_coroSwitchStack
+public narch_coroRestoreStack
+
+
+narch_getRegisters:
+    mov   [rdi], rax
+    mov   [rdi+8], rbx
+    mov   [rdi+10h], rcx
+    mov   [rdi+18h], rdx
+    mov   [rdi+20h], rsi
+    mov   [rdi+28h], rdi
+    mov   [rdi+30h], rbp
+    mov   [rdi+38h], rsp
+    mov   rax, [rsp]
+    mov   [rdi+40h], rax      ; rip
+    mov   [rdi+48h], r8
+    mov   [rdi+50h], r9
+    mov   [rdi+58h], r10
+    mov   [rdi+60h], r11
+    mov   [rdi+68h], r12
+    mov   [rdi+70h], r13
+    mov   [rdi+78h], r14
+    mov   [rdi+80h], r15
+    ret
+
+
+narch_setjmp:
+    ; Based on code from musl libc Copyright © 2005-2014 Rich Felker, et al.
+    mov   [rdi], rbx          ; rdi is jmp_buf, move registers onto it
+    mov   [rdi+8], rbp
+    mov   [rdi+10h], r12
+    mov   [rdi+18h], r13
+    mov   [rdi+20h], r14
+    mov   [rdi+28h], r15
+    lea   rdx, [rsp+8]        ; this is our rsp WITHOUT current ret addr
+    mov   [rdi+30h], rdx
+    mov   rdx, [rsp]          ; save return addr ptr for new rip
+    mov   [rdi+38h], rdx
+    xor   rax, rax            ; always return 0
+    ret
+
+
+narch_longjmp:
+    ; Based on code from musl libc Copyright © 2005-2014 Rich Felker, et al.
+    mov   rax, rsi            ; val will be longjmp return
+    test  rax, rax
+    jnz   @F
+    inc   rax                 ; if val==0, val=1 per longjmp semantics
+@@:
+    mov   rbx, [rdi]          ; rdi is the jmp_buf, restore regs from it
+    mov   rbp, [rdi+8]
+    mov   r12, [rdi+10h]
+    mov   r13, [rdi+18h]
+    mov   r14, [rdi+20h]
+    mov   r15, [rdi+28h]
+    mov   rsp, [rdi+30h]      ; this ends up being the stack pointer
+    mov   rdx, [rdi+38h]      ; this is the instruction pointer
+    jmp   rdx                 ; goto saved address without altering rsp
+
+
+narch_coroSwitchStack:
+    pop rsi                   ; return address
+    mov rdx, rsp              ; old rsp for saving
+    mov rsp, rdi              ; swap stack with one passed to func
+    push rdx                  ; store old stack pointer on newly switched stack
+    sub rsp, 8h               ; stack alignment
+    jmp rsi                   ; return
+
+
+narch_coroRestoreStack:
+	pop rsi                   ; return address
+	add rsp, 8h               ; stack alignment
+	pop rsp                   ; restore old stack pointer
+	jmp rsi                   ; return
diff --git a/lib/arch/unix_i386.asm b/lib/arch/unix_i386.asm
new file mode 100644
index 000000000..278679067
--- /dev/null
+++ b/lib/arch/unix_i386.asm
@@ -0,0 +1,12 @@
+;
+;
+;            Nim's Runtime Library
+;        (c) Copyright 2015 Rokas Kupstys
+;
+;    See the file "copying.txt", included in this
+;    distribution, for details about the copyright.
+;
+
+format ELF
+
+include 'i386.asm'
diff --git a/lib/nimbase.h b/lib/nimbase.h
index e796ba735..5b5a43826 100644
--- a/lib/nimbase.h
+++ b/lib/nimbase.h
@@ -110,18 +110,31 @@ __clang__
 #  endif
 #  define N_LIB_IMPORT  extern __declspec(dllimport)
 #else
-#  define N_CDECL(rettype, name) rettype name
-#  define N_STDCALL(rettype, name) rettype name
-#  define N_SYSCALL(rettype, name) rettype name
-#  define N_FASTCALL(rettype, name) rettype name
-#  define N_SAFECALL(rettype, name) rettype name
-/* function pointers with calling convention: */
-#  define N_CDECL_PTR(rettype, name) rettype (*name)
-#  define N_STDCALL_PTR(rettype, name) rettype (*name)
-#  define N_SYSCALL_PTR(rettype, name) rettype (*name)
-#  define N_FASTCALL_PTR(rettype, name) rettype (*name)
-#  define N_SAFECALL_PTR(rettype, name) rettype (*name)
-
+#  if defined(__GNUC__)
+#    define N_CDECL(rettype, name) rettype name
+#    define N_STDCALL(rettype, name) rettype name
+#    define N_SYSCALL(rettype, name) rettype name
+#    define N_FASTCALL(rettype, name) __attribute__((fastcall)) rettype name
+#    define N_SAFECALL(rettype, name) rettype name
+/*   function pointers with calling convention: */
+#    define N_CDECL_PTR(rettype, name) rettype (*name)
+#    define N_STDCALL_PTR(rettype, name) rettype (*name)
+#    define N_SYSCALL_PTR(rettype, name) rettype (*name)
+#    define N_FASTCALL_PTR(rettype, name) __attribute__((fastcall)) rettype (*name)
+#    define N_SAFECALL_PTR(rettype, name) rettype (*name)
+#  else
+#    define N_CDECL(rettype, name) rettype name
+#    define N_STDCALL(rettype, name) rettype name
+#    define N_SYSCALL(rettype, name) rettype name
+#    define N_FASTCALL(rettype, name) rettype name
+#    define N_SAFECALL(rettype, name) rettype name
+/*   function pointers with calling convention: */
+#    define N_CDECL_PTR(rettype, name) rettype (*name)
+#    define N_STDCALL_PTR(rettype, name) rettype (*name)
+#    define N_SYSCALL_PTR(rettype, name) rettype (*name)
+#    define N_FASTCALL_PTR(rettype, name) rettype (*name)
+#    define N_SAFECALL_PTR(rettype, name) rettype (*name)
+#  endif
 #  ifdef __cplusplus
 #    define N_LIB_EXPORT  extern "C"
 #  else
diff --git a/lib/pure/coro.nim b/lib/pure/coro.nim
new file mode 100644
index 000000000..6ef5f6f54
--- /dev/null
+++ b/lib/pure/coro.nim
@@ -0,0 +1,145 @@
+#
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2015 Rokas Kupstys
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+when not defined(nimCoroutines):
+  {.error: "Coroutines require -d:nimCoroutines".}
+
+import os, times
+import macros
+import arch
+import lists
+
+const coroDefaultStackSize = 512 * 1024
+
+
+type Coroutine = ref object
+  # prev: ptr Coroutine
+  # next: ptr Coroutine
+  ctx: JmpBuf
+  fn: proc()
+  started: bool
+  lastRun: float
+  sleepTime: float
+  stack: pointer
+  stacksize: int
+
+var coroutines = initDoublyLinkedList[Coroutine]()
+var current: Coroutine
+var mainCtx: JmpBuf
+
+
+proc GC_addStack(starts: pointer) {.cdecl, importc.}
+proc GC_removeStack(starts: pointer) {.cdecl, importc.}
+proc GC_setCurrentStack(starts, pos: pointer) {.cdecl, importc.}
+
+
+proc coroStart*(c: proc(), stacksize: int=coroDefaultStackSize) =
+  ## Adds coroutine to event loop. It does not run immediately.
+  var coro = Coroutine()
+  coro.fn = c
+  while coro.stack == nil:
+    coro.stack = alloc0(stacksize)
+  coro.stacksize = stacksize
+  coroutines.append(coro)
+
+{.push stackTrace: off.}
+proc coroYield*(sleepTime: float=0) =
+  ## Stops coroutine execution and resumes no sooner than after ``sleepTime`` seconds.
+  ## Until then other coroutines are executed.
+  var oldFrame = getFrame()
+  var sp {.volatile.}: pointer
+  GC_setCurrentStack(current.stack, cast[pointer](addr sp))
+  current.sleepTime = sleep_time
+  current.lastRun = epochTime()
+  if setjmp(current.ctx) == 0:
+    longjmp(mainCtx, 1)
+  setFrame(oldFrame)
+{.pop.}
+
+proc coroRun*() =
+  ## Starts main event loop which exits when all coroutines exit. Calling this proc
+  ## starts execution of first coroutine.
+  var node = coroutines.head
+  var minDelay: float = 0
+  var frame: PFrame
+  while node != nil:
+    var coro = node.value
+    current = coro
+    os.sleep(int(minDelay * 1000))
+
+    var remaining = coro.sleepTime - (epochTime() - coro.lastRun);
+    if remaining <= 0:
+      remaining = 0
+      let res = setjmp(mainCtx)
+      if res == 0:
+        frame = getFrame()
+        if coro.started:            # coroutine resumes
+          longjmp(coro.ctx, 1)
+        else:
+          coro.started = true       # coroutine starts
+          var stackEnd = cast[pointer](cast[ByteAddress](coro.stack) + coro.stacksize)
+          GC_addStack(coro.stack)
+          coroSwitchStack(stackEnd)
+          coro.fn()
+          coroRestoreStack()
+          GC_removeStack(coro.stack)
+          var next = node.prev
+          coroutines.remove(node)
+          dealloc(coro.stack)
+          node = next
+          setFrame(frame)
+      else:
+        setFrame(frame)
+
+    elif remaining > 0:
+      if minDelay > 0 and remaining > 0:
+        minDelay = min(remaining, minDelay)
+      else:
+        minDelay = remaining
+
+    if node == nil or node.next == nil:
+      node = coroutines.head
+    else:
+      node = node.next
+
+
+proc coroAlive*(c: proc()): bool =
+  ## Returns ``true`` if coroutine has not returned, ``false`` otherwise.
+  for coro in items(coroutines):
+    if coro.fn == c:
+      return true
+
+proc coroWait*(c: proc(), interval=0.01) =
+  ## Returns only after coroutine ``c`` has returned. ``interval`` is how often, in seconds, to check whether ``c`` has finished.
+  while coroAlive(c):
+    coroYield interval
+
+
+when isMainModule:
+  var stackCheckValue = 1100220033
+  proc c2()
+
+  proc c1() =
+    for i in 0 .. 3:
+      echo "c1"
+      coroYield 0.05
+    echo "c1 exits"
+
+
+  proc c2() =
+    for i in 0 .. 3:
+      echo "c2"
+      coroYield 0.025
+    coroWait(c1)
+    echo "c2 exits"
+
+  coroStart(c1)
+  coroStart(c2)
+  coroRun()
+  echo "done ", stackCheckValue
diff --git a/lib/system/excpt.nim b/lib/system/excpt.nim
index 5d2faa2d6..df28c1493 100644
--- a/lib/system/excpt.nim
+++ b/lib/system/excpt.nim
@@ -44,10 +44,12 @@ var
     # a global variable for the root of all try blocks
   currException {.threadvar.}: ref Exception
 
+proc getFrame*(): PFrame {.compilerRtl, inl.} = framePtr
+
 proc popFrame {.compilerRtl, inl.} =
   framePtr = framePtr.prev
 
-proc setFrame(s: PFrame) {.compilerRtl, inl.} =
+proc setFrame*(s: PFrame) {.compilerRtl, inl.} =
   framePtr = s
 
 proc pushSafePoint(s: PSafePoint) {.compilerRtl, inl.} =
diff --git a/lib/system/gc.nim b/lib/system/gc.nim
index ae8bb724f..df7864cff 100644
--- a/lib/system/gc.nim
+++ b/lib/system/gc.nim
@@ -16,6 +16,9 @@
 # Special care has been taken to avoid recursion as far as possible to avoid
 # stack overflows when traversing deep datastructures. It is well-suited
 # for soft real time applications (like games).
+
+import arch
+
 {.push profiler:off.}
 
 const
@@ -64,8 +67,16 @@ type
     cycleTableSize: int      # max entries in cycle table
     maxPause: int64          # max measured GC pause in nanoseconds
 
+  GcStack {.final.} = object
+    prev: ptr GcStack
+    next: ptr GcStack
+    starts: pointer
+    pos: pointer
+    maxStackSize: int
+
   GcHeap {.final, pure.} = object # this contains the zero count and
                                    # non-zero count table
+    stack: ptr GcStack
     stackBottom: pointer
     cycleThreshold: int
     when useCellIds:
@@ -154,7 +165,7 @@ template gcTrace(cell, state: expr): stmt {.immediate.} =
 
 # forward declarations:
 proc collectCT(gch: var GcHeap) {.benign.}
-proc isOnStack*(p: pointer): bool {.noinline, benign.}
+proc isOnStack(p: pointer): bool {.noinline, benign.}
 proc forAllChildren(cell: PCell, op: WalkOp) {.benign.}
 proc doOperation(p: pointer, op: WalkOp) {.benign.}
 proc forAllChildrenAux(dest: pointer, mt: PNimType, op: WalkOp) {.benign.}
@@ -293,20 +304,6 @@ proc initGC() =
     when useMarkForDebug or useBackupGc:
       init(gch.marked)
 
-var
-  localGcInitialized {.rtlThreadVar.}: bool
-
-proc setupForeignThreadGc*() =
-  ## call this if you registered a callback that will be run from a thread not
-  ## under your control. This has a cheap thread-local guard, so the GC for
-  ## this thread will only be initialized once per thread, no matter how often
-  ## it is called.
-  if not localGcInitialized:
-    localGcInitialized = true
-    var stackTop {.volatile.}: pointer
-    setStackBottom(addr(stackTop))
-    initGC()
-
 when useMarkForDebug or useBackupGc:
   type
     GlobalMarkerProc = proc () {.nimcall, benign.}
@@ -816,138 +813,7 @@ proc markThreadStacks(gch: var GcHeap) =
         sp = sp +% sizeof(pointer)
       it = it.next
 
-# ----------------- stack management --------------------------------------
-#  inspired from Smart Eiffel
-
-when defined(sparc):
-  const stackIncreases = false
-elif defined(hppa) or defined(hp9000) or defined(hp9000s300) or
-     defined(hp9000s700) or defined(hp9000s800) or defined(hp9000s820):
-  const stackIncreases = true
-else:
-  const stackIncreases = false
-
-when not defined(useNimRtl):
-  {.push stack_trace: off.}
-  proc setStackBottom(theStackBottom: pointer) =
-    #c_fprintf(c_stdout, "stack bottom: %p;\n", theStackBottom)
-    # the first init must be the one that defines the stack bottom:
-    if gch.stackBottom == nil: gch.stackBottom = theStackBottom
-    else:
-      var a = cast[ByteAddress](theStackBottom) # and not PageMask - PageSize*2
-      var b = cast[ByteAddress](gch.stackBottom)
-      #c_fprintf(c_stdout, "old: %p new: %p;\n",gch.stackBottom,theStackBottom)
-      when stackIncreases:
-        gch.stackBottom = cast[pointer](min(a, b))
-      else:
-        gch.stackBottom = cast[pointer](max(a, b))
-  {.pop.}
-
-proc stackSize(): int {.noinline.} =
-  var stackTop {.volatile.}: pointer
-  result = abs(cast[int](addr(stackTop)) - cast[int](gch.stackBottom))
-
-when defined(sparc): # For SPARC architecture.
-  proc isOnStack(p: pointer): bool =
-    var stackTop {.volatile.}: pointer
-    stackTop = addr(stackTop)
-    var b = cast[TAddress](gch.stackBottom)
-    var a = cast[TAddress](stackTop)
-    var x = cast[TAddress](p)
-    result = a <=% x and x <=% b
-
-  template forEachStackSlot(gch, gcMark: expr) {.immediate, dirty.} =
-    when defined(sparcv9):
-      asm  """"flushw \n" """
-    else:
-      asm  """"ta      0x3   ! ST_FLUSH_WINDOWS\n" """
-
-    var
-      max = gch.stackBottom
-      sp: PPointer
-      stackTop: array[0..1, pointer]
-    sp = addr(stackTop[0])
-    # Addresses decrease as the stack grows.
-    while sp <= max:
-      gcMark(gch, sp[])
-      sp = cast[PPointer](cast[TAddress](sp) +% sizeof(pointer))
-
-elif defined(ELATE):
-  {.error: "stack marking code is to be written for this architecture".}
-
-elif stackIncreases:
-  # ---------------------------------------------------------------------------
-  # Generic code for architectures where addresses increase as the stack grows.
-  # ---------------------------------------------------------------------------
-  proc isOnStack(p: pointer): bool =
-    var stackTop {.volatile.}: pointer
-    stackTop = addr(stackTop)
-    var a = cast[TAddress](gch.stackBottom)
-    var b = cast[TAddress](stackTop)
-    var x = cast[TAddress](p)
-    result = a <=% x and x <=% b
-
-  var
-    jmpbufSize {.importc: "sizeof(jmp_buf)", nodecl.}: int
-      # a little hack to get the size of a JmpBuf in the generated C code
-      # in a platform independent way
-
-  template forEachStackSlot(gch, gcMark: expr) {.immediate, dirty.} =
-    var registers: C_JmpBuf
-    if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
-      var max = cast[TAddress](gch.stackBottom)
-      var sp = cast[TAddress](addr(registers)) +% jmpbufSize -% sizeof(pointer)
-      # sp will traverse the JMP_BUF as well (jmp_buf size is added,
-      # otherwise sp would be below the registers structure).
-      while sp >=% max:
-        gcMark(gch, cast[ppointer](sp)[])
-        sp = sp -% sizeof(pointer)
-
-else:
-  # ---------------------------------------------------------------------------
-  # Generic code for architectures where addresses decrease as the stack grows.
-  # ---------------------------------------------------------------------------
-  proc isOnStack(p: pointer): bool =
-    var stackTop {.volatile.}: pointer
-    stackTop = addr(stackTop)
-    var b = cast[ByteAddress](gch.stackBottom)
-    var a = cast[ByteAddress](stackTop)
-    var x = cast[ByteAddress](p)
-    result = a <=% x and x <=% b
-
-  template forEachStackSlot(gch, gcMark: expr) {.immediate, dirty.} =
-    # We use a jmp_buf buffer that is in the C stack.
-    # Used to traverse the stack and registers assuming
-    # that 'setjmp' will save registers in the C stack.
-    type PStackSlice = ptr array [0..7, pointer]
-    var registers {.noinit.}: C_JmpBuf
-    if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
-      var max = cast[ByteAddress](gch.stackBottom)
-      var sp = cast[ByteAddress](addr(registers))
-      when defined(amd64):
-        # words within the jmp_buf structure may not be properly aligned.
-        let regEnd = sp +% sizeof(registers)
-        while sp <% regEnd:
-          gcMark(gch, cast[PPointer](sp)[])
-          gcMark(gch, cast[PPointer](sp +% sizeof(pointer) div 2)[])
-          sp = sp +% sizeof(pointer)
-      # Make sure sp is word-aligned
-      sp = sp and not (sizeof(pointer) - 1)
-      # loop unrolled:
-      while sp <% max - 8*sizeof(pointer):
-        gcMark(gch, cast[PStackSlice](sp)[0])
-        gcMark(gch, cast[PStackSlice](sp)[1])
-        gcMark(gch, cast[PStackSlice](sp)[2])
-        gcMark(gch, cast[PStackSlice](sp)[3])
-        gcMark(gch, cast[PStackSlice](sp)[4])
-        gcMark(gch, cast[PStackSlice](sp)[5])
-        gcMark(gch, cast[PStackSlice](sp)[6])
-        gcMark(gch, cast[PStackSlice](sp)[7])
-        sp = sp +% sizeof(pointer)*8
-      # last few entries:
-      while sp <=% max:
-        gcMark(gch, cast[PPointer](sp)[])
-        sp = sp +% sizeof(pointer)
+include gc_common
 
 proc markStackAndRegisters(gch: var GcHeap) {.noinline, cdecl.} =
   forEachStackSlot(gch, gcMark)
@@ -956,10 +822,6 @@ when useMarkForDebug or useBackupGc:
   proc markStackAndRegistersForSweep(gch: var GcHeap) =
     forEachStackSlot(gch, stackMarkS)
 
-# ----------------------------------------------------------------------------
-# end of non-portable code
-# ----------------------------------------------------------------------------
-
 proc collectZCT(gch: var GcHeap): bool =
   # Note: Freeing may add child objects to the ZCT! So essentially we do
   # deep freeing, which is bad for incremental operation. In order to
@@ -1033,7 +895,8 @@ proc collectCTBody(gch: var GcHeap) =
     let t0 = getticks()
   sysAssert(allocInv(gch.region), "collectCT: begin")
 
-  gch.stat.maxStackSize = max(gch.stat.maxStackSize, stackSize())
+  when not defined(nimCoroutines):
+    gch.stat.maxStackSize = max(gch.stat.maxStackSize, stackSize())
   sysAssert(gch.decStack.len == 0, "collectCT")
   prepareForInteriorPointerChecking(gch.region)
   markStackAndRegisters(gch)
@@ -1064,11 +927,19 @@ when useMarkForDebug or useBackupGc:
     markStackAndRegistersForSweep(gch)
     markGlobals(gch)
 
+when defined(nimCoroutines):
+  proc currentStackSizes(): int =
+    for stack in items(gch.stack):
+      result = result + stackSize(stack.starts, stack.pos)
+
 proc collectCT(gch: var GcHeap) =
   # stackMarkCosts prevents some pathological behaviour: Stack marking
   # becomes more expensive with large stacks and large stacks mean that
   # cells with RC=0 are more likely to be kept alive by the stack.
-  let stackMarkCosts = max(stackSize() div (16*sizeof(int)), ZctThreshold)
+  when defined(nimCoroutines):
+    let stackMarkCosts = max(currentStackSizes() div (16*sizeof(int)), ZctThreshold)
+  else:
+    let stackMarkCosts = max(stackSize() div (16*sizeof(int)), ZctThreshold)
   if (gch.zct.len >= stackMarkCosts or (cycleGC and
       getOccupiedMem(gch.region)>=gch.cycleThreshold) or alwaysGC) and
       gch.recGcLock == 0:
@@ -1137,8 +1008,13 @@ when not defined(useNimRtl):
              "[GC] max threshold: " & $gch.stat.maxThreshold & "\n" &
              "[GC] zct capacity: " & $gch.zct.cap & "\n" &
              "[GC] max cycle table size: " & $gch.stat.cycleTableSize & "\n" &
-             "[GC] max stack size: " & $gch.stat.maxStackSize & "\n" &
              "[GC] max pause time [ms]: " & $(gch.stat.maxPause div 1000_000)
+    when defined(nimCoroutines):
+      result = result & "[GC] number of stacks: " & $gch.stack.len & "\n"
+      for stack in items(gch.stack):
+        result = result & "[GC]   stack " & stack.starts.repr & "[GC]     max stack size " & $stack.maxStackSize & "\n"
+    else:
+      result = result & "[GC] max stack size: " & $gch.stat.maxStackSize & "\n"
     GC_enable()
 
 {.pop.}
diff --git a/lib/system/gc_common.nim b/lib/system/gc_common.nim
new file mode 100644
index 000000000..c7dd667e4
--- /dev/null
+++ b/lib/system/gc_common.nim
@@ -0,0 +1,275 @@
+#
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2015 Rokas Kupstys
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+proc len(stack: ptr GcStack): int =
+  if stack == nil:
+    return 0
+
+  var s = stack
+  result = 1
+  while s.next != nil:
+    inc(result)
+    s = s.next
+
+when defined(nimCoroutines):
+  proc stackSize(stackBottom: pointer, pos: pointer=nil): int {.noinline.} =
+    var sp: pointer
+    if pos == nil:
+      var stackTop {.volatile.}: pointer
+      sp = addr(stackTop)
+    else:
+      sp = pos
+    result = abs(cast[int](sp) - cast[int](stackBottom))
+
+  proc GC_addStack*(starts: pointer) {.cdecl, exportc.} =
+    var sp {.volatile.}: pointer
+    var stack = cast[ptr GcStack](alloc0(sizeof(GcStack)))
+    stack.starts = starts
+    stack.pos = addr sp
+    if gch.stack == nil:
+      gch.stack = stack
+    else:
+      stack.next = gch.stack
+      gch.stack.prev = stack
+      gch.stack = stack
+    # c_fprintf(c_stdout, "[GC] added stack 0x%016X\n", starts)
+
+  proc GC_removeStack*(starts: pointer) {.cdecl, exportc.} =
+    var stack = gch.stack
+    while stack != nil:
+      if stack.starts == starts:
+        if stack.prev == nil:
+          if stack.next != nil:
+            stack.next.prev = nil
+          gch.stack = stack.next
+        else:
+          stack.prev.next = stack.next
+          if stack.next != nil:
+              stack.next.prev = stack.prev
+        dealloc(stack)
+        # echo "[GC] removed stack ", starts.repr
+        break
+      else:
+        stack = stack.next
+
+  proc GC_setCurrentStack*(starts, pos: pointer) {.cdecl, exportc.} =
+    var stack = gch.stack
+    while stack != nil:
+      if stack.starts == starts:
+        stack.pos = pos
+        stack.maxStackSize = max(stack.maxStackSize, stackSize(stack.starts, pos))
+        return
+      stack = stack.next
+    gcAssert(false, "Current stack position does not belong to registered stack")
+else:
+  proc stackSize(): int {.noinline.} =
+    var stackTop {.volatile.}: pointer
+    result = abs(cast[int](addr(stackTop)) - cast[int](gch.stackBottom))
+
+iterator items(stack: ptr GcStack): ptr GcStack =
+  var s = stack
+  while not isNil(s):
+    yield s
+    s = s.next
+
+var
+  localGcInitialized {.rtlThreadVar.}: bool
+
+proc setupForeignThreadGc*() =
+  ## call this if you registered a callback that will be run from a thread not
+  ## under your control. This has a cheap thread-local guard, so the GC for
+  ## this thread will only be initialized once per thread, no matter how often
+  ## it is called.
+  if not localGcInitialized:
+    localGcInitialized = true
+    var stackTop {.volatile.}: pointer
+    setStackBottom(addr(stackTop))
+    initGC()
+
+# ----------------- stack management --------------------------------------
+#  inspired from Smart Eiffel
+
+when defined(sparc):
+  const stackIncreases = false
+elif defined(hppa) or defined(hp9000) or defined(hp9000s300) or
+     defined(hp9000s700) or defined(hp9000s800) or defined(hp9000s820):
+  const stackIncreases = true
+else:
+  const stackIncreases = false
+
+when not defined(useNimRtl):
+  {.push stack_trace: off.}
+  proc setStackBottom(theStackBottom: pointer) =
+    #c_fprintf(c_stdout, "stack bottom: %p;\n", theStackBottom)
+    # the first init must be the one that defines the stack bottom:
+    when defined(nimCoroutines):
+      GC_addStack(theStackBottom)
+    else:
+      if gch.stackBottom == nil: gch.stackBottom = theStackBottom
+      else:
+        var a = cast[ByteAddress](theStackBottom) # and not PageMask - PageSize*2
+        var b = cast[ByteAddress](gch.stackBottom)
+        #c_fprintf(c_stdout, "old: %p new: %p;\n",gch.stackBottom,theStackBottom)
+        when stackIncreases:
+          gch.stackBottom = cast[pointer](min(a, b))
+        else:
+          gch.stackBottom = cast[pointer](max(a, b))
+  {.pop.}
+
+when defined(sparc): # For SPARC architecture.
+  when defined(nimCoroutines):
+    {.error: "Nim coroutines are not supported on this platform.".}
+
+  proc isOnStack(p: pointer): bool =
+    var stackTop {.volatile.}: pointer
+    stackTop = addr(stackTop)
+    var b = cast[TAddress](gch.stackBottom)
+    var a = cast[TAddress](stackTop)
+    var x = cast[TAddress](p)
+    result = a <=% x and x <=% b
+
+  template forEachStackSlot(gch, gcMark: expr) {.immediate, dirty.} =
+    when defined(sparcv9):
+      asm  """"flushw \n" """
+    else:
+      asm  """"ta      0x3   ! ST_FLUSH_WINDOWS\n" """
+
+    var
+      max = gch.stackBottom
+      sp: PPointer
+      stackTop: array[0..1, pointer]
+    sp = addr(stackTop[0])
+    # Addresses decrease as the stack grows.
+    while sp <= max:
+      gcMark(gch, sp[])
+      sp = cast[PPointer](cast[TAddress](sp) +% sizeof(pointer))
+
+elif defined(ELATE):
+  {.error: "stack marking code is to be written for this architecture".}
+
+elif stackIncreases:
+  # ---------------------------------------------------------------------------
+  # Generic code for architectures where addresses increase as the stack grows.
+  # ---------------------------------------------------------------------------
+  when defined(nimCoroutines):
+    {.error: "Nim coroutines are not supported on this platform.".}
+  proc isOnStack(p: pointer): bool =
+    var stackTop {.volatile.}: pointer
+    stackTop = addr(stackTop)
+    var a = cast[TAddress](gch.stackBottom)
+    var b = cast[TAddress](stackTop)
+    var x = cast[TAddress](p)
+    result = a <=% x and x <=% b
+
+  var
+    jmpbufSize {.importc: "sizeof(jmp_buf)", nodecl.}: int
+      # a little hack to get the size of a JmpBuf in the generated C code
+      # in a platform independent way
+
+  template forEachStackSlot(gch, gcMark: expr) {.immediate, dirty.} =
+    var registers: C_JmpBuf
+    if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
+      var max = cast[TAddress](gch.stackBottom)
+      var sp = cast[TAddress](addr(registers)) +% jmpbufSize -% sizeof(pointer)
+      # sp will traverse the JMP_BUF as well (jmp_buf size is added,
+      # otherwise sp would be below the registers structure).
+      while sp >=% max:
+        gcMark(gch, cast[PPointer](sp)[])
+        sp = sp -% sizeof(pointer)
+
+else:
+  # ---------------------------------------------------------------------------
+  # Generic code for architectures where addresses decrease as the stack grows.
+  # ---------------------------------------------------------------------------
+  when defined(nimCoroutines):
+    proc isOnStack(p: pointer): bool =
+      var stackTop {.volatile.}: pointer
+      stackTop = addr(stackTop)
+      for stack in items(gch.stack):
+        var b = cast[ByteAddress](stack.starts)
+        var a = cast[ByteAddress](stack.starts) - stack.maxStackSize
+        var x = cast[ByteAddress](p)
+        if a <=% x and x <=% b:
+          return true
+  
+    template forEachStackSlot(gch, gcMark: expr) {.immediate, dirty.} =
+      # We use a jmp_buf buffer that is in the C stack.
+      # Used to traverse the stack and registers assuming
+      # that 'setjmp' will save registers in the C stack.
+      type PStackSlice = ptr array [0..7, pointer]
+      var registers {.noinit.}: Registers
+      getRegisters(registers)
+      for i in registers.low .. registers.high:
+        gcMark(gch, cast[PPointer](registers[i]))
+  
+      for stack in items(gch.stack):
+        stack.maxStackSize = max(stack.maxStackSize, stackSize(stack.starts))
+        var max = cast[ByteAddress](stack.starts)
+        var sp = cast[ByteAddress](stack.pos)
+        # loop unrolled:
+        while sp <% max - 8*sizeof(pointer):
+          gcMark(gch, cast[PStackSlice](sp)[0])
+          gcMark(gch, cast[PStackSlice](sp)[1])
+          gcMark(gch, cast[PStackSlice](sp)[2])
+          gcMark(gch, cast[PStackSlice](sp)[3])
+          gcMark(gch, cast[PStackSlice](sp)[4])
+          gcMark(gch, cast[PStackSlice](sp)[5])
+          gcMark(gch, cast[PStackSlice](sp)[6])
+          gcMark(gch, cast[PStackSlice](sp)[7])
+          sp = sp +% sizeof(pointer)*8
+        # last few entries:
+        while sp <=% max:
+          gcMark(gch, cast[PPointer](sp)[])
+          sp = sp +% sizeof(pointer)
+  else:
+    proc isOnStack(p: pointer): bool =
+      var stackTop {.volatile.}: pointer
+      stackTop = addr(stackTop)
+      var b = cast[ByteAddress](gch.stackBottom)
+      var a = cast[ByteAddress](stackTop)
+      var x = cast[ByteAddress](p)
+      result = a <=% x and x <=% b
+
+    template forEachStackSlot(gch, gcMark: expr) {.immediate, dirty.} =
+      # We use a jmp_buf buffer that is in the C stack.
+      # Used to traverse the stack and registers assuming
+      # that 'setjmp' will save registers in the C stack.
+      type PStackSlice = ptr array [0..7, pointer]
+      var registers {.noinit.}: C_JmpBuf
+      if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
+        var max = cast[ByteAddress](gch.stackBottom)
+        var sp = cast[ByteAddress](addr(registers))
+        when defined(amd64):
+          # words within the jmp_buf structure may not be properly aligned.
+          let regEnd = sp +% sizeof(registers)
+          while sp <% regEnd:
+            gcMark(gch, cast[PPointer](sp)[])
+            gcMark(gch, cast[PPointer](sp +% sizeof(pointer) div 2)[])
+            sp = sp +% sizeof(pointer)
+        # Make sure sp is word-aligned
+        sp = sp and not (sizeof(pointer) - 1)
+        # loop unrolled:
+        while sp <% max - 8*sizeof(pointer):
+          gcMark(gch, cast[PStackSlice](sp)[0])
+          gcMark(gch, cast[PStackSlice](sp)[1])
+          gcMark(gch, cast[PStackSlice](sp)[2])
+          gcMark(gch, cast[PStackSlice](sp)[3])
+          gcMark(gch, cast[PStackSlice](sp)[4])
+          gcMark(gch, cast[PStackSlice](sp)[5])
+          gcMark(gch, cast[PStackSlice](sp)[6])
+          gcMark(gch, cast[PStackSlice](sp)[7])
+          sp = sp +% sizeof(pointer)*8
+        # last few entries:
+        while sp <=% max:
+          gcMark(gch, cast[PPointer](sp)[])
+          sp = sp +% sizeof(pointer)
+
+# ----------------------------------------------------------------------------
+# end of non-portable code
+# ----------------------------------------------------------------------------
diff --git a/lib/system/gc_ms.nim b/lib/system/gc_ms.nim
index ee80c61e9..2973d1b9a 100644
--- a/lib/system/gc_ms.nim
+++ b/lib/system/gc_ms.nim
@@ -9,6 +9,9 @@
 
 # A simple mark&sweep garbage collector for Nim. Define the
 # symbol ``gcUseBitvectors`` to generate a variant of this GC.
+
+import arch
+
 {.push profiler:off.}
 
 const
@@ -44,8 +47,16 @@ type
     maxStackSize: int        # max stack size
     freedObjects: int        # max entries in cycle table
 
+  GcStack {.final.} = object
+    prev: ptr GcStack
+    next: ptr GcStack
+    starts: pointer
+    pos: pointer
+    maxStackSize: int
+
   GcHeap = object            # this contains the zero count and
                              # non-zero count table
+    stack: ptr GcStack
     stackBottom: pointer
     cycleThreshold: int
     when useCellIds:
@@ -118,7 +129,6 @@ when BitsPerPage mod (sizeof(int)*8) != 0:
 
 # forward declarations:
 proc collectCT(gch: var GcHeap) {.benign.}
-proc isOnStack*(p: pointer): bool {.noinline, benign.}
 proc forAllChildren(cell: PCell, op: WalkOp) {.benign.}
 proc doOperation(p: pointer, op: WalkOp) {.benign.}
 proc forAllChildrenAux(dest: pointer, mt: PNimType, op: WalkOp) {.benign.}
@@ -169,20 +179,6 @@ proc initGC() =
       init(gch.allocated)
       init(gch.marked)
 
-var
-  localGcInitialized {.rtlThreadVar.}: bool
-
-proc setupForeignThreadGc*() =
-  ## call this if you registered a callback that will be run from a thread not
-  ## under your control. This has a cheap thread-local guard, so the GC for
-  ## this thread will only be initialized once per thread, no matter how often
-  ## it is called.
-  if not localGcInitialized:
-    localGcInitialized = true
-    var stackTop {.volatile.}: pointer
-    setStackBottom(addr(stackTop))
-    initGC()
-
 proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: WalkOp) {.benign.} =
   var d = cast[ByteAddress](dest)
   case n.kind
@@ -407,145 +403,14 @@ proc gcMark(gch: var GcHeap, p: pointer) {.inline.} =
     if objStart != nil:
       mark(gch, objStart)
 
-# ----------------- stack management --------------------------------------
-#  inspired from Smart Eiffel
+include gc_common
 
-when defined(sparc):
-  const stackIncreases = false
-elif defined(hppa) or defined(hp9000) or defined(hp9000s300) or
-     defined(hp9000s700) or defined(hp9000s800) or defined(hp9000s820):
-  const stackIncreases = true
-else:
-  const stackIncreases = false
-
-when not defined(useNimRtl):
-  {.push stack_trace: off.}
-  proc setStackBottom(theStackBottom: pointer) =
-    #c_fprintf(c_stdout, "stack bottom: %p;\n", theStackBottom)
-    # the first init must be the one that defines the stack bottom:
-    if gch.stackBottom == nil: gch.stackBottom = theStackBottom
-    else:
-      var a = cast[ByteAddress](theStackBottom) # and not PageMask - PageSize*2
-      var b = cast[ByteAddress](gch.stackBottom)
-      #c_fprintf(c_stdout, "old: %p new: %p;\n",gch.stackBottom,theStackBottom)
-      when stackIncreases:
-        gch.stackBottom = cast[pointer](min(a, b))
-      else:
-        gch.stackBottom = cast[pointer](max(a, b))
-  {.pop.}
-
-proc stackSize(): int {.noinline.} =
-  var stackTop {.volatile.}: pointer
-  result = abs(cast[int](addr(stackTop)) - cast[int](gch.stackBottom))
-
-when defined(sparc): # For SPARC architecture.
-  proc isOnStack(p: pointer): bool =
-    var stackTop {.volatile.}: pointer
-    stackTop = addr(stackTop)
-    var b = cast[ByteAddress](gch.stackBottom)
-    var a = cast[ByteAddress](stackTop)
-    var x = cast[ByteAddress](p)
-    result = a <=% x and x <=% b
-
-  proc markStackAndRegisters(gch: var GcHeap) {.noinline, cdecl.} =
-    when defined(sparcv9):
-      asm  """"flushw \n" """
-    else:
-      asm  """"ta      0x3   ! ST_FLUSH_WINDOWS\n" """
-
-    var
-      max = gch.stackBottom
-      sp: PPointer
-      stackTop: array[0..1, pointer]
-    sp = addr(stackTop[0])
-    # Addresses decrease as the stack grows.
-    while sp <= max:
-      gcMark(gch, sp[])
-      sp = cast[ppointer](cast[ByteAddress](sp) +% sizeof(pointer))
-
-elif defined(ELATE):
-  {.error: "stack marking code is to be written for this architecture".}
-
-elif stackIncreases:
-  # ---------------------------------------------------------------------------
-  # Generic code for architectures where addresses increase as the stack grows.
-  # ---------------------------------------------------------------------------
-  proc isOnStack(p: pointer): bool =
-    var stackTop {.volatile.}: pointer
-    stackTop = addr(stackTop)
-    var a = cast[ByteAddress](gch.stackBottom)
-    var b = cast[ByteAddress](stackTop)
-    var x = cast[ByteAddress](p)
-    result = a <=% x and x <=% b
-
-  var
-    jmpbufSize {.importc: "sizeof(jmp_buf)", nodecl.}: int
-      # a little hack to get the size of a JmpBuf in the generated C code
-      # in a platform independent way
-
-  proc markStackAndRegisters(gch: var GcHeap) {.noinline, cdecl.} =
-    var registers: C_JmpBuf
-    if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
-      var max = cast[ByteAddress](gch.stackBottom)
-      var sp = cast[ByteAddress](addr(registers)) +% jmpbufSize -% sizeof(pointer)
-      # sp will traverse the JMP_BUF as well (jmp_buf size is added,
-      # otherwise sp would be below the registers structure).
-      while sp >=% max:
-        gcMark(gch, cast[ppointer](sp)[])
-        sp = sp -% sizeof(pointer)
-
-else:
-  # ---------------------------------------------------------------------------
-  # Generic code for architectures where addresses decrease as the stack grows.
-  # ---------------------------------------------------------------------------
-  proc isOnStack(p: pointer): bool =
-    var stackTop {.volatile.}: pointer
-    stackTop = addr(stackTop)
-    var b = cast[ByteAddress](gch.stackBottom)
-    var a = cast[ByteAddress](stackTop)
-    var x = cast[ByteAddress](p)
-    result = a <=% x and x <=% b
-
-  proc markStackAndRegisters(gch: var GcHeap) {.noinline, cdecl.} =
-    # We use a jmp_buf buffer that is in the C stack.
-    # Used to traverse the stack and registers assuming
-    # that 'setjmp' will save registers in the C stack.
-    type PStackSlice = ptr array [0..7, pointer]
-    var registers {.noinit.}: C_JmpBuf
-    if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
-      var max = cast[ByteAddress](gch.stackBottom)
-      var sp = cast[ByteAddress](addr(registers))
-      when defined(amd64):
-        # words within the jmp_buf structure may not be properly aligned.
-        let regEnd = sp +% sizeof(registers)
-        while sp <% regEnd:
-          gcMark(gch, cast[PPointer](sp)[])
-          gcMark(gch, cast[PPointer](sp +% sizeof(pointer) div 2)[])
-          sp = sp +% sizeof(pointer)
-      # Make sure sp is word-aligned
-      sp = sp and not (sizeof(pointer) - 1)
-      # loop unrolled:
-      while sp <% max - 8*sizeof(pointer):
-        gcMark(gch, cast[PStackSlice](sp)[0])
-        gcMark(gch, cast[PStackSlice](sp)[1])
-        gcMark(gch, cast[PStackSlice](sp)[2])
-        gcMark(gch, cast[PStackSlice](sp)[3])
-        gcMark(gch, cast[PStackSlice](sp)[4])
-        gcMark(gch, cast[PStackSlice](sp)[5])
-        gcMark(gch, cast[PStackSlice](sp)[6])
-        gcMark(gch, cast[PStackSlice](sp)[7])
-        sp = sp +% sizeof(pointer)*8
-      # last few entries:
-      while sp <=% max:
-        gcMark(gch, cast[PPointer](sp)[])
-        sp = sp +% sizeof(pointer)
-
-# ----------------------------------------------------------------------------
-# end of non-portable code
-# ----------------------------------------------------------------------------
+proc markStackAndRegisters(gch: var GcHeap) {.noinline, cdecl.} =
+  forEachStackSlot(gch, gcMark)
 
 proc collectCTBody(gch: var GcHeap) =
-  gch.stat.maxStackSize = max(gch.stat.maxStackSize, stackSize())
+  when not defined(nimCoroutines):
+    gch.stat.maxStackSize = max(gch.stat.maxStackSize, stackSize())
   prepareForInteriorPointerChecking(gch.region)
   markStackAndRegisters(gch)
   markGlobals(gch)
@@ -599,8 +464,13 @@ when not defined(useNimRtl):
              "[GC] occupied memory: " & $getOccupiedMem() & "\n" &
              "[GC] collections: " & $gch.stat.collections & "\n" &
              "[GC] max threshold: " & $gch.stat.maxThreshold & "\n" &
-             "[GC] freed objects: " & $gch.stat.freedObjects & "\n" &
-             "[GC] max stack size: " & $gch.stat.maxStackSize & "\n"
+             "[GC] freed objects: " & $gch.stat.freedObjects & "\n"
+    when defined(nimCoroutines):
+      result = result & "[GC] number of stacks: " & $gch.stack.len & "\n"
+      for stack in items(gch.stack):
+        result = result & "[GC]   stack " & stack.starts.repr & "[GC]     max stack size " & $stack.maxStackSize & "\n"
+    else:
+      result = result & "[GC] max stack size: " & $gch.stat.maxStackSize & "\n"
     GC_enable()
 
 {.pop.}