summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Andreas Rumpf <rumpf_a@web.de> 2017-02-26 23:24:29 +0100
committer GitHub <noreply@github.com> 2017-02-26 23:24:29 +0100
commit 895e2aea15f0600b146933bff77f54290b68a3dd (patch)
tree a84e5cb8df765cc16ca5187b1d72bb720a0abf84
parent 4f062c3be08fa2bc3e167e1a6b9842c92bc8c8f7 (diff)
parent 9df0000621d528309cdaae67e0d203d9fdff57c3 (diff)
download Nim-895e2aea15f0600b146933bff77f54290b68a3dd.tar.gz
Merge pull request #5317 from rokups/feature/coroutines
Coroutine improvements
-rw-r--r-- .travis.yml 12
-rw-r--r-- appveyor.yml 19
-rw-r--r-- ci/deps.sh 3
-rw-r--r-- compiler/commands.nim 4
-rw-r--r-- compiler/extccomp.nim 49
-rw-r--r-- compiler/msgs.nim 4
-rw-r--r-- lib/arch/arch.nim 62
-rw-r--r-- lib/arch/i386.asm 79
-rw-r--r-- lib/arch/ms_amd64.asm 90
-rw-r--r-- lib/arch/ms_i386.asm 12
-rw-r--r-- lib/arch/unix_amd64.asm 89
-rw-r--r-- lib/arch/unix_i386.asm 12
-rw-r--r-- lib/arch/x86/amd64.S 96
-rw-r--r-- lib/arch/x86/i386.S 64
-rw-r--r-- lib/pure/coro.nim 357
-rw-r--r-- lib/system.nim 20
-rw-r--r-- lib/system/excpt.nim 11
-rw-r--r-- lib/system/gc.nim 73
-rw-r--r-- lib/system/gc2.nim 66
-rw-r--r-- lib/system/gc_common.nim 344
-rw-r--r-- lib/system/gc_ms.nim 30
-rw-r--r-- lib/windows/winlean.nim 14
-rw-r--r-- tests/coroutines/texceptions.nim 23
-rw-r--r-- tests/coroutines/texceptions.nim.cfg 1
-rw-r--r-- tests/coroutines/tgc.nim 15
-rw-r--r-- tests/coroutines/tgc.nim.cfg 1
-rw-r--r-- tests/coroutines/titerators.nim 24
-rw-r--r-- tests/coroutines/titerators.nim.cfg 1
28 files changed, 803 insertions, 772 deletions
diff --git a/.travis.yml b/.travis.yml
index ffb0033a3..6d8d8b7fc 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -22,18 +22,16 @@ addons:
 
 before_script:
   - set -e
-  - curl --out fasm-1.71.39.tgz https://nim-lang.org/download/fasm-1.71.39.tgz
-  - tar xvf fasm-1.71.39.tgz
   - git clone --depth 1 https://github.com/nim-lang/csources.git
   - cd csources
   - sh build.sh
   - cd ..
   - sed -i -e 's,cc = gcc,cc = clang,' config/nim.cfg
-  - export PATH=$(pwd)/bin:$(pwd)/fasm:$PATH
+  - export PATH=$(pwd)/bin:$PATH
 script:
   - nim c koch
-  - ./koch boot
-  - ./koch boot -d:release
+  - ./koch boot -d:nimCoroutines
+  - ./koch boot -d:release -d:nimCoroutines
   - ./koch nimble
   - nim e tests/test_nimscript.nims
   - nimble install zip -y
@@ -41,7 +39,7 @@ script:
   - nimble install sdl1
   - nimble install jester@#head
   - nimble install niminst
-  - nim c --taintMode:on tests/testament/tester
-  - tests/testament/tester --pedantic all
+  - nim c --taintMode:on -d:nimCoroutines tests/testament/tester
+  - tests/testament/tester --pedantic all -d:nimCoroutines
   - ./koch csource
   - ./koch xz
diff --git a/appveyor.yml b/appveyor.yml
index ea98b8507..9199755d9 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -3,7 +3,6 @@ version: '{build}'
 cache:
 - x86_64-4.9.2-release-win32-seh-rt_v4-rev4.7z
 - sqlite-dll-win64-x64-3160200.zip
-- fasmw17159.zip
 # - i686-4.9.2-release-win32-dwarf-rt_v4-rev4.7z
 
 matrix:
@@ -16,18 +15,12 @@ environment:
       MINGW_ARCHIVE: x86_64-4.9.2-release-win32-seh-rt_v4-rev4.7z
       SQLITE_URL: http://www.sqlite.org/2017/sqlite-dll-win64-x64-3160200.zip
       SQLITE_ARCHIVE: sqlite-dll-win64-x64-3160200.zip
-      FASM_DIR: fasm
-      FASM_URL: https://flatassembler.net/fasmw17159.zip
-      FASM_ARCHIVE: fasmw17159.zip
       platform: x64
     # - MINGW_DIR: mingw32
     #   MINGW_URL: https://sourceforge.net/projects/mingw-w64/files/Toolchains%20targetting%20Win32/Personal%20Builds/mingw-builds/4.9.2/threads-win32/dwarf/i686-4.9.2-release-win32-dwarf-rt_v4-rev4.7z/download
     #   MINGW_ARCHIVE: i686-4.9.2-release-win32-dwarf-rt_v4-rev4.7z
     #   SQLITE_URL: http://www.sqlite.org/2017/sqlite-dll-win32-x86-3160200.zip
     #   SQLITE_ARCHIVE: sqlite-dll-win32-x86-3160200.zip
-    #   FASM_DIR: fasm
-    #   FASM_URL: https://flatassembler.net/fasmw17159.zip
-    #   FASM_ARCHIVE: fasmw17159.zip
     #   platform: x86
 
 install:
@@ -38,9 +31,7 @@ install:
   - 7z x -y "%SQLITE_ARCHIVE%" -o"%CD%\DIST"> nul
   - IF not exist "%MINGW_ARCHIVE%" appveyor DownloadFile "%MINGW_URL%" -FileName "%MINGW_ARCHIVE%"
   - 7z x -y "%MINGW_ARCHIVE%" -o"%CD%\DIST"> nul
-  - IF not exist "%FASM_ARCHIVE%" appveyor DownloadFile "%FASM_URL%" -FileName "%FASM_ARCHIVE%"
-  - 7z x -y "%FASM_ARCHIVE%" -o"%CD%\DIST\%FASM_DIR%" > nul
-  - SET PATH=%CD%\DIST\%MINGW_DIR%\BIN;%CD%\BIN;%CD%\DIST\%FASM_DIR%;%PATH%
+  - SET PATH=%CD%\DIST\%MINGW_DIR%\BIN;%CD%\BIN;%PATH%
   - IF "%PLATFORM%" == "x64" ( copy C:\OpenSSL-Win64\libeay32.dll %CD%\BIN\libeay64.dll & copy C:\OpenSSL-Win64\libeay32.dll %CD%\BIN\libeay32.dll & copy C:\OpenSSL-Win64\libssl32.dll %CD%\BIN\libssl64.dll & copy C:\OpenSSL-Win64\libssl32.dll %CD%\BIN\libssl32.dll )
     ELSE ( copy C:\OpenSSL-Win32\libeay32.dll %CD%\BIN\libeay32.dll & copy C:\OpenSSL-Win32\libssl32.dll %CD%\BIN\libssl32.dll )
   - IF "%PLATFORM%" == "x64" ( copy %CD%\DIST\sqlite3.dll %CD%\BIN\sqlite3_64.dll ) ELSE ( copy %CD%\DIST\sqlite3.dll %CD%\BIN\sqlite3_32.dll )
@@ -52,8 +43,8 @@ install:
 
 build_script:
   - bin\nim c koch
-  - koch boot
-  - koch boot -d:release
+  - koch boot -d:nimCoroutines
+  - koch boot -d:release -d:nimCoroutines
   - koch nimble
   - nim e tests/test_nimscript.nims
   - nimble install zip -y
@@ -61,10 +52,10 @@ build_script:
   - nimble install sdl1
   - nimble install jester@#head
   - nimble install niminst
-  - nim c --taintMode:on tests/testament/tester
+  - nim c --taintMode:on -d:nimCoroutines tests/testament/tester
 
 test_script:
-  - tests\testament\tester --pedantic all
+  - tests\testament\tester --pedantic all -d:nimCoroutines
   - koch csource
   - koch zip
 
diff --git a/ci/deps.sh b/ci/deps.sh
index 3385a213b..7471785a0 100644
--- a/ci/deps.sh
+++ b/ci/deps.sh
@@ -3,11 +3,10 @@ echo "Running on $CI_RUNNER_ID ($CI_RUNNER_DESCRIPTION) with tags $CI_RUNNER_TAG
 
 # Packages
 apt-get update -qq
-apt-get install -y -qq build-essential git libcurl4-openssl-dev libsdl1.2-dev libgc-dev nodejs fasm
+apt-get install -y -qq build-essential git libcurl4-openssl-dev libsdl1.2-dev libgc-dev nodejs
 
 gcc -v
 
-fasm -v
 export PATH=$(pwd)/bin:$PATH
 
 # Nimble deps
diff --git a/compiler/commands.nim b/compiler/commands.nim
index b75e953de..6fdca27fc 100644
--- a/compiler/commands.nim
+++ b/compiler/commands.nim
@@ -642,10 +642,6 @@ proc processSwitch(switch, arg: string, pass: TCmdLinePass, info: TLineInfo;
   of "experimental":
     expectNoArg(switch, arg, pass, info)
     gExperimentalMode = true
-  of "assembler":
-    cAssembler = nameToCC(arg)
-    if cAssembler notin cValidAssemblers:
-      localError(info, errGenerated, "'$1' is not a valid assembler." % [arg])
   of "nocppexceptions":
     expectNoArg(switch, arg, pass, info)
     incl(gGlobalOptions, optNoCppExceptions)
diff --git a/compiler/extccomp.nim b/compiler/extccomp.nim
index dab643d50..70cd411fe 100644
--- a/compiler/extccomp.nim
+++ b/compiler/extccomp.nim
@@ -21,7 +21,7 @@ import
 type
   TSystemCC* = enum
     ccNone, ccGcc, ccLLVM_Gcc, ccCLang, ccLcc, ccBcc, ccDmc, ccWcc, ccVcc,
-    ccTcc, ccPcc, ccUcc, ccIcl, asmFasm
+    ccTcc, ccPcc, ccUcc, ccIcl
   TInfoCCProp* = enum         # properties of the C compiler:
     hasSwitchRange,           # CC allows ranges in switch statements (GNU C)
     hasComputedGoto,          # CC has computed goto (GNU C extension)
@@ -320,31 +320,6 @@ compiler ucc:
     packedPragma: "", # XXX: not supported yet
     props: {})
 
-# fasm assembler
-compiler fasm:
-  result = (
-    name: "fasm",
-    objExt: "o",
-    optSpeed: "",
-    optSize: "",
-    compilerExe: "fasm",
-    cppCompiler: "fasm",
-    compileTmpl: "$file $objfile",
-    buildGui: "",
-    buildDll: "",
-    buildLib: "",
-    linkerExe: "",
-    linkTmpl: "",
-    includeCmd: "",
-    linkDirCmd: "",
-    linkLibCmd: "",
-    debug: "",
-    pic: "",
-    asmStmtFrmt: "",
-    structStmtFmt: "",
-    packedPragma: "",
-    props: {})
-
 const
   CC*: array[succ(low(TSystemCC))..high(TSystemCC), TInfoCC] = [
     gcc(),
@@ -358,22 +333,17 @@ const
     tcc(),
     pcc(),
     ucc(),
-    icl(),
-    fasm()]
+    icl()]
 
   hExt* = ".h"
 
 var
   cCompiler* = ccGcc # the used compiler
-  cAssembler* = ccNone
   gMixedMode*: bool  # true if some module triggered C++ codegen
   cIncludes*: seq[string] = @[]   # directories to search for included files
   cLibs*: seq[string] = @[]       # directories to search for lib files
   cLinkedLibs*: seq[string] = @[] # libraries to link
 
-const
-  cValidAssemblers* = {asmFasm}
-
 # implementation
 
 proc libNameTmpl(): string {.inline.} =
@@ -577,21 +547,6 @@ proc getLinkerExe(compiler: TSystemCC): string =
 
 proc getCompileCFileCmd*(cfile: Cfile): string =
   var c = cCompiler
-  if cfile.cname.endswith(".asm"):
-    var customAssembler = getConfigVar("assembler")
-    if customAssembler.len > 0:
-      c = nameToCC(customAssembler)
-    else:
-      if targetCPU == cpuI386 or targetCPU == cpuAmd64:
-        c = asmFasm
-      else:
-        c = ccNone
-
-    if c == ccNone:
-      rawMessage(errExternalAssemblerNotFound, "")
-    elif c notin cValidAssemblers:
-      rawMessage(errExternalAssemblerNotValid, customAssembler)
-
   var options = cFileSpecificOptions(cfile.cname)
   var exe = getConfigVar(c, ".exe")
   if exe.len == 0: exe = c.getCompilerExe(cfile.cname)
diff --git a/compiler/msgs.nim b/compiler/msgs.nim
index a1ba82263..0d30651bb 100644
--- a/compiler/msgs.nim
+++ b/compiler/msgs.nim
@@ -108,8 +108,6 @@ type
     errCannotInferReturnType,
     errGenericLambdaNotAllowed,
     errCompilerDoesntSupportTarget,
-    errExternalAssemblerNotFound,
-    errExternalAssemblerNotValid,
     errUser,
     warnCannotOpenFile,
     warnOctalEscape, warnXIsNeverRead, warnXmightNotBeenInit,
@@ -372,8 +370,6 @@ const
                                 "it is used as an operand to another routine and the types " &
                                 "of the generic paramers can be inferred from the expected signature.",
     errCompilerDoesntSupportTarget: "The current compiler \'$1\' doesn't support the requested compilation target",
-    errExternalAssemblerNotFound: "External assembler not found",
-    errExternalAssemblerNotValid: "External assembler '$1' is not a valid assembler",
     errUser: "$1",
     warnCannotOpenFile: "cannot open \'$1\'",
     warnOctalEscape: "octal escape sequences do not exist; leading zero is ignored",
diff --git a/lib/arch/arch.nim b/lib/arch/arch.nim
deleted file mode 100644
index 0b3df3d3c..000000000
--- a/lib/arch/arch.nim
+++ /dev/null
@@ -1,62 +0,0 @@
-#
-#
-#            Nim's Runtime Library
-#        (c) Copyright 2015 Rokas Kupstys
-#
-#    See the file "copying.txt", included in this
-#    distribution, for details about the copyright.
-#
-# Architecture-specific optimizations and features.
-# arch.nim can be imported by only a subset of the
-# architectures supported by Nim.
-
-when defined(windows):
-  const
-    ABI* = "ms"
-elif defined(unix):
-  const
-    ABI* = "unix"
-else:
-  {.error: "Unsupported ABI".}
-
-when defined(amd64):
-  when defined(unix):
-    # unix (sysv) ABI
-    type
-      JmpBufReg* {.pure.} = enum
-        BX, BP, R12, R13, R14, R15, SP, IP, TOTAL
-  elif defined(windows):
-    # ms ABI
-    type
-      JmpBufReg* {.pure.} = enum
-        BX, BP, R12, R13, R14, R15, SP, IP, SI, DI, TOTAL
-  type
-    Reg* {.pure.} = enum
-      AX, BX, CX, DX, SI, DI, BP, SP, IP, R8, R9, R10, R11, R12, R13, R14, R15, TOTAL
-
-elif defined(i386) or defined(nimdoc):
-    # identical fastcall calling convention on all x86 OS
-    type
-      JmpBufReg* {.pure.} = enum
-        BX, SI, DI, BP, SP, IP, TOTAL
-
-      Reg* {.pure.} = enum
-        AX, BX, CX, BP, SP, DI, SI, TOTAL
-
-else:
-  {.error: "Unsupported architecture".}
-
-{.compile: "./" & ABI & "_" & hostCPU & ".asm"}
-
-type
-  JmpBuf* = array[JmpBufReg.TOTAL, pointer]
-  Registers* = array[Reg.TOTAL, pointer]
-
-
-proc getRegisters*(ctx: var Registers) {.importc: "narch_$1", fastcall.}
-
-proc setjmp*(ctx: var JmpBuf): int {.importc: "narch_$1", fastcall.}
-proc longjmp*(ctx: JmpBuf, ret=1) {.importc: "narch_$1", fastcall.}
-
-proc coroSwitchStack*(sp: pointer) {.importc: "narch_$1", fastcall.}
-proc coroRestoreStack*() {.importc: "narch_$1", fastcall.}
diff --git a/lib/arch/i386.asm b/lib/arch/i386.asm
deleted file mode 100644
index 61f6fdda7..000000000
--- a/lib/arch/i386.asm
+++ /dev/null
@@ -1,79 +0,0 @@
-;
-;
-;            Nim's Runtime Library
-;        (c) Copyright 2015 Rokas Kupstys
-;
-;    See the file "copying.txt", included in this
-;    distribution, for details about the copyright.
-;
-
-section ".text" executable
-public narch_getRegisters
-public @narch_getRegisters@4
-public narch_setjmp
-public @narch_setjmp@4
-public narch_longjmp
-public @narch_longjmp@8
-public narch_coroSwitchStack
-public @narch_coroSwitchStack@4
-public narch_coroRestoreStack
-public @narch_coroRestoreStack@0
-
-@narch_getRegisters@4:
-narch_getRegisters:
-    mov   [ecx], eax
-    mov   [ecx+4], ebx
-    mov   [ecx+8], ecx
-    mov   [ecx+0Ch], ebp
-    mov   [ecx+10h], esp
-    mov   [ecx+14h], edi
-    mov   [ecx+18h], esi
-    ret
-
-
-@narch_setjmp@4:
-narch_setjmp:
-    ; Based on code from musl libc Copyright © 2005-2014 Rich Felker, et al.
-    mov  [ecx], ebx
-    mov  [ecx+4], esi
-    mov  [ecx+8], edi
-    mov  [ecx+0Ch], ebp
-    lea  eax, [esp+4]
-    mov  [ecx+10h], eax
-    mov  eax, [esp]
-    mov  [ecx+14h], eax
-    xor  eax, eax
-    ret
-
-
-@narch_longjmp@8:
-narch_longjmp:
-    ; Based on code from musl libc Copyright © 2005-2014 Rich Felker, et al.
-    mov  eax, edx
-    test eax, eax
-    jnz  @F
-    inc  eax
-@@:
-    mov  ebx, [ecx]
-    mov  esi, [ecx+4]
-    mov  edi, [ecx+8]
-    mov  ebp, [ecx+0Ch]
-    mov  esp, [ecx+10h]
-    mov  edx, [ecx+14h]
-    jmp  edx
-
-
-@narch_coroSwitchStack@4:
-narch_coroSwitchStack:
-    pop eax                   ; return address
-    mov edx, esp              ; old esp for saving
-    mov esp, ecx              ; swap stack with one passed to func
-    push edx                  ; store old stack pointer on newly switched stack
-    jmp eax                   ; return
-
-
-@narch_coroRestoreStack@0:
-narch_coroRestoreStack:
-    pop eax                   ; return address
-    pop esp                   ; resture old stack pointer
-    jmp eax                   ; return
diff --git a/lib/arch/ms_amd64.asm b/lib/arch/ms_amd64.asm
deleted file mode 100644
index 0503b31c9..000000000
--- a/lib/arch/ms_amd64.asm
+++ /dev/null
@@ -1,90 +0,0 @@
-;
-;
-;            Nim's Runtime Library
-;        (c) Copyright 2015 Rokas Kupstys
-;
-;    See the file "copying.txt", included in this
-;    distribution, for details about the copyright.
-;
-
-format MS64 COFF
-
-section ".text" executable align 16
-public narch_getRegisters
-public narch_setjmp
-public narch_longjmp
-public narch_coroSwitchStack
-public narch_coroRestoreStack
-
-
-narch_getRegisters:
-    mov   [rcx], rax
-    mov   [rcx+8], rbx
-    mov   [rcx+10h], rcx
-    mov   [rcx+18h], rdx
-    mov   [rcx+20h], rsi
-    mov   [rcx+28h], rdi
-    mov   [rcx+30h], rbp
-    mov   [rcx+38h], rsp
-    mov   rax, [rsp]
-    mov   [rcx+40h], rax      ; rip
-    mov   [rcx+48h], r8
-    mov   [rcx+50h], r9
-    mov   [rcx+58h], r10
-    mov   [rcx+60h], r11
-    mov   [rcx+68h], r12
-    mov   [rcx+70h], r13
-    mov   [rcx+78h], r14
-    mov   [rcx+80h], r15
-    ret
-
-
-narch_setjmp:
-    ; Based on code from musl libc Copyright © 2005-2014 Rich Felker, et al.
-    mov   [rcx], rbx          ; rcx is jmp_buf, move registers onto it
-    mov   [rcx+8], rbp
-    mov   [rcx+10h], r12
-    mov   [rcx+18h], r13
-    mov   [rcx+20h], r14
-    mov   [rcx+28h], r15
-    lea   rdx, [rsp+8]        ; this is our rsp WITHOUT current ret addr
-    mov   [rcx+30h], rdx
-    mov   rdx, [rsp]          ; save return addr ptr for new rip
-    mov   [rcx+38h], rdx
-    mov   [rcx+40h], rsi
-    mov   [rcx+48h], rdi
-    xor   rax, rax            ; always return 0
-    ret
-
-narch_longjmp:
-    ; Based on code from musl libc Copyright © 2005-2014 Rich Felker, et al.
-    mov   rax, rdx            ; val will be longjmp return
-    test  rax, rax
-    jnz   @F
-    inc   rax                 ; if val==0, val=1 per longjmp semantics
-@@:
-    mov   rbx, [rcx]          ; rax is the jmp_buf, restore regs from it
-    mov   rbp, [rcx+8]
-    mov   r12, [rcx+10h]
-    mov   r13, [rcx+18h]
-    mov   r14, [rcx+20h]
-    mov   r15, [rcx+28h]
-    mov   rsp, [rcx+30h]      ; this ends up being the stack pointer
-    mov   rdx, [rcx+38h]      ; this is the instruction pointer
-    jmp   rdx                 ; goto saved address without altering rsp
-
-
-narch_coroSwitchStack:
-    pop rax                   ; return address
-    mov rdx, rsp              ; old rsp for saving
-    mov rsp, rcx              ; swap stack with one passed to func
-    push rdx                  ; store old stack pointer on newly switched stack
-    sub rsp, 28h              ; stack alignment + shadow space
-    jmp rax                   ; return
-
-
-narch_coroRestoreStack:
-    pop rax                   ; return address
-    add rsp, 28h              ; stack alignment + shadow space
-    pop rsp                   ; resture old stack pointer
-    jmp rax                   ; return
diff --git a/lib/arch/ms_i386.asm b/lib/arch/ms_i386.asm
deleted file mode 100644
index a31a698d1..000000000
--- a/lib/arch/ms_i386.asm
+++ /dev/null
@@ -1,12 +0,0 @@
-;
-;
-;            Nim's Runtime Library
-;        (c) Copyright 2015 Rokas Kupstys
-;
-;    See the file "copying.txt", included in this
-;    distribution, for details about the copyright.
-;
-
-format MS COFF
-
-include 'i386.asm'
diff --git a/lib/arch/unix_amd64.asm b/lib/arch/unix_amd64.asm
deleted file mode 100644
index 3005c150c..000000000
--- a/lib/arch/unix_amd64.asm
+++ /dev/null
@@ -1,89 +0,0 @@
-;
-;
-;            Nim's Runtime Library
-;        (c) Copyright 2015 Rokas Kupstys
-;
-;    See the file "copying.txt", included in this
-;    distribution, for details about the copyright.
-;
-
-format ELF64
-
-section ".text" executable align 16
-public narch_getRegisters
-public narch_setjmp
-public narch_longjmp
-public narch_coroSwitchStack
-public narch_coroRestoreStack
-
-
-narch_getRegisters:
-    mov   [rdi], rax
-    mov   [rdi+8], rbx
-    mov   [rdi+10h], rcx
-    mov   [rdi+18h], rdx
-    mov   [rdi+20h], rsi
-    mov   [rdi+28h], rdi
-    mov   [rdi+30h], rbp
-    mov   [rdi+38h], rsp
-    mov   rax, [rsp]
-    mov   [rdi+40h], rax      ; rip
-    mov   [rdi+48h], r8
-    mov   [rdi+50h], r9
-    mov   [rdi+58h], r10
-    mov   [rdi+60h], r11
-    mov   [rdi+68h], r12
-    mov   [rdi+70h], r13
-    mov   [rdi+78h], r14
-    mov   [rdi+80h], r15
-    ret
-
-
-narch_setjmp:
-    ; Based on code from musl libc Copyright © 2005-2014 Rich Felker, et al.
-    mov   [rdi], rbx          ; rdi is jmp_buf, move registers onto it
-    mov   [rdi+8], rbp
-    mov   [rdi+10h], r12
-    mov   [rdi+18h], r13
-    mov   [rdi+20h], r14
-    mov   [rdi+28h], r15
-    lea   rdx, [rsp+8]        ; this is our rsp WITHOUT current ret addr
-    mov   [rdi+30h], rdx
-    mov   rdx, [rsp]          ; save return addr ptr for new rip
-    mov   [rdi+38h], rdx
-    xor   rax, rax            ; always return 0
-    ret
-
-
-narch_longjmp:
-    ; Based on code from musl libc Copyright © 2005-2014 Rich Felker, et al.
-    mov   rax, rsi            ; val will be longjmp return
-    test  rax, rax
-    jnz   @F
-    inc   rax                 ; if val==0, val=1 per longjmp semantics
-@@:
-    mov   rbx, [rdi]          ; rdi is the jmp_buf, restore regs from it
-    mov   rbp, [rdi+8]
-    mov   r12, [rdi+10h]
-    mov   r13, [rdi+18h]
-    mov   r14, [rdi+20h]
-    mov   r15, [rdi+28h]
-    mov   rsp, [rdi+30h]      ; this ends up being the stack pointer
-    mov   rdx, [rdi+38h]      ; this is the instruction pointer
-    jmp   rdx                 ; goto saved address without altering rsp
-
-
-narch_coroSwitchStack:
-    pop rsi                   ; return address
-    mov rdx, rsp              ; old rsp for saving
-    mov rsp, rdi              ; swap stack with one passed to func
-    push rdx                  ; store old stack pointer on newly switched stack
-    sub rsp, 8h               ; stack alignment
-    jmp rsi                   ; return
-
-
-narch_coroRestoreStack:
-	pop rsi                   ; return address
-	add rsp, 8h               ; stack alignment
-	pop rsp                   ; resture old stack pointer
-	jmp rsi                   ; return
diff --git a/lib/arch/unix_i386.asm b/lib/arch/unix_i386.asm
deleted file mode 100644
index 278679067..000000000
--- a/lib/arch/unix_i386.asm
+++ /dev/null
@@ -1,12 +0,0 @@
-;
-;
-;            Nim's Runtime Library
-;        (c) Copyright 2015 Rokas Kupstys
-;
-;    See the file "copying.txt", included in this
-;    distribution, for details about the copyright.
-;
-
-format ELF
-
-include 'i386.asm'
diff --git a/lib/arch/x86/amd64.S b/lib/arch/x86/amd64.S
new file mode 100644
index 000000000..47a26f627
--- /dev/null
+++ b/lib/arch/x86/amd64.S
@@ -0,0 +1,96 @@
+#
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2015 Rokas Kupstys
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+# Partially based on code from musl libc Copyright © 2005-2014 Rich Felker, et al.
+
+.globl narch_coroExecWithStack
+.globl narch_setjmp
+.globl narch_longjmp
+.text
+
+
+# SysV ABI - first argument is rdi.
+# MS ABI   - first argument is rcx.
+#if defined(__MINGW32__) || defined(__MINGW64__)
+  #define REG_ARG1 rcx
+  #define REG_ARG2 rdx
+#else
+  #define REG_ARG1 rdi
+  #define REG_ARG2 rsi
+#endif
+
+
+narch_coroExecWithStack:
+  mov  %REG_ARG2, %rsp        # swap stack with one passed to func
+  sub  $0x30, %rsp            # shadow space (for ms ABI) 0x20 + 0x10 for possible misalignment
+  and  $-0x10, %rsp           # 16-byte stack alignment
+  call *%REG_ARG1
+
+
+narch_setjmp:
+  add   $0x10, %REG_ARG1      # 16-byte alignment
+  and   $-0x10, %REG_ARG1
+  mov   %rbx, 0x00(%REG_ARG1) # jmp_buf, move registers onto it
+  mov   %rbp, 0x08(%REG_ARG1)
+  mov   %r12, 0x10(%REG_ARG1)
+  mov   %r13, 0x18(%REG_ARG1)
+  mov   %r14, 0x20(%REG_ARG1)
+  mov   %r15, 0x28(%REG_ARG1)
+  lea   0x08(%rsp), %rdx      # this is our rsp WITHOUT current ret addr
+  mov   %rdx, 0x30(%REG_ARG1)
+  mov   (%rsp), %rdx          # save return addr ptr for new rip
+  mov   %rdx, 0x38(%REG_ARG1)
+  mov   %rsi, 0x40(%REG_ARG1)
+  mov   %rdi, 0x48(%REG_ARG1)
+#if defined(__MINGW32__) || defined(__MINGW64__)
+  movaps %xmm6,  0x50(%REG_ARG1)
+  movaps %xmm7,  0x60(%REG_ARG1)
+  movaps %xmm8,  0x70(%REG_ARG1)
+  movaps %xmm9,  0x80(%REG_ARG1)
+  movaps %xmm10, 0x90(%REG_ARG1)
+  movaps %xmm11, 0xA0(%REG_ARG1)
+  movaps %xmm12, 0xB0(%REG_ARG1)
+  movaps %xmm13, 0xC0(%REG_ARG1)
+  movaps %xmm14, 0xD0(%REG_ARG1)
+  movaps %xmm15, 0xE0(%REG_ARG1)
+#endif
+  xor   %rax, %rax            # always return 0
+  ret
+
+
+narch_longjmp:
+  add   $0x10, %REG_ARG1      # 16-byte alignment
+  and   $-0x10, %REG_ARG1     #
+  mov   %REG_ARG2, %rax       # val will be longjmp return
+  test  %rax, %rax
+  jnz   narch_longjmp_1
+  inc   %rax                  # if val==0, val=1 per longjmp semantics
+narch_longjmp_1:
+  mov   0x00(%REG_ARG1), %rbx # jmp_buf, restore regs from it
+  mov   0x08(%REG_ARG1), %rbp
+  mov   0x10(%REG_ARG1), %r12
+  mov   0x18(%REG_ARG1), %r13
+  mov   0x20(%REG_ARG1), %r14
+  mov   0x28(%REG_ARG1), %r15
+  mov   0x30(%REG_ARG1), %rsp # this ends up being the stack pointer
+  mov   0x38(%REG_ARG1), %rdx # this is the instruction pointer
+  mov   0x40(%REG_ARG1), %rsi
+  mov   0x48(%REG_ARG1), %rdi
+#if defined(__MINGW32__) || defined(__MINGW64__)
+  movaps 0x50(%REG_ARG1), %xmm6
+  movaps 0x60(%REG_ARG1), %xmm7
+  movaps 0x70(%REG_ARG1), %xmm8
+  movaps 0x80(%REG_ARG1), %xmm9
+  movaps 0x90(%REG_ARG1), %xmm10
+  movaps 0xA0(%REG_ARG1), %xmm11
+  movaps 0xB0(%REG_ARG1), %xmm12
+  movaps 0xC0(%REG_ARG1), %xmm13
+  movaps 0xD0(%REG_ARG1), %xmm14
+  movaps 0xE0(%REG_ARG1), %xmm15
+#endif
+  jmp  *%rdx                  # goto saved address without altering rsp
diff --git a/lib/arch/x86/i386.S b/lib/arch/x86/i386.S
new file mode 100644
index 000000000..d7de4a4c3
--- /dev/null
+++ b/lib/arch/x86/i386.S
@@ -0,0 +1,64 @@
+#
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2015 Rokas Kupstys
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+# Partially based on code from musl libc Copyright © 2005-2014 Rich Felker, et al.
+
+.globl narch_coroExecWithStack
+.globl narch_setjmp
+.globl narch_longjmp
+#if defined(__MINGW32__) || defined(__MINGW64__)
+.globl @narch_coroExecWithStack@8
+.globl @narch_setjmp@4
+.globl @narch_longjmp@8
+#endif
+.text
+
+
+#if defined(__MINGW32__) || defined(__MINGW64__)
+@narch_coroExecWithStack@8:
+#endif
+narch_coroExecWithStack:
+  mov  %edx, %esp            # swap stack with one passed to func
+  sub  $0x10, %esp           # 16-byte alignment
+  and  $-0x10, %esp          #
+  sub  $4, %esp              # Simulate misalignment caused by return addr
+  jmp *%ecx
+
+
+#if defined(__MINGW32__) || defined(__MINGW64__)
+@narch_setjmp@4:
+#endif
+narch_setjmp:
+    mov  %ebx, (%ecx)
+    mov  %esi, 0x04(%ecx)
+    mov  %edi, 0x08(%ecx)
+    mov  %ebp, 0x0C(%ecx)
+    lea  0x04(%esp), %eax
+    mov  %eax, 0x10(%ecx)
+    mov  (%esp), %eax
+    mov  %eax, 0x14(%ecx)
+    xor  %eax, %eax
+    ret
+
+
+#if defined(__MINGW32__) || defined(__MINGW64__)
+@narch_longjmp@8:
+#endif
+narch_longjmp:
+    mov  %edx, %eax
+    test %eax, %eax
+    jnz  narch_longjmp_1
+    inc  %eax
+narch_longjmp_1:
+    mov  (%ecx), %ebx
+    mov  0x04(%ecx), %esi
+    mov  0x08(%ecx), %edi
+    mov  0x0C(%ecx), %ebp
+    mov  0x10(%ecx), %esp
+    mov  0x14(%ecx), %edx
+    jmp *%edx
diff --git a/lib/pure/coro.nim b/lib/pure/coro.nim
index 0373708d0..e053f4427 100644
--- a/lib/pure/coro.nim
+++ b/lib/pure/coro.nim
@@ -6,138 +6,291 @@
 #    See the file "copying.txt", included in this
 #    distribution, for details about the copyright.
 #
+## Nim coroutines implementation supports several context switching methods:
+## ucontext: available on Unix-like systems (default there)
+## setjmp:   available on Unix-like systems (x86 and x86-64 only)
+## Fibers:   available and required on Windows.
+##
+## -d:nimCoroutines              Required to build this module.
+## -d:nimCoroutinesUcontext      Use ucontext backend.
+## -d:nimCoroutinesSetjmp        Use setjmp backend.
+## -d:nimCoroutinesSetjmpBundled Use bundled setjmp implementation.
 
-when not defined(nimCoroutines) and not defined(nimdoc):
-  {.error: "Coroutines require -d:nimCoroutines".}
+when not nimCoroutines and not defined(nimdoc):
+  when defined(noNimCoroutines):
+    {.error: "Coroutines can not be used with -d:noNimCoroutines"}
+  else:
+    {.error: "Coroutines require -d:nimCoroutines".}
 
-import os, times
+import os
 import macros
-import arch
 import lists
+include system/timers
 
 const defaultStackSize = 512 * 1024
 
-type Coroutine = ref object
-  # prev: ptr Coroutine
-  # next: ptr Coroutine
-  ctx: JmpBuf
-  fn: proc()
-  started: bool
-  lastRun: float
-  sleepTime: float
-  stack: pointer
-  stacksize: int
+proc GC_addStack(bottom: pointer) {.cdecl, importc.}
+proc GC_removeStack(bottom: pointer) {.cdecl, importc.}
+proc GC_setActiveStack(bottom: pointer) {.cdecl, importc.}
 
-var coroutines = initDoublyLinkedList[Coroutine]()
-var current: Coroutine
-var mainCtx: JmpBuf
+const
+  CORO_BACKEND_UCONTEXT = 0
+  CORO_BACKEND_SETJMP = 1
+  CORO_BACKEND_FIBERS = 2
 
+when defined(windows):
+  const coroBackend = CORO_BACKEND_FIBERS
+  when defined(nimCoroutinesUcontext):
+    {.warning: "ucontext coroutine backend is not available on windows, defaulting to fibers.".}
+  when defined(nimCoroutinesSetjmp):
+    {.warning: "setjmp coroutine backend is not available on windows, defaulting to fibers.".}
+elif defined(nimCoroutinesSetjmp) or defined(nimCoroutinesSetjmpBundled):
+  const coroBackend = CORO_BACKEND_SETJMP
+else:
+  const coroBackend = CORO_BACKEND_UCONTEXT
 
-proc GC_addStack(starts: pointer) {.cdecl, importc.}
-proc GC_removeStack(starts: pointer) {.cdecl, importc.}
-proc GC_setCurrentStack(starts, pos: pointer) {.cdecl, importc.}
+when coroBackend == CORO_BACKEND_FIBERS:
+  import windows.winlean
+  type
+    Context = pointer
 
-proc start*(c: proc(), stacksize: int=defaultStackSize) =
-  ## Adds coroutine to event loop. It does not run immediately.
-  var coro = Coroutine()
-  coro.fn = c
-  while coro.stack == nil:
-    coro.stack = alloc0(stacksize)
-  coro.stacksize = stacksize
-  coroutines.append(coro)
+elif coroBackend == CORO_BACKEND_UCONTEXT:
+  type
+    stack_t {.importc, header: "<sys/ucontext.h>".} = object
+      ss_sp: pointer
+      ss_flags: int
+      ss_size: int
+
+    ucontext_t {.importc, header: "<sys/ucontext.h>".} = object
+      uc_link: ptr ucontext_t
+      uc_stack: stack_t
+
+    Context = ucontext_t
+
+  proc getcontext(context: var ucontext_t): int32 {.importc, header: "<sys/ucontext.h>".}
+  proc setcontext(context: var ucontext_t): int32 {.importc, header: "<sys/ucontext.h>".}
+  proc swapcontext(fromCtx, toCtx: var ucontext_t): int32 {.importc, header: "<sys/ucontext.h>".}
+  proc makecontext(context: var ucontext_t, fn: pointer, argc: int32) {.importc, header: "<sys/ucontext.h>", varargs.}
+
+elif coroBackend == CORO_BACKEND_SETJMP:
+  proc coroExecWithStack*(fn: pointer, stack: pointer) {.noreturn, importc: "narch_$1", fastcall.}
+  when defined(amd64):
+    {.compile: "../arch/x86/amd64.S".}
+  elif defined(i386):
+    {.compile: "../arch/x86/i386.S".}
+  else:
+    # coroExecWithStack is defined in assembly. To support other platforms
+    # please provide implementation of this procedure.
+    {.error: "Unsupported architecture.".}
+
+  when defined(nimCoroutinesSetjmpBundled):
+    # Use setjmp/longjmp implementation shipped with compiler.
+    when defined(amd64):
+      type
+        JmpBuf = array[0x50 + 0x10, uint8]
+    elif defined(i386):
+      type
+        JmpBuf = array[0x1C, uint8]
+    else:
+      # Bundled setjmp/longjmp are defined in assembly. To support other
+      # platforms please provide implementations of these procedures.
+      {.error: "Unsupported architecture.".}
+
+    proc setjmp(ctx: var JmpBuf): int {.importc: "narch_$1".}
+    proc longjmp(ctx: JmpBuf, ret=1) {.importc: "narch_$1".}
+  else:
+    # Use setjmp/longjmp implementation provided by the system.
+    type
+      JmpBuf {.importc: "jmp_buf", header: "<setjmp.h>".} = object
+    
+    proc setjmp(ctx: var JmpBuf): int {.importc, header: "<setjmp.h>".}
+    proc longjmp(ctx: JmpBuf, ret=1) {.importc, header: "<setjmp.h>".}
+
+  type
+    Context = JmpBuf
+
+when defined(unix):
+  # GLibc fails with "*** longjmp causes uninitialized stack frame ***" because
+  # our custom stacks are not initialized to a magic value.
+  {.passC: "-U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0"}
+
+const
+  CORO_CREATED = 0
+  CORO_EXECUTING = 1
+  CORO_FINISHED = 2
+
+type
+  Stack = object
+    top: pointer      # Top of the stack. Pointer used for deallocating stack if we own it.
+    bottom: pointer   # Very bottom of the stack, acts as unique stack identifier.
+    size: int
+
+  Coroutine = ref object
+    execContext: Context
+    fn: proc()
+    state: int
+    lastRun: Ticks
+    sleepTime: float
+    stack: Stack
+
+  CoroutineLoopContext = ref object
+    coroutines: DoublyLinkedList[Coroutine]
+    current: DoublyLinkedNode[Coroutine]
+    loop: Coroutine
+
+var ctx {.threadvar.}: CoroutineLoopContext
+
+proc getCurrent(): Coroutine =
+  ## Returns current executing coroutine object.
+  var node = ctx.current
+  if node != nil:
+    return node.value
+  return nil
+
+proc initialize() =
+  ## Initializes coroutine state of current thread.
+  if ctx == nil:
+    ctx = CoroutineLoopContext()
+    ctx.coroutines = initDoublyLinkedList[Coroutine]()
+    ctx.loop = Coroutine()
+    ctx.loop.state = CORO_EXECUTING
+    when coroBackend == CORO_BACKEND_FIBERS:
+      ctx.loop.execContext = ConvertThreadToFiberEx(nil, FIBER_FLAG_FLOAT_SWITCH)
+
+proc runCurrentTask()
+
+proc switchTo(current, to: Coroutine) =
+  ## Switches execution from `current` into `to` context.
+  to.lastRun = getTicks()
+  # Update position of current stack so gc invoked from another stack knows how much to scan.
+  GC_setActiveStack(current.stack.bottom)
+  var frame = getFrameState()
+  block:
+    # Execution will switch to another fiber now. We do not need to update current stack
+    when coroBackend == CORO_BACKEND_FIBERS:
+      SwitchToFiber(to.execContext)
+    elif coroBackend == CORO_BACKEND_UCONTEXT:
+      discard swapcontext(current.execContext, to.execContext)
+    elif coroBackend == CORO_BACKEND_SETJMP:
+      var res = setjmp(current.execContext)
+      if res == 0:
+        if to.state == CORO_EXECUTING:
+          # Coroutine is resumed.
+          longjmp(to.execContext, 1)
+        elif to.state == CORO_CREATED:
+          # Coroutine is started.
+          coroExecWithStack(runCurrentTask, to.stack.bottom)
+          doAssert false
+    else:
+      {.error: "Invalid coroutine backend set.".}
+  # Execution was just resumed. Restore frame information and set active stack.
+  setFrameState(frame)
+  GC_setActiveStack(current.stack.bottom)
 
-{.push stackTrace: off.}
 proc suspend*(sleepTime: float=0) =
   ## Stops coroutine execution and resumes no sooner than after ``sleeptime`` seconds.
   ## Until then other coroutines are executed.
-  ##
-  ## This is similar to a `yield`:idx:, or a `yieldFrom`:idx in Python.
-  var oldFrame = getFrame()
-  var sp {.volatile.}: pointer
-  GC_setCurrentStack(current.stack, cast[pointer](addr sp))
+  var current = getCurrent()
   current.sleepTime = sleepTime
-  current.lastRun = epochTime()
-  if setjmp(current.ctx) == 0:
-    longjmp(mainCtx, 1)
-  setFrame(oldFrame)
-{.pop.}
+  switchTo(current, ctx.loop)
+
+proc runCurrentTask() =
+  ## Starts execution of the current coroutine and updates its state throughout the coroutine's life.
+  var sp {.volatile.}: pointer
+  sp = addr(sp)
+  block:
+    var current = getCurrent()
+    current.stack.bottom = sp
+    # Execution of new fiber just started. Since it was entered not through `switchTo` we
+    # have to set active stack here as well. GC_removeStack() has to be called in main loop
+    # because we still need stack available in final suspend(0) call from which we will not
+    # return.
+    GC_addStack(sp)
+    # Activate current stack because we are executing in a new coroutine.
+    GC_setActiveStack(sp)
+    current.state = CORO_EXECUTING
+    try:
+      current.fn()                    # Start coroutine execution
+    except:
+      echo "Unhandled exception in coroutine."
+      writeStackTrace()
+    current.state = CORO_FINISHED
+  suspend(0)                      # Exit coroutine without returning from coroExecWithStack()
+  doAssert false                  # Not reached: run() removes finished coroutines instead of resuming them
+
+proc start*(c: proc(), stacksize: int=defaultStackSize) =
+  ## Schedules coroutine ``c`` with its own stack of ``stacksize`` bytes. It does not run immediately.
+  if ctx == nil:
+    initialize()
+  
+  var coro = Coroutine()
+  coro.fn = c
+  when coroBackend == CORO_BACKEND_FIBERS:
+    coro.execContext = CreateFiberEx(stacksize, stacksize,
+      FIBER_FLAG_FLOAT_SWITCH, (proc(p: pointer): void {.stdcall.} = runCurrentTask()), nil)
+    coro.stack.size = stacksize
+  else:
+    var stack: pointer
+    while stack == nil:
+      stack = alloc0(stacksize)
+    coro.stack.top = stack
+    when coroBackend == CORO_BACKEND_UCONTEXT:
+      discard getcontext(coro.execContext)
+      coro.execContext.uc_stack.ss_sp = stack         # makecontext() wants the base (lowest address) of the block
+      coro.execContext.uc_stack.ss_size = stacksize   # coro.stack.size is still 0 here, so use the argument
+      coro.execContext.uc_link = addr ctx.loop.execContext
+      makecontext(coro.execContext, runCurrentTask, 0)
+  coro.stack.size = stacksize
+  coro.state = CORO_CREATED
+  ctx.coroutines.append(coro)
 
 proc run*() =
-  ## Starts main event loop which exits when all coroutines exit. Calling this proc
-  ## starts execution of first coroutine.
-  var node = coroutines.head
-  var minDelay: int = 0 # in milliseconds
-  var frame: PFrame
-  while node != nil:
-    var coro = node.value
-    current = coro
-    os.sleep(minDelay)
-
-    var remaining = int((coro.sleepTime - (epochTime() - coro.lastRun)) * 1000)
-    if remaining <= 0:
-      remaining = 0
-      let res = setjmp(mainCtx)
-      if res == 0:
-        frame = getFrame()
-        if coro.started:            # coroutine resumes
-          longjmp(coro.ctx, 1)
-        else:
-          coro.started = true       # coroutine starts
-          var stackEnd = cast[pointer](cast[ByteAddress](coro.stack) + coro.stacksize)
-          GC_addStack(coro.stack)
-          coroSwitchStack(stackEnd)
-          coro.fn()
-          coroRestoreStack()
-          GC_removeStack(coro.stack)
-          var next = node.prev
-          coroutines.remove(node)
-          dealloc(coro.stack)
-          node = next
-          setFrame(frame)
-      else:
-        setFrame(frame)
+  ## Starts main coroutine scheduler loop which exits when all coroutines exit.
+  ## Calling this proc starts execution of first coroutine.
+  initialize()
+  ctx.current = ctx.coroutines.head
+  var minDelay: float = 0
+  while ctx.current != nil:
+    var current = getCurrent()
 
-    elif remaining > 0:
+    var remaining = current.sleepTime - (float(getTicks() - current.lastRun) / 1_000_000_000)
+    if remaining <= 0:
+      # Resume the coroutine. switchTo() saves the main loop context and returns here once the coroutine suspends or finishes.
+      switchTo(ctx.loop, current)
+    else:
       if minDelay > 0 and remaining > 0:
         minDelay = min(remaining, minDelay)
       else:
         minDelay = remaining
 
-    if node == nil or node.next == nil:
-      node = coroutines.head
+    if current.state == CORO_FINISHED:
+      var next = ctx.current.prev
+      if next == nil:
+        # If first coroutine ends then `prev` is nil even if more coroutines 
+        # are to be scheduled.
+        next = ctx.current.next
+      ctx.coroutines.remove(ctx.current)
+      GC_removeStack(current.stack.bottom)
+      when coroBackend == CORO_BACKEND_FIBERS:
+        DeleteFiber(current.execContext)
+      else:
+        dealloc(current.stack.top)
+      current.stack.top = nil
+      current.stack.bottom = nil
+      ctx.current = next
+    elif ctx.current == nil or ctx.current.next == nil:
+      ctx.current = ctx.coroutines.head
+      os.sleep(int(minDelay * 1000))
     else:
-      node = node.next
+      ctx.current = ctx.current.next
 
 proc alive*(c: proc()): bool =
   ## Returns ``true`` if coroutine has not returned, ``false`` otherwise.
-  for coro in items(coroutines):
+  for coro in items(ctx.coroutines):
     if coro.fn == c:
-      return true
+      return coro.state != CORO_FINISHED
 
 proc wait*(c: proc(), interval=0.01) =
   ## Returns only after coroutine ``c`` has returned. ``interval`` is time in seconds how often.
   while alive(c):
-    suspend interval
-
-when defined(nimCoroutines) and isMainModule:
-  var stackCheckValue = 1100220033
-  proc c2()
-
-  proc c1() =
-    for i in 0 .. 3:
-      echo "c1"
-      suspend 0.05
-    echo "c1 exits"
-
-
-  proc c2() =
-    for i in 0 .. 3:
-      echo "c2"
-      suspend 0.025
-    wait(c1)
-    echo "c2 exits"
-
-  start(c1)
-  start(c2)
-  run()
-  echo "done ", stackCheckValue
+    suspend(interval)
diff --git a/lib/system.nim b/lib/system.nim
index 4371f0000..74dca461a 100644
--- a/lib/system.nim
+++ b/lib/system.nim
@@ -2462,6 +2462,26 @@ template accumulateResult*(iter: untyped) =
 # we have to compute this here before turning it off in except.nim anyway ...
 const NimStackTrace = compileOption("stacktrace")
 
+template coroutinesSupportedPlatform(): bool =
+  when defined(sparc) or defined(ELATE) or compileOption("gc", "v2") or 
+    defined(boehmgc) or defined(gogc) or defined(nogc) or defined(gcStack) or 
+    defined(gcMarkAndSweep):
+    false
+  else:
+    true
+
+when defined(nimCoroutines):
+  # Explicit opt-in: refuse to compile when the platform/GC combination is unsupported.
+  when not coroutinesSupportedPlatform():
+    {.error: "Coroutines are not supported on this architecture and/or garbage collector.".}
+  const nimCoroutines* = true
+elif defined(noNimCoroutines):
+  # Explicit opt-out.
+  const nimCoroutines* = false
+else:
+  # Neither flag given: support is not autodetected here, coroutines stay disabled.
+  const nimCoroutines* = false
+
 {.push checks: off.}
 # obviously we cannot generate checking operations here :-)
 # because it would yield into an endless recursion
diff --git a/lib/system/excpt.nim b/lib/system/excpt.nim
index be41a63a7..bae5de9d3 100644
--- a/lib/system/excpt.nim
+++ b/lib/system/excpt.nim
@@ -45,6 +45,17 @@ var
     # a global variable for the root of all try blocks
   currException {.threadvar.}: ref Exception
 
+type
+  FrameState = tuple[framePtr: PFrame, excHandler: PSafePoint, currException: ref Exception]
+
+proc getFrameState*(): FrameState {.compilerRtl, inl.} =
+  return (framePtr, excHandler, currException)
+
+proc setFrameState*(state: FrameState) {.compilerRtl, inl.} =
+  framePtr = state.framePtr
+  excHandler = state.excHandler
+  currException = state.currException
+
 proc getFrame*(): PFrame {.compilerRtl, inl.} = framePtr
 
 proc popFrame {.compilerRtl, inl.} =
diff --git a/lib/system/gc.nim b/lib/system/gc.nim
index 703146484..8db60ab0f 100644
--- a/lib/system/gc.nim
+++ b/lib/system/gc.nim
@@ -12,9 +12,6 @@
 # Refcounting + Mark&Sweep. Complex algorithms avoided.
 # Been there, done that, didn't work.
 
-when defined(nimCoroutines):
-  import arch
-
 {.push profiler:off.}
 
 const
@@ -66,17 +63,24 @@ type
     cycleTableSize: int      # max entries in cycle table
     maxPause: int64          # max measured GC pause in nanoseconds
 
-  GcStack {.final.} = object
-    prev: ptr GcStack
-    next: ptr GcStack
-    starts: pointer
-    pos: pointer
-    maxStackSize: int
+  GcStack {.final, pure.} = object
+    when nimCoroutines:
+      prev: ptr GcStack
+      next: ptr GcStack
+      maxStackSize: int      # Used to track statistics because we can not use
+                             # GcStat.maxStackSize when multiple stacks exist.
+    bottom: pointer
+
+    when withRealTime or nimCoroutines:
+      pos: pointer           # Used with `withRealTime` only for code clarity, see GC_Step().
+    when withRealTime:
+      bottomSaved: pointer
 
   GcHeap {.final, pure.} = object # this contains the zero count and
-                                   # non-zero count table
-    stack: ptr GcStack
-    stackBottom: pointer
+                                  # non-zero count table
+    stack: GcStack
+    when nimCoroutines:
+      activeStack: ptr GcStack    # current executing coroutine stack.
     cycleThreshold: int
     when useCellIds:
       idGenerator: int
@@ -823,7 +827,10 @@ proc collectCTBody(gch: var GcHeap) =
     let t0 = getticks()
   sysAssert(allocInv(gch.region), "collectCT: begin")
 
-  when not defined(nimCoroutines):
+  when nimCoroutines:
+    for stack in gch.stack.items():
+      gch.stat.maxStackSize = max(gch.stat.maxStackSize, stack.stackSize())
+  else:
     gch.stat.maxStackSize = max(gch.stat.maxStackSize, stackSize())
   sysAssert(gch.decStack.len == 0, "collectCT")
   prepareForInteriorPointerChecking(gch.region)
@@ -849,19 +856,11 @@ proc collectCTBody(gch: var GcHeap) =
       if gch.maxPause > 0 and duration > gch.maxPause:
         c_fprintf(stdout, "[GC] missed deadline: %ld\n", duration)
 
-when defined(nimCoroutines):
-  proc currentStackSizes(): int =
-    for stack in items(gch.stack):
-      result = result + stackSize(stack.starts, stack.pos)
-
 proc collectCT(gch: var GcHeap) =
   # stackMarkCosts prevents some pathological behaviour: Stack marking
   # becomes more expensive with large stacks and large stacks mean that
   # cells with RC=0 are more likely to be kept alive by the stack.
-  when defined(nimCoroutines):
-    let stackMarkCosts = max(currentStackSizes() div (16*sizeof(int)), ZctThreshold)
-  else:
-    let stackMarkCosts = max(stackSize() div (16*sizeof(int)), ZctThreshold)
+  let stackMarkCosts = max(stackSize() div (16*sizeof(int)), ZctThreshold)
   if (gch.zct.len >= stackMarkCosts or (cycleGC and
       getOccupiedMem(gch.region)>=gch.cycleThreshold) or alwaysGC) and
       gch.recGcLock == 0:
@@ -888,18 +887,24 @@ when withRealTime:
     release(gch)
 
   proc GC_step*(us: int, strongAdvice = false, stackSize = -1) {.noinline.} =
-    var stackTop {.volatile.}: pointer
-    let prevStackBottom = gch.stackBottom
     if stackSize >= 0:
-      stackTop = addr(stackTop)
-      when stackIncreases:
-        gch.stackBottom = cast[pointer](
-          cast[ByteAddress](stackTop) - sizeof(pointer) * 6 - stackSize)
-      else:
-        gch.stackBottom = cast[pointer](
-          cast[ByteAddress](stackTop) + sizeof(pointer) * 6 + stackSize)
+      var stackTop {.volatile.}: pointer
+      gch.getActiveStack().pos = addr(stackTop)
+
+      for stack in gch.stack.items():
+        stack.bottomSaved = stack.bottom
+        when stackIncreases:
+          stack.bottom = cast[pointer](
+            cast[ByteAddress](stack.pos) - sizeof(pointer) * 6 - stackSize)
+        else:
+          stack.bottom = cast[pointer](
+            cast[ByteAddress](stack.pos) + sizeof(pointer) * 6 + stackSize)
+
     GC_step(gch, us, strongAdvice)
-    gch.stackBottom = prevStackBottom
+
+    if stackSize >= 0:
+      for stack in gch.stack.items():
+        stack.bottom = stack.bottomSaved
 
 when not defined(useNimRtl):
   proc GC_disable() =
@@ -943,10 +948,10 @@ when not defined(useNimRtl):
              "[GC] zct capacity: " & $gch.zct.cap & "\n" &
              "[GC] max cycle table size: " & $gch.stat.cycleTableSize & "\n" &
              "[GC] max pause time [ms]: " & $(gch.stat.maxPause div 1000_000) & "\n"
-    when defined(nimCoroutines):
+    when nimCoroutines:
       result = result & "[GC] number of stacks: " & $gch.stack.len & "\n"
       for stack in items(gch.stack):
-        result = result & "[GC]   stack " & stack.starts.repr & "[GC]     max stack size " & $stack.maxStackSize & "\n"
+        result = result & "[GC]   stack " & stack.bottom.repr & "[GC]     max stack size " & cast[pointer](stack.maxStackSize).repr & "\n"
     else:
       result = result & "[GC] max stack size: " & $gch.stat.maxStackSize & "\n"
     GC_enable()
diff --git a/lib/system/gc2.nim b/lib/system/gc2.nim
index ce2bfc2ae..083c06fe3 100644
--- a/lib/system/gc2.nim
+++ b/lib/system/gc2.nim
@@ -15,9 +15,6 @@
 
 # XXX Ensure by smart color masking that the object is not in the ZCT.
 
-when defined(nimCoroutines):
-  import arch
-
 {.push profiler:off.}
 
 const
@@ -72,19 +69,26 @@ type
     maxStackCells: int       # max stack cells in ``decStack``
     cycleTableSize: int      # max entries in cycle table
     maxPause: int64          # max measured GC pause in nanoseconds
-
-  GcStack = object
-    prev: ptr GcStack
-    next: ptr GcStack
-    starts: pointer
-    pos: pointer
-    maxStackSize: int
+  
+  GcStack {.final, pure.} = object
+    when nimCoroutines:
+      prev: ptr GcStack
+      next: ptr GcStack
+      maxStackSize: int      # Used to track statistics because we can not use
+                             # GcStat.maxStackSize when multiple stacks exist.
+    bottom: pointer
+
+    when withRealTime or nimCoroutines:
+      pos: pointer           # Used with `withRealTime` only for code clarity, see GC_Step().
+    when withRealTime:
+      bottomSaved: pointer
 
   GcHeap = object # this contains the zero count and
                   # non-zero count table
     black, red: int # either 0 or 1.
-    stack: ptr GcStack
-    stackBottom: pointer
+    stack: GcStack
+    when nimCoroutines:
+      activeStack: ptr GcStack    # current executing coroutine stack.
     phase: Phase
     cycleThreshold: int
     when useCellIds:
@@ -913,7 +917,7 @@ proc collectCTBody(gch: var GcHeap) =
     let t0 = getticks()
   sysAssert(allocInv(gch.region), "collectCT: begin")
 
-  when not defined(nimCoroutines):
+  when not nimCoroutines:
     gch.stat.maxStackSize = max(gch.stat.maxStackSize, stackSize())
   sysAssert(gch.decStack.len == 0, "collectCT")
   prepareForInteriorPointerChecking(gch.region)
@@ -938,16 +942,16 @@ proc collectCTBody(gch: var GcHeap) =
       if gch.maxPause > 0 and duration > gch.maxPause:
         c_fprintf(stdout, "[GC] missed deadline: %ld\n", duration)
 
-when defined(nimCoroutines):
+when nimCoroutines:
   proc currentStackSizes(): int =
     for stack in items(gch.stack):
-      result = result + stackSize(stack.starts, stack.pos)
+      result = result + stack.stackSize()
 
 proc collectCT(gch: var GcHeap) =
   # stackMarkCosts prevents some pathological behaviour: Stack marking
   # becomes more expensive with large stacks and large stacks mean that
   # cells with RC=0 are more likely to be kept alive by the stack.
-  when defined(nimCoroutines):
+  when nimCoroutines:
     let stackMarkCosts = max(currentStackSizes() div (16*sizeof(int)), ZctThreshold)
   else:
     let stackMarkCosts = max(stackSize() div (16*sizeof(int)), ZctThreshold)
@@ -971,18 +975,24 @@ when withRealTime:
       collectCTBody(gch)
 
   proc GC_step*(us: int, strongAdvice = false, stackSize = -1) {.noinline.} =
-    var stackTop {.volatile.}: pointer
-    let prevStackBottom = gch.stackBottom
     if stackSize >= 0:
-      stackTop = addr(stackTop)
-      when stackIncreases:
-        gch.stackBottom = cast[pointer](
-          cast[ByteAddress](stackTop) - sizeof(pointer) * 6 - stackSize)
-      else:
-        gch.stackBottom = cast[pointer](
-          cast[ByteAddress](stackTop) + sizeof(pointer) * 6 + stackSize)
+      var stackTop {.volatile.}: pointer
+      gch.getActiveStack().pos = addr(stackTop)
+
+      for stack in gch.stack.items():
+        stack.bottomSaved = stack.bottom
+        when stackIncreases:
+          stack.bottom = cast[pointer](
+            cast[ByteAddress](stack.pos) - sizeof(pointer) * 6 - stackSize)
+        else:
+          stack.bottom = cast[pointer](
+            cast[ByteAddress](stack.pos) + sizeof(pointer) * 6 + stackSize)
+
     GC_step(gch, us, strongAdvice)
-    gch.stackBottom = prevStackBottom
+
+    if stackSize >= 0:
+      for stack in gch.stack.items():
+        stack.bottom = stack.bottomSaved
 
 when not defined(useNimRtl):
   proc GC_disable() =
@@ -1024,10 +1034,10 @@ when not defined(useNimRtl):
              "[GC] zct capacity: " & $gch.zct.cap & "\n" &
              "[GC] max cycle table size: " & $gch.stat.cycleTableSize & "\n" &
              "[GC] max pause time [ms]: " & $(gch.stat.maxPause div 1000_000)
-    when defined(nimCoroutines):
+    when nimCoroutines:
       result = result & "[GC] number of stacks: " & $gch.stack.len & "\n"
       for stack in items(gch.stack):
-        result = result & "[GC]   stack " & stack.starts.repr & "[GC]     max stack size " & $stack.maxStackSize & "\n"
+        result = result & "[GC]   stack " & stack.bottom.repr & "[GC]     max stack size " & $stack.maxStackSize & "\n"
     else:
       result = result & "[GC] max stack size: " & $gch.stat.maxStackSize & "\n"
     GC_enable()
diff --git a/lib/system/gc_common.nim b/lib/system/gc_common.nim
index 6ab6bd920..e3b861fad 100644
--- a/lib/system/gc_common.nim
+++ b/lib/system/gc_common.nim
@@ -57,76 +57,96 @@ proc isNotForeign*(x: ForeignCell): bool =
   ## No deep copy has to be performed then.
   x.owner == addr(gch)
 
-proc len(stack: ptr GcStack): int =
-  if stack == nil:
-    return 0
-
-  var s = stack
-  result = 1
-  while s.next != nil:
-    inc(result)
-    s = s.next
-
-when defined(nimCoroutines):
-  proc stackSize(stackBottom: pointer, pos: pointer=nil): int {.noinline.} =
-    var sp: pointer
-    if pos == nil:
-      var stackTop {.volatile.}: pointer
-      sp = addr(stackTop)
-    else:
-      sp = pos
-    result = abs(cast[int](sp) - cast[int](stackBottom))
-
-  proc GC_addStack*(starts: pointer) {.cdecl, exportc.} =
-    var sp {.volatile.}: pointer
-    var stack = cast[ptr GcStack](alloc0(sizeof(GcStack)))
-    stack.starts = starts
-    stack.pos = addr sp
-    if gch.stack == nil:
-      gch.stack = stack
-    else:
-      stack.next = gch.stack
-      gch.stack.prev = stack
-      gch.stack = stack
-    # c_fprintf(stdout, "[GC] added stack 0x%016X\n", starts)
-
-  proc GC_removeStack*(starts: pointer) {.cdecl, exportc.} =
-    var stack = gch.stack
-    while stack != nil:
-      if stack.starts == starts:
-        if stack.prev == nil:
-          if stack.next != nil:
-            stack.next.prev = nil
-          gch.stack = stack.next
-        else:
-          stack.prev.next = stack.next
-          if stack.next != nil:
-              stack.next.prev = stack.prev
-        dealloc(stack)
-        # echo "[GC] removed stack ", starts.repr
+when nimCoroutines:
+  iterator items(first: var GcStack): ptr GcStack =
+    var item = addr(first)
+    while true:
+      yield item
+      item = item.next
+      if item == addr(first):
         break
-      else:
-        stack = stack.next
-
-  proc GC_setCurrentStack*(starts, pos: pointer) {.cdecl, exportc.} =
-    var stack = gch.stack
-    while stack != nil:
-      if stack.starts == starts:
-        stack.pos = pos
-        stack.maxStackSize = max(stack.maxStackSize, stackSize(stack.starts, pos))
-        return
-      stack = stack.next
-    gcAssert(false, "Current stack position does not belong to registered stack")
+
+  proc append(first: var GcStack, stack: ptr GcStack) =
+    ## Append stack to the ring of stacks.
+    first.prev.next = stack
+    stack.prev = first.prev
+    first.prev = stack
+    stack.next = addr(first)
+
+  proc append(first: var GcStack): ptr GcStack =
+    ## Allocate new GcStack object, append it to the ring of stacks and return it.
+    result = cast[ptr GcStack](alloc0(sizeof(GcStack)))
+    first.append(result)
+
+  proc remove(first: var GcStack, stack: ptr GcStack) =
+    ## Remove stack from ring of stacks.
+    gcAssert(addr(first) != stack, "Main application stack can not be removed")
+    if addr(first) == stack or stack == nil:
+      return
+    stack.prev.next = stack.next
+    stack.next.prev = stack.prev
+    dealloc(stack)
+
+  proc remove(stack: ptr GcStack) =
+    gch.stack.remove(stack)
+
+  proc find(first: var GcStack, bottom: pointer): ptr GcStack =
+    ## Find stack struct based on bottom pointer. If `bottom` is nil then main
+    ## thread stack is returned. The result is nil when no stack in the ring matches.
+    if bottom == nil:
+      return addr(gch.stack)
+
+    for stack in first.items():
+      if stack.bottom == bottom:
+        return stack
+
+  proc len(stack: var GcStack): int =
+    for _ in stack.items():
+      result = result + 1
 else:
-  proc stackSize(): int {.noinline.} =
-    var stackTop {.volatile.}: pointer
-    result = abs(cast[int](addr(stackTop)) - cast[int](gch.stackBottom))
+  # This iterator gets optimized out in forEachStackSlot().
+  iterator items(first: var GcStack): ptr GcStack = yield addr(first)
+  proc len(stack: var GcStack): int = 1
+
+proc stackSize(stack: ptr GcStack): int {.noinline.} = # Bytes between `stack.bottom` and the stack position.
+  when nimCoroutines:
+    var pos = stack.pos               # Last recorded position of this stack.
+  else:
+    var pos {.volatile.}: pointer
+    pos = addr(pos)                   # Address of a local stands in for the current stack position.
+
+  if pos != nil:
+    when defined(stackIncreases): # NOTE(review): `stackIncreases` is a const tested as `when stackIncreases` in GC_step; defined() checks a -d: symbol - confirm intent
+      result = cast[ByteAddress](pos) -% cast[ByteAddress](stack.bottom)
+    else:
+      result = cast[ByteAddress](stack.bottom) -% cast[ByteAddress](pos)
+  else:
+    result = 0                        # No position recorded for this stack yet.
 
-iterator items(stack: ptr GcStack): ptr GcStack =
-  var s = stack
-  while not isNil(s):
-    yield s
-    s = s.next
+proc stackSize(): int {.noinline.} =
+  for stack in gch.stack.items():
+    result = result + stack.stackSize()
+
+when nimCoroutines:
+  proc setPosition(stack: ptr GcStack, position: pointer) =
+    stack.pos = position
+    stack.maxStackSize = max(stack.maxStackSize, stack.stackSize())
+
+  proc setPosition(stack: var GcStack, position: pointer) =
+    setPosition(addr(stack), position)
+
+  proc getActiveStack(gch: var GcHeap): ptr GcStack =
+    return gch.activeStack
+
+  proc isActiveStack(stack: ptr GcStack): bool =
+    return gch.activeStack == stack
+else:
+  # Stack positions do not need to be tracked if coroutines are not used.
+  proc setPosition(stack: ptr GcStack, position: pointer) = discard
+  proc setPosition(stack: var GcStack, position: pointer) = discard
+  # There is just one stack - main stack of the thread. It is active always.
+  proc getActiveStack(gch: var GcHeap): ptr GcStack = addr(gch.stack)
+  proc isActiveStack(stack: ptr GcStack): bool = true
 
 when declared(threadType):
   proc setupForeignThreadGc*() {.gcsafe.} =
@@ -177,37 +197,69 @@ elif defined(hppa) or defined(hp9000) or defined(hp9000s300) or
 else:
   const stackIncreases = false
 
+{.push stack_trace: off.}
+when nimCoroutines:
+  proc GC_addStack(bottom: pointer) {.cdecl, exportc.} =
+    ## Appends a new stack record for `bottom` and sets its initial position.
+    var added = append(gch.stack)
+    added.bottom = bottom
+    setPosition(added, bottom)
+
+  proc GC_removeStack(bottom: pointer) {.cdecl, exportc.} =
+    ## Removes the stack record that find() returns for `bottom`.
+    remove(find(gch.stack, bottom))
+
+  proc GC_setActiveStack(bottom: pointer) {.cdecl, exportc.} =
+    ## Sets active stack and updates current stack position, using the
+    ## address of a local variable as that position.
+    var sp {.volatile.}: pointer
+    gch.activeStack = find(gch.stack, bottom)
+    setPosition(gch.activeStack, addr(sp))
+
 when not defined(useNimRtl):
-  {.push stack_trace: off.}
   proc setStackBottom(theStackBottom: pointer) =
-    #c_fprintf(stdout, "stack bottom: %p;\n", theStackBottom)
-    # the first init must be the one that defines the stack bottom:
-    when defined(nimCoroutines):
-      GC_addStack(theStackBottom)
-    else:
-      if gch.stackBottom == nil: gch.stackBottom = theStackBottom
+    # Initializes the main stack of the thread; later calls may move its bottom.
+    when nimCoroutines:
+      if gch.stack.next == nil:
+        # Main stack was not initialized yet: link it to itself, make it active.
+        gch.stack.next = addr(gch.stack)
+        gch.stack.prev = addr(gch.stack)
+        gch.stack.bottom = theStackBottom
+        gch.stack.maxStackSize = 0
+        gch.activeStack = addr(gch.stack)
+
+    if gch.stack.bottom == nil:
+      # This branch is not taken on the first call with -d:nimCoroutines, which
+      # is fine because the same assignment is done just above.
+      #c_fprintf(stdout, "stack bottom: %p;\n", theStackBottom)
+      # the first init must be the one that defines the stack bottom:
+      gch.stack.bottom = theStackBottom
+    elif theStackBottom != gch.stack.bottom:
+      var a = cast[ByteAddress](theStackBottom) # and not PageMask - PageSize*2
+      var b = cast[ByteAddress](gch.stack.bottom)
+      #c_fprintf(stdout, "old: %p new: %p;\n",gch.stack.bottom,theStackBottom)
+      when stackIncreases:
+        gch.stack.bottom = cast[pointer](min(a, b))
       else:
-        var a = cast[ByteAddress](theStackBottom) # and not PageMask - PageSize*2
-        var b = cast[ByteAddress](gch.stackBottom)
-        #c_fprintf(stdout, "old: %p new: %p;\n",gch.stackBottom,theStackBottom)
-        when stackIncreases:
-          gch.stackBottom = cast[pointer](min(a, b))
-        else:
-          gch.stackBottom = cast[pointer](max(a, b))
-  {.pop.}
+        gch.stack.bottom = cast[pointer](max(a, b))
+    # Record the position for the main stack (a no-op without coroutines).
+    gch.stack.setPosition(theStackBottom)
+{.pop.}
+
+proc isOnStack(p: pointer): bool =
+  # True if `p` lies between the active stack's bottom and the current top.
+  var stackTop {.volatile.}: pointer
+  stackTop = addr(stackTop)
+  let bottom = cast[ByteAddress](gch.getActiveStack().bottom)
+  let top = cast[ByteAddress](stackTop)
+  let x = cast[ByteAddress](p)
+  when stackIncreases: result = bottom <=% x and x <=% top
+  else: result = top <=% x and x <=% bottom
 
 when defined(sparc): # For SPARC architecture.
-  when defined(nimCoroutines):
+  when nimCoroutines:
     {.error: "Nim coroutines are not supported on this platform."}
 
-  proc isOnStack(p: pointer): bool =
-    var stackTop {.volatile.}: pointer
-    stackTop = addr(stackTop)
-    var b = cast[ByteAddress](gch.stackBottom)
-    var a = cast[ByteAddress](stackTop)
-    var x = cast[ByteAddress](p)
-    result = a <=% x and x <=% b
-
   template forEachStackSlot(gch, gcMark: untyped) {.dirty.} =
     when defined(sparcv9):
       asm  """"flushw \n" """
@@ -215,7 +267,7 @@ when defined(sparc): # For SPARC architecture.
       asm  """"ta      0x3   ! ST_FLUSH_WINDOWS\n" """
 
     var
-      max = gch.stackBottom
+      max = gch.stack.bottom
       sp: PPointer
       stackTop: array[0..1, pointer]
     sp = addr(stackTop[0])
@@ -231,16 +283,6 @@ elif stackIncreases:
   # ---------------------------------------------------------------------------
   # Generic code for architectures where addresses increase as the stack grows.
   # ---------------------------------------------------------------------------
-  when defined(nimCoroutines):
-    {.error: "Nim coroutines are not supported on this platform."}
-  proc isOnStack(p: pointer): bool =
-    var stackTop {.volatile.}: pointer
-    stackTop = addr(stackTop)
-    var a = cast[ByteAddress](gch.stackBottom)
-    var b = cast[ByteAddress](stackTop)
-    var x = cast[ByteAddress](p)
-    result = a <=% x and x <=% b
-
   var
     jmpbufSize {.importc: "sizeof(jmp_buf)", nodecl.}: int
       # a little hack to get the size of a JmpBuf in the generated C code
@@ -248,84 +290,42 @@ elif stackIncreases:
 
   template forEachStackSlot(gch, gcMark: untyped) {.dirty.} =
     var registers {.noinit.}: C_JmpBuf
+    # Scanning starts at the last word of `registers` so that sp traverses the
+    # JMP_BUF as well (jmp_buf size is added, otherwise sp would be below the
+    # registers structure), then walks down until it passes the stack bottom.
+    var regAddr = cast[ByteAddress](addr(registers)) +% jmpbufSize
     if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
-      var max = cast[ByteAddress](gch.stackBottom)
-      var sp = cast[ByteAddress](addr(registers)) +% jmpbufSize -% sizeof(pointer)
-      # sp will traverse the JMP_BUF as well (jmp_buf size is added,
-      # otherwise sp would be below the registers structure).
-      while sp >=% max:
-        gcMark(gch, cast[PPointer](sp)[])
-        sp = sp -% sizeof(pointer)
+      for stack in gch.stack.items():
+        var max = cast[ByteAddress](gch.stack.bottom)
+        var sp = regAddr -% sizeof(pointer)
+        while sp >=% max:
+          gcMark(gch, cast[PPointer](sp)[])
+          sp = sp -% sizeof(pointer)
 
 else:
   # ---------------------------------------------------------------------------
   # Generic code for architectures where addresses decrease as the stack grows.
   # ---------------------------------------------------------------------------
-  when defined(nimCoroutines):
-    proc isOnStack(p: pointer): bool =
-      var stackTop {.volatile.}: pointer
-      stackTop = addr(stackTop)
-      for stack in items(gch.stack):
-        var b = cast[ByteAddress](stack.starts)
-        var a = cast[ByteAddress](stack.starts) - stack.maxStackSize
-        var x = cast[ByteAddress](p)
-        if a <=% x and x <=% b:
-          return true
-
-    template forEachStackSlot(gch, gcMark: untyped) {.dirty.} =
-      # We use a jmp_buf buffer that is in the C stack.
-      # Used to traverse the stack and registers assuming
-      # that 'setjmp' will save registers in the C stack.
-      type PStackSlice = ptr array[0..7, pointer]
-      var registers {.noinit.}: Registers
-      getRegisters(registers)
-      for i in registers.low .. registers.high:
-        gcMark(gch, cast[PPointer](registers[i]))
-
-      for stack in items(gch.stack):
-        stack.maxStackSize = max(stack.maxStackSize, stackSize(stack.starts))
-        var max = cast[ByteAddress](stack.starts)
-        var sp = cast[ByteAddress](stack.pos)
-        # loop unrolled:
-        while sp <% max - 8*sizeof(pointer):
-          gcMark(gch, cast[PStackSlice](sp)[0])
-          gcMark(gch, cast[PStackSlice](sp)[1])
-          gcMark(gch, cast[PStackSlice](sp)[2])
-          gcMark(gch, cast[PStackSlice](sp)[3])
-          gcMark(gch, cast[PStackSlice](sp)[4])
-          gcMark(gch, cast[PStackSlice](sp)[5])
-          gcMark(gch, cast[PStackSlice](sp)[6])
-          gcMark(gch, cast[PStackSlice](sp)[7])
-          sp = sp +% sizeof(pointer)*8
-        # last few entries:
-        while sp <=% max:
-          gcMark(gch, cast[PPointer](sp)[])
-          sp = sp +% sizeof(pointer)
-  else:
-    proc isOnStack(p: pointer): bool =
-      var stackTop {.volatile.}: pointer
-      stackTop = addr(stackTop)
-      var b = cast[ByteAddress](gch.stackBottom)
-      var a = cast[ByteAddress](stackTop)
-      var x = cast[ByteAddress](p)
-      result = a <=% x and x <=% b
-
-    template forEachStackSlot(gch, gcMark: untyped) {.dirty.} =
-      # We use a jmp_buf buffer that is in the C stack.
-      # Used to traverse the stack and registers assuming
-      # that 'setjmp' will save registers in the C stack.
-      type PStackSlice = ptr array[0..7, pointer]
-      var registers {.noinit.}: C_JmpBuf
-      if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
-        var max = cast[ByteAddress](gch.stackBottom)
+  template forEachStackSlot(gch, gcMark: untyped) {.dirty.} =
+    # We use a jmp_buf buffer that is in the C stack.
+    # Used to traverse the stack and registers assuming
+    # that 'setjmp' will save registers in the C stack.
+    type PStackSlice = ptr array[0..7, pointer]
+    var registers {.noinit.}: C_JmpBuf
+    # Update position of stack gc is executing in.
+    gch.getActiveStack().setPosition(addr(registers))
+    if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
+      for stack in gch.stack.items():
+        var max = cast[ByteAddress](stack.bottom)
         var sp = cast[ByteAddress](addr(registers))
         when defined(amd64):
-          # words within the jmp_buf structure may not be properly aligned.
-          let regEnd = sp +% sizeof(registers)
-          while sp <% regEnd:
-            gcMark(gch, cast[PPointer](sp)[])
-            gcMark(gch, cast[PPointer](sp +% sizeof(pointer) div 2)[])
-            sp = sp +% sizeof(pointer)
+          if stack.isActiveStack():
+            # words within the jmp_buf structure may not be properly aligned.
+            let regEnd = sp +% sizeof(registers)
+            while sp <% regEnd:
+              gcMark(gch, cast[PPointer](sp)[])
+              gcMark(gch, cast[PPointer](sp +% sizeof(pointer) div 2)[])
+              sp = sp +% sizeof(pointer)
         # Make sure sp is word-aligned
         sp = sp and not (sizeof(pointer) - 1)
         # loop unrolled:
diff --git a/lib/system/gc_ms.nim b/lib/system/gc_ms.nim
index f927575dd..5896af88e 100644
--- a/lib/system/gc_ms.nim
+++ b/lib/system/gc_ms.nim
@@ -10,9 +10,6 @@
 # A simple mark&sweep garbage collector for Nim. Define the
 # symbol ``gcUseBitvectors`` to generate a variant of this GC.
 
-when defined(nimCoroutines):
-  import arch
-
 {.push profiler:off.}
 
 const
@@ -51,17 +48,22 @@ type
     maxStackSize: int        # max stack size
     freedObjects: int        # max entries in cycle table
 
-  GcStack {.final.} = object
-    prev: ptr GcStack
-    next: ptr GcStack
-    starts: pointer
-    pos: pointer
-    maxStackSize: int
+  GcStack {.final, pure.} = object
+    when nimCoroutines:
+      prev: ptr GcStack      # link to the previous stack record
+      next: ptr GcStack      # link to the next stack record
+      maxStackSize: int      # Used to track statistics because we can not use
+                             # GcStat.maxStackSize when multiple stacks exist.
+    bottom: pointer          # address of the bottom of this stack
+
+    when nimCoroutines:
+      pos: pointer           # most recently recorded position within this stack
 
   GcHeap = object            # this contains the zero count and
                              # non-zero count table
-    stack: ptr GcStack
-    stackBottom: pointer
+    stack: GcStack
+    when nimCoroutines:
+      activeStack: ptr GcStack    # current executing coroutine stack.
     cycleThreshold: int
     when useCellIds:
       idGenerator: int
@@ -423,7 +425,7 @@ proc markStackAndRegisters(gch: var GcHeap) {.noinline, cdecl.} =
   forEachStackSlot(gch, gcMark)
 
 proc collectCTBody(gch: var GcHeap) =
-  when not defined(nimCoroutines):
+  when not nimCoroutines:
     gch.stat.maxStackSize = max(gch.stat.maxStackSize, stackSize())
   prepareForInteriorPointerChecking(gch.region)
   markStackAndRegisters(gch)
@@ -479,10 +481,10 @@ when not defined(useNimRtl):
              "[GC] collections: " & $gch.stat.collections & "\n" &
              "[GC] max threshold: " & $gch.stat.maxThreshold & "\n" &
              "[GC] freed objects: " & $gch.stat.freedObjects & "\n"
-    when defined(nimCoroutines):
+    when nimCoroutines:
       result = result & "[GC] number of stacks: " & $gch.stack.len & "\n"
       for stack in items(gch.stack):
-        result = result & "[GC]   stack " & stack.starts.repr & "[GC]     max stack size " & $stack.maxStackSize & "\n"
+        result = result & "[GC]   stack " & stack.bottom.repr & "[GC]     max stack size " & $stack.maxStackSize & "\n"
     else:
       result = result & "[GC] max stack size: " & $gch.stat.maxStackSize & "\n"
     GC_enable()
diff --git a/lib/windows/winlean.nim b/lib/windows/winlean.nim
index 02821b792..fa9ce9eed 100644
--- a/lib/windows/winlean.nim
+++ b/lib/windows/winlean.nim
@@ -1036,3 +1036,17 @@ else:
   proc readConsoleInput*(hConsoleInput: Handle, lpBuffer: pointer, nLength: cint,
                         lpNumberOfEventsRead: ptr cint): cint
        {.stdcall, dynlib: "kernel32", importc: "ReadConsoleInputW".}
+
+type
+  LPFIBER_START_ROUTINE* = proc (param: pointer): void {.stdcall.}
+# Flag accepted by the *Ex procs below: also switch floating-point state.
+const
+  FIBER_FLAG_FLOAT_SWITCH* = 0x01
+# Win32 fiber API. GetCurrentFiber comes from the C header, not from kernel32.
+proc CreateFiber*(stackSize: int, fn: LPFIBER_START_ROUTINE, param: pointer): pointer {.stdcall, discardable, dynlib: "kernel32", importc.}
+proc CreateFiberEx*(stkCommit: int, stkReserve: int, flags: int32, fn: LPFIBER_START_ROUTINE, param: pointer): pointer {.stdcall, discardable, dynlib: "kernel32", importc.}
+proc ConvertThreadToFiber*(param: pointer): pointer {.stdcall, discardable, dynlib: "kernel32", importc.}
+proc ConvertThreadToFiberEx*(param: pointer, flags: int32): pointer {.stdcall, discardable, dynlib: "kernel32", importc.}
+proc DeleteFiber*(fiber: pointer) {.stdcall, dynlib: "kernel32", importc.}
+proc SwitchToFiber*(fiber: pointer) {.stdcall, dynlib: "kernel32", importc.}
+proc GetCurrentFiber*(): pointer {.stdcall, importc, header: "windows.h".}
diff --git a/tests/coroutines/texceptions.nim b/tests/coroutines/texceptions.nim
new file mode 100644
index 000000000..f3debf0a7
--- /dev/null
+++ b/tests/coroutines/texceptions.nim
@@ -0,0 +1,23 @@
+import coro
+var
+  stackCheckValue = 1100220033
+  numbers = newSeqOfCap[int](10)
+# Records `id` around every suspend(), both inside `try` and inside `except`.
+proc testExceptions(id: int, sleep: float) =
+  try:
+    numbers.add(id)
+    suspend(sleep)
+    numbers.add(id)
+    raise (ref ValueError)()
+  except ValueError:
+    numbers.add(id)
+    suspend(sleep)
+    numbers.add(id)
+  suspend(sleep)
+  numbers.add(id)
+# Two coroutines with slightly different sleep times must interleave strictly.
+start(proc() = testExceptions(1, 0.01))
+start(proc() = testExceptions(2, 0.011))
+run()
+doAssert(stackCheckValue == 1100220033, "Thread stack got corrupted")
+doAssert(numbers == @[1, 2, 1, 2, 1, 2, 1, 2, 1, 2], "Coroutines executed in incorrect order")
diff --git a/tests/coroutines/texceptions.nim.cfg b/tests/coroutines/texceptions.nim.cfg
new file mode 100644
index 000000000..b011bc585
--- /dev/null
+++ b/tests/coroutines/texceptions.nim.cfg
@@ -0,0 +1 @@
+-d:nimCoroutines
diff --git a/tests/coroutines/tgc.nim b/tests/coroutines/tgc.nim
new file mode 100644
index 000000000..66a12ab9d
--- /dev/null
+++ b/tests/coroutines/tgc.nim
@@ -0,0 +1,15 @@
+import coro
+# Checks that memory allocated inside coroutines is released again.
+var maxOccupiedMemory = 0
+# Allocates a seq, records the highest occupied-memory value, then suspends.
+proc testGC() =
+  var numbers = newSeq[int](100)
+  maxOccupiedMemory = max(maxOccupiedMemory, getOccupiedMem())
+  suspend(0)
+
+start(testGC)
+start(testGC)
+run()
+# After a full collection, occupied memory must be below the recorded maximum.
+GC_fullCollect()
+doAssert(getOccupiedMem() < maxOccupiedMemory, "GC did not free any memory allocated in coroutines")
diff --git a/tests/coroutines/tgc.nim.cfg b/tests/coroutines/tgc.nim.cfg
new file mode 100644
index 000000000..b011bc585
--- /dev/null
+++ b/tests/coroutines/tgc.nim.cfg
@@ -0,0 +1 @@
+-d:nimCoroutines
diff --git a/tests/coroutines/titerators.nim b/tests/coroutines/titerators.nim
new file mode 100644
index 000000000..e2623ce2d
--- /dev/null
+++ b/tests/coroutines/titerators.nim
@@ -0,0 +1,24 @@
+import coro
+include system/timers
+var
+  stackCheckValue = 1100220033
+  numbers = newSeqOfCap[int](10)
+# Yields five values derived from `id`, suspending for `sleep` after each one.
+iterator theIterator(id: int, sleep: float): int =
+  for i in 0..<5:
+    yield 10 * id + i
+    suspend(sleep)
+# `sleep` is `float` like the iterator's parameter, so 0.011 is not rounded.
+proc theCoroutine(id: int, sleep: float) =
+  for n in theIterator(id, sleep):
+    numbers.add(n)
+# Slowest coroutine: 5 * 11 ms = 55 ms. Run back to back they would need 105 ms.
+var start = getTicks()
+start(proc() = theCoroutine(1, 0.01))
+start(proc() = theCoroutine(2, 0.011))
+run()
+var executionTime = getTicks() - start
+doAssert(executionTime >= 55_000_000.Nanos, "Coroutines executed too short")
+doAssert(executionTime < 100_000_000.Nanos, "Coroutines executed too long")
+doAssert(stackCheckValue == 1100220033, "Thread stack got corrupted")
+doAssert(numbers == @[10, 20, 11, 21, 12, 22, 13, 23, 14, 24], "Coroutines executed in incorrect order")
diff --git a/tests/coroutines/titerators.nim.cfg b/tests/coroutines/titerators.nim.cfg
new file mode 100644
index 000000000..b011bc585
--- /dev/null
+++ b/tests/coroutines/titerators.nim.cfg
@@ -0,0 +1 @@
+-d:nimCoroutines