summary refs log tree commit diff stats
path: root/lib/system
diff options
context:
space:
mode:
Diffstat (limited to 'lib/system')
-rw-r--r--lib/system/alloc.nim71
-rw-r--r--lib/system/ansi_c.nim163
-rw-r--r--lib/system/assign.nim4
-rw-r--r--lib/system/chcks.nim1
-rw-r--r--lib/system/debugger.nim6
-rw-r--r--lib/system/deepcopy.nim4
-rw-r--r--lib/system/dyncalls.nim34
-rw-r--r--lib/system/endb.nim8
-rw-r--r--lib/system/excpt.nim12
-rw-r--r--lib/system/gc.nim245
-rw-r--r--lib/system/gc2.nim40
-rw-r--r--lib/system/gc_common.nim10
-rw-r--r--lib/system/gc_ms.nim3
-rw-r--r--lib/system/gc_stack.nim507
-rw-r--r--lib/system/hti.nim2
-rw-r--r--lib/system/inclrtl.nim5
-rw-r--r--lib/system/jssys.nim28
-rw-r--r--lib/system/mmdisp.nim66
-rw-r--r--lib/system/nimscript.nim43
-rw-r--r--lib/system/osalloc.nim11
-rw-r--r--lib/system/profiler.nim19
-rw-r--r--lib/system/repr.nim25
-rw-r--r--lib/system/sets.nim2
-rw-r--r--lib/system/sysio.nim168
-rw-r--r--lib/system/syslocks.nim59
-rw-r--r--lib/system/sysstr.nim165
-rw-r--r--lib/system/threads.nim25
-rw-r--r--lib/system/timers.nim11
-rw-r--r--lib/system/widestrs.nim13
29 files changed, 960 insertions, 790 deletions
diff --git a/lib/system/alloc.nim b/lib/system/alloc.nim
index e0fd53b7b..bed9fd906 100644
--- a/lib/system/alloc.nim
+++ b/lib/system/alloc.nim
@@ -27,15 +27,14 @@ const
 
 type
   PTrunk = ptr Trunk
-  Trunk {.final.} = object
+  Trunk = object
     next: PTrunk         # all nodes are connected with this pointer
     key: int             # start address at bit 0
     bits: array[0..IntsPerTrunk-1, int] # a bit vector
 
   TrunkBuckets = array[0..255, PTrunk]
-  IntSet {.final.} = object
+  IntSet = object
     data: TrunkBuckets
-{.deprecated: [TIntSet: IntSet, TTrunk: Trunk, TTrunkBuckets: TrunkBuckets].}
 
 type
   AlignType = BiggestFloat
@@ -64,8 +63,6 @@ type
     next, prev: PBigChunk    # chunks of the same (or bigger) size
     align: int
     data: AlignType      # start of usable memory
-{.deprecated: [TAlignType: AlignType, TFreeCell: FreeCell, TBaseChunk: BaseChunk,
-              TBigChunk: BigChunk, TSmallChunk: SmallChunk].}
 
 template smallChunkOverhead(): expr = sizeof(SmallChunk)-sizeof(AlignType)
 template bigChunkOverhead(): expr = sizeof(BigChunk)-sizeof(AlignType)
@@ -79,18 +76,18 @@ template bigChunkOverhead(): expr = sizeof(BigChunk)-sizeof(AlignType)
 
 type
   PLLChunk = ptr LLChunk
-  LLChunk {.pure.} = object ## *low-level* chunk
+  LLChunk = object ## *low-level* chunk
     size: int                # remaining size
     acc: int                 # accumulator
     next: PLLChunk           # next low-level chunk; only needed for dealloc
 
   PAvlNode = ptr AvlNode
-  AvlNode {.pure, final.} = object
+  AvlNode = object
     link: array[0..1, PAvlNode] # Left (0) and right (1) links
     key, upperBound: int
     level: int
 
-  MemRegion {.final, pure.} = object
+  MemRegion = object
     minLargeObj, maxLargeObj: int
     freeSmallChunks: array[0..SmallChunkSize div MemAlign-1, PSmallChunk]
     llmem: PLLChunk
@@ -99,6 +96,7 @@ type
     freeChunksList: PBigChunk # XXX make this a datastructure with O(1) access
     chunkStarts: IntSet
     root, deleted, last, freeAvlNodes: PAvlNode
+    locked: bool # if locked, we cannot free pages.
 {.deprecated: [TLLChunk: LLChunk, TAvlNode: AvlNode, TMemRegion: MemRegion].}
 
 # shared:
@@ -234,7 +232,8 @@ proc isSmallChunk(c: PChunk): bool {.inline.} =
 proc chunkUnused(c: PChunk): bool {.inline.} =
   result = not c.used
 
-iterator allObjects(m: MemRegion): pointer {.inline.} =
+iterator allObjects(m: var MemRegion): pointer {.inline.} =
+  m.locked = true
   for s in elements(m.chunkStarts):
     # we need to check here again as it could have been modified:
     if s in m.chunkStarts:
@@ -252,6 +251,7 @@ iterator allObjects(m: MemRegion): pointer {.inline.} =
         else:
           let c = cast[PBigChunk](c)
           yield addr(c.data)
+  m.locked = false
 
 proc iterToProc*(iter: typed, envType: typedesc; procName: untyped) {.
                       magic: "Plugin", compileTime.}
@@ -311,7 +311,7 @@ proc freeOsChunks(a: var MemRegion, p: pointer, size: int) =
   osDeallocPages(p, size)
   decCurrMem(a, size)
   dec(a.freeMem, size)
-  #c_fprintf(c_stdout, "[Alloc] back to OS: %ld\n", size)
+  #c_fprintf(stdout, "[Alloc] back to OS: %ld\n", size)
 
 proc isAccessible(a: MemRegion, p: pointer): bool {.inline.} =
   result = contains(a.chunkStarts, pageIndex(p))
@@ -324,9 +324,9 @@ proc contains[T](list, x: T): bool =
 
 proc writeFreeList(a: MemRegion) =
   var it = a.freeChunksList
-  c_fprintf(c_stdout, "freeChunksList: %p\n", it)
+  c_fprintf(stdout, "freeChunksList: %p\n", it)
   while it != nil:
-    c_fprintf(c_stdout, "it: %p, next: %p, prev: %p\n",
+    c_fprintf(stdout, "it: %p, next: %p, prev: %p\n",
               it, it.next, it.prev)
     it = it.next
 
@@ -385,7 +385,7 @@ proc freeBigChunk(a: var MemRegion, c: PBigChunk) =
           excl(a.chunkStarts, pageIndex(c))
           c = cast[PBigChunk](le)
 
-  if c.size < ChunkOsReturn or doNotUnmap:
+  if c.size < ChunkOsReturn or doNotUnmap or a.locked:
     incl(a, a.chunkStarts, pageIndex(c))
     updatePrevSize(a, c, c.size)
     listAdd(a.freeChunksList, c)
@@ -442,26 +442,29 @@ proc getSmallChunk(a: var MemRegion): PSmallChunk =
 # -----------------------------------------------------------------------------
 proc isAllocatedPtr(a: MemRegion, p: pointer): bool {.benign.}
 
-proc allocInv(a: MemRegion): bool =
-  ## checks some (not all yet) invariants of the allocator's data structures.
-  for s in low(a.freeSmallChunks)..high(a.freeSmallChunks):
-    var c = a.freeSmallChunks[s]
-    while not (c == nil):
-      if c.next == c:
-        echo "[SYSASSERT] c.next == c"
-        return false
-      if not (c.size == s * MemAlign):
-        echo "[SYSASSERT] c.size != s * MemAlign"
-        return false
-      var it = c.freeList
-      while not (it == nil):
-        if not (it.zeroField == 0):
-          echo "[SYSASSERT] it.zeroField != 0"
-          c_printf("%ld %p\n", it.zeroField, it)
+when true:
+  template allocInv(a: MemRegion): bool = true
+else:
+  proc allocInv(a: MemRegion): bool =
+    ## checks some (not all yet) invariants of the allocator's data structures.
+    for s in low(a.freeSmallChunks)..high(a.freeSmallChunks):
+      var c = a.freeSmallChunks[s]
+      while not (c == nil):
+        if c.next == c:
+          echo "[SYSASSERT] c.next == c"
           return false
-        it = it.next
-      c = c.next
-  result = true
+        if not (c.size == s * MemAlign):
+          echo "[SYSASSERT] c.size != s * MemAlign"
+          return false
+        var it = c.freeList
+        while not (it == nil):
+          if not (it.zeroField == 0):
+            echo "[SYSASSERT] it.zeroField != 0"
+            c_printf("%ld %p\n", it.zeroField, it)
+            return false
+          it = it.next
+        c = c.next
+    result = true
 
 proc rawAlloc(a: var MemRegion, requestedSize: int): pointer =
   sysAssert(allocInv(a), "rawAlloc: begin")
@@ -469,7 +472,7 @@ proc rawAlloc(a: var MemRegion, requestedSize: int): pointer =
   sysAssert(requestedSize >= sizeof(FreeCell), "rawAlloc: requested size too small")
   var size = roundup(requestedSize, MemAlign)
   sysAssert(size >= requestedSize, "insufficient allocated size!")
-  #c_fprintf(c_stdout, "alloc; size: %ld; %ld\n", requestedSize, size)
+  #c_fprintf(stdout, "alloc; size: %ld; %ld\n", requestedSize, size)
   if size <= SmallChunkSize-smallChunkOverhead():
     # allocate a small block: for small chunks, we use only its next pointer
     var s = size div MemAlign
@@ -490,7 +493,7 @@ proc rawAlloc(a: var MemRegion, requestedSize: int): pointer =
       sysAssert(allocInv(a), "rawAlloc: begin c != nil")
       sysAssert c.next != c, "rawAlloc 5"
       #if c.size != size:
-      #  c_fprintf(c_stdout, "csize: %lld; size %lld\n", c.size, size)
+      #  c_fprintf(stdout, "csize: %lld; size %lld\n", c.size, size)
       sysAssert c.size == size, "rawAlloc 6"
       if c.freeList == nil:
         sysAssert(c.acc + smallChunkOverhead() + size <= SmallChunkSize,
diff --git a/lib/system/ansi_c.nim b/lib/system/ansi_c.nim
index 1abd8466d..a8e358229 100644
--- a/lib/system/ansi_c.nim
+++ b/lib/system/ansi_c.nim
@@ -13,60 +13,43 @@
 
 {.push hints:off}
 
-proc c_strcmp(a, b: cstring): cint {.header: "<string.h>",
-  noSideEffect, importc: "strcmp".}
-proc c_memcmp(a, b: cstring, size: int): cint {.header: "<string.h>",
-  noSideEffect, importc: "memcmp".}
-proc c_memcpy(a, b: cstring, size: int) {.header: "<string.h>", importc: "memcpy".}
-proc c_strlen(a: cstring): int {.header: "<string.h>",
-  noSideEffect, importc: "strlen".}
-proc c_memset(p: pointer, value: cint, size: int) {.
-  header: "<string.h>", importc: "memset".}
-
-when not declared(File):
-  type
-    C_TextFile {.importc: "FILE", header: "<stdio.h>",
-                 final, incompleteStruct.} = object
-    C_BinaryFile {.importc: "FILE", header: "<stdio.h>",
-                   final, incompleteStruct.} = object
-    C_TextFileStar = ptr C_TextFile
-    C_BinaryFileStar = ptr C_BinaryFile
-else:
-  type
-    C_TextFileStar = File
-    C_BinaryFileStar = File
+proc c_memchr(s: pointer, c: cint, n: csize): pointer {.
+  importc: "memchr", header: "<string.h>".}
+proc c_memcmp(a, b: pointer, size: csize): cint {.
+  importc: "memcmp", header: "<string.h>", noSideEffect.}
+proc c_memcpy(a, b: pointer, size: csize): pointer {.
+  importc: "memcpy", header: "<string.h>", discardable.}
+proc c_memmove(a, b: pointer, size: csize): pointer {.
+  importc: "memmove", header: "<string.h>",discardable.}
+proc c_memset(p: pointer, value: cint, size: csize): pointer {.
+  importc: "memset", header: "<string.h>", discardable.}
+proc c_strcmp(a, b: cstring): cint {.
+  importc: "strcmp", header: "<string.h>", noSideEffect.}
 
 type
   C_JmpBuf {.importc: "jmp_buf", header: "<setjmp.h>".} = object
 
-when not defined(vm):
-  var
-    c_stdin {.importc: "stdin", nodecl.}: C_TextFileStar
-    c_stdout {.importc: "stdout", nodecl.}: C_TextFileStar
-    c_stderr {.importc: "stderr", nodecl.}: C_TextFileStar
-
-# constants faked as variables:
-when not declared(SIGINT):
+when defined(windows):
+  const
+    SIGABRT = cint(22)
+    SIGFPE = cint(8)
+    SIGILL = cint(4)
+    SIGINT = cint(2)
+    SIGSEGV = cint(11)
+    SIGTERM = cint(15)
+elif defined(macosx) or defined(linux) or defined(freebsd) or
+     defined(openbsd) or defined(netbsd) or defined(solaris):
+  const
+    SIGABRT = cint(6)
+    SIGFPE = cint(8)
+    SIGILL = cint(4)
+    SIGINT = cint(2)
+    SIGSEGV = cint(11)
+    SIGTERM = cint(15)
+    SIGPIPE = cint(13)
+else:
   when NoFakeVars:
-    when defined(windows):
-      const
-        SIGABRT = cint(22)
-        SIGFPE = cint(8)
-        SIGILL = cint(4)
-        SIGINT = cint(2)
-        SIGSEGV = cint(11)
-        SIGTERM = cint(15)
-    elif defined(macosx) or defined(linux):
-      const
-        SIGABRT = cint(6)
-        SIGFPE = cint(8)
-        SIGILL = cint(4)
-        SIGINT = cint(2)
-        SIGSEGV = cint(11)
-        SIGTERM = cint(15)
-        SIGPIPE = cint(13)
-    else:
-      {.error: "SIGABRT not ported to your platform".}
+    {.error: "SIGABRT not ported to your platform".}
   else:
     var
       SIGINT {.importc: "SIGINT", nodecl.}: cint
@@ -78,10 +61,7 @@ when not declared(SIGINT):
       var SIGPIPE {.importc: "SIGPIPE", nodecl.}: cint
 
 when defined(macosx):
-  when NoFakeVars:
-    const SIGBUS = cint(10)
-  else:
-    var SIGBUS {.importc: "SIGBUS", nodecl.}: cint
+  const SIGBUS = cint(10)
 else:
   template SIGBUS: expr = SIGSEGV
 
@@ -103,72 +83,27 @@ else:
   proc c_setjmp(jmpb: C_JmpBuf): cint {.
     header: "<setjmp.h>", importc: "setjmp".}
 
-proc c_signal(sign: cint, handler: proc (a: cint) {.noconv.}) {.
-  importc: "signal", header: "<signal.h>".}
-proc c_raise(sign: cint) {.importc: "raise", header: "<signal.h>".}
+type c_sighandler_t = proc (a: cint) {.noconv.}
+proc c_signal(sign: cint, handler: proc (a: cint) {.noconv.}): c_sighandler_t {.
+  importc: "signal", header: "<signal.h>", discardable.}
 
-proc c_fputs(c: cstring, f: C_TextFileStar) {.importc: "fputs",
-  header: "<stdio.h>".}
-proc c_fgets(c: cstring, n: int, f: C_TextFileStar): cstring  {.
-  importc: "fgets", header: "<stdio.h>".}
-proc c_fgetc(stream: C_TextFileStar): int {.importc: "fgetc",
-  header: "<stdio.h>".}
-proc c_ungetc(c: int, f: C_TextFileStar) {.importc: "ungetc",
-  header: "<stdio.h>".}
-proc c_putc(c: char, stream: C_TextFileStar) {.importc: "putc",
-  header: "<stdio.h>".}
-proc c_fprintf(f: C_TextFileStar, frmt: cstring) {.
-  importc: "fprintf", header: "<stdio.h>", varargs.}
-proc c_printf(frmt: cstring) {.
-  importc: "printf", header: "<stdio.h>", varargs.}
+proc c_fprintf(f: File, frmt: cstring): cint {.
+  importc: "fprintf", header: "<stdio.h>", varargs, discardable.}
+proc c_printf(frmt: cstring): cint {.
+  importc: "printf", header: "<stdio.h>", varargs, discardable.}
 
-proc c_fopen(filename, mode: cstring): C_TextFileStar {.
-  importc: "fopen", header: "<stdio.h>".}
-proc c_fclose(f: C_TextFileStar) {.importc: "fclose", header: "<stdio.h>".}
-
-proc c_sprintf(buf, frmt: cstring): cint {.header: "<stdio.h>",
-  importc: "sprintf", varargs, noSideEffect.}
+proc c_sprintf(buf, frmt: cstring): cint {.
+  importc: "sprintf", header: "<stdio.h>", varargs, noSideEffect.}
   # we use it only in a way that cannot lead to security issues
 
-proc c_fread(buf: pointer, size, n: int, f: C_BinaryFileStar): int {.
-  importc: "fread", header: "<stdio.h>".}
-proc c_fseek(f: C_BinaryFileStar, offset: clong, whence: int): int {.
-  importc: "fseek", header: "<stdio.h>".}
-
-proc c_fwrite(buf: pointer, size, n: int, f: C_BinaryFileStar): int {.
-  importc: "fwrite", header: "<stdio.h>".}
-
-proc c_exit(errorcode: cint) {.importc: "exit", header: "<stdlib.h>".}
-proc c_ferror(stream: C_TextFileStar): bool {.
-  importc: "ferror", header: "<stdio.h>".}
-proc c_fflush(stream: C_TextFileStar) {.importc: "fflush", header: "<stdio.h>".}
-proc c_abort() {.importc: "abort", header: "<stdlib.h>".}
-proc c_feof(stream: C_TextFileStar): bool {.
-  importc: "feof", header: "<stdio.h>".}
+proc c_fileno(f: File): cint {.
+  importc: "fileno", header: "<fcntl.h>".}
 
-proc c_malloc(size: int): pointer {.importc: "malloc", header: "<stdlib.h>".}
-proc c_free(p: pointer) {.importc: "free", header: "<stdlib.h>".}
-proc c_realloc(p: pointer, newsize: int): pointer {.
+proc c_malloc(size: csize): pointer {.
+  importc: "malloc", header: "<stdlib.h>".}
+proc c_free(p: pointer) {.
+  importc: "free", header: "<stdlib.h>".}
+proc c_realloc(p: pointer, newsize: csize): pointer {.
   importc: "realloc", header: "<stdlib.h>".}
 
-when hostOS != "standalone":
-  when not declared(errno):
-    when defined(NimrodVM):
-      var vmErrnoWrapper {.importc.}: ptr cint
-      template errno: expr =
-        bind vmErrnoWrapper
-        vmErrnoWrapper[]
-    else:
-      var errno {.importc, header: "<errno.h>".}: cint ## error variable
-proc strerror(errnum: cint): cstring {.importc, header: "<string.h>".}
-
-proc c_remove(filename: cstring): cint {.
-  importc: "remove", header: "<stdio.h>".}
-proc c_rename(oldname, newname: cstring): cint {.
-  importc: "rename", header: "<stdio.h>".}
-
-proc c_system(cmd: cstring): cint {.importc: "system", header: "<stdlib.h>".}
-proc c_getenv(env: cstring): cstring {.importc: "getenv", header: "<stdlib.h>".}
-proc c_putenv(env: cstring): cint {.importc: "putenv", header: "<stdlib.h>".}
-
 {.pop}
diff --git a/lib/system/assign.nim b/lib/system/assign.nim
index 55d7572e2..231a20d86 100644
--- a/lib/system/assign.nim
+++ b/lib/system/assign.nim
@@ -211,14 +211,14 @@ proc genericReset(dest: pointer, mt: PNimType) =
     zeroMem(dest, mt.size) # set raw bits to zero
 
 proc selectBranch(discVal, L: int,
-                  a: ptr array [0..0x7fff, ptr TNimNode]): ptr TNimNode =
+                  a: ptr array[0..0x7fff, ptr TNimNode]): ptr TNimNode =
   result = a[L] # a[L] contains the ``else`` part (but may be nil)
   if discVal <% L:
     var x = a[discVal]
     if x != nil: result = x
 
 proc FieldDiscriminantCheck(oldDiscVal, newDiscVal: int,
-                            a: ptr array [0..0x7fff, ptr TNimNode],
+                            a: ptr array[0..0x7fff, ptr TNimNode],
                             L: int) {.compilerProc.} =
   var oldBranch = selectBranch(oldDiscVal, L, a)
   var newBranch = selectBranch(newDiscVal, L, a)
diff --git a/lib/system/chcks.nim b/lib/system/chcks.nim
index 6caf99d27..27a3708ea 100644
--- a/lib/system/chcks.nim
+++ b/lib/system/chcks.nim
@@ -51,7 +51,6 @@ proc chckRangeF(x, a, b: float): float =
 proc chckNil(p: pointer) =
   if p == nil:
     sysFatal(ValueError, "attempt to write to a nil address")
-    #c_raise(SIGSEGV)
 
 proc chckObj(obj, subclass: PNimType) {.compilerproc.} =
   # checks if obj is of type subclass:
diff --git a/lib/system/debugger.nim b/lib/system/debugger.nim
index b18c61755..cc6919d36 100644
--- a/lib/system/debugger.nim
+++ b/lib/system/debugger.nim
@@ -108,8 +108,8 @@ proc fileMatches(c, bp: cstring): bool =
   # and the character for the suffix does not exist or
   # is one of: \  /  :
   # depending on the OS case does not matter!
-  var blen: int = c_strlen(bp)
-  var clen: int = c_strlen(c)
+  var blen: int = bp.len
+  var clen: int = c.len
   if blen > clen: return false
   # check for \ /  :
   if clen-blen-1 >= 0 and c[clen-blen-1] notin {'\\', '/', ':'}:
@@ -159,7 +159,7 @@ type
 {.deprecated: [THash: Hash, TWatchpoint: Watchpoint].}
 
 var
-  watchpoints: array [0..99, Watchpoint]
+  watchpoints: array[0..99, Watchpoint]
   watchpointsLen: int
 
 proc `!&`(h: Hash, val: int): Hash {.inline.} =
diff --git a/lib/system/deepcopy.nim b/lib/system/deepcopy.nim
index 03230e541..5445a067c 100644
--- a/lib/system/deepcopy.nim
+++ b/lib/system/deepcopy.nim
@@ -36,7 +36,7 @@ proc copyDeepString(src: NimString): NimString {.inline.} =
   if src != nil:
     result = rawNewStringNoInit(src.len)
     result.len = src.len
-    c_memcpy(result.data, src.data, src.len + 1)
+    copyMem(addr(result.data), addr(src.data), src.len + 1)
 
 proc genericDeepCopyAux(dest, src: pointer, mt: PNimType) =
   var
@@ -124,7 +124,9 @@ proc genericDeepCopyAux(dest, src: pointer, mt: PNimType) =
     copyMem(dest, src, mt.size)
 
 proc genericDeepCopy(dest, src: pointer, mt: PNimType) {.compilerProc.} =
+  GC_disable()
   genericDeepCopyAux(dest, src, mt)
+  GC_enable()
 
 proc genericSeqDeepCopy(dest, src: pointer, mt: PNimType) {.compilerProc.} =
   # also invoked for 'string'
diff --git a/lib/system/dyncalls.nim b/lib/system/dyncalls.nim
index 3b3d1f87d..0a994efac 100644
--- a/lib/system/dyncalls.nim
+++ b/lib/system/dyncalls.nim
@@ -52,11 +52,14 @@ when defined(posix):
   #
 
   # c stuff:
-  var
-    RTLD_NOW {.importc: "RTLD_NOW", header: "<dlfcn.h>".}: int
+  when defined(linux) or defined(macosx):
+    const RTLD_NOW = cint(2)
+  else:
+    var
+      RTLD_NOW {.importc: "RTLD_NOW", header: "<dlfcn.h>".}: cint
 
   proc dlclose(lib: LibHandle) {.importc, header: "<dlfcn.h>".}
-  proc dlopen(path: cstring, mode: int): LibHandle {.
+  proc dlopen(path: cstring, mode: cint): LibHandle {.
       importc, header: "<dlfcn.h>".}
   proc dlsym(lib: LibHandle, name: cstring): ProcAddr {.
       importc, header: "<dlfcn.h>".}
@@ -109,9 +112,30 @@ elif defined(windows) or defined(dos):
   proc nimGetProcAddr(lib: LibHandle, name: cstring): ProcAddr =
     result = getProcAddress(cast[THINSTANCE](lib), name)
     if result != nil: return
-    var decorated: array[250, char]
+    const decorated_length = 250
+    var decorated: array[decorated_length, char]
+    decorated[0] = '_'
+    var m = 1
+    while m < (decorated_length - 5):
+      if name[m - 1] == '\x00': break
+      decorated[m] = name[m - 1]
+      inc(m)
+    decorated[m] = '@'
     for i in countup(0, 50):
-      discard csprintf(decorated, "_%s@%ld", name, i*4)
+      var k = i * 4
+      if k div 100 == 0: 
+        if k div 10 == 0:
+          m = m + 1
+        else:
+          m = m + 2
+      else:
+        m = m + 3
+      decorated[m + 1] = '\x00'
+      while true:
+        decorated[m] = chr(ord('0') + (k %% 10))
+        dec(m)
+        k = k div 10
+        if k == 0: break
       result = getProcAddress(cast[THINSTANCE](lib), decorated)
       if result != nil: return
     procAddrError(name)
diff --git a/lib/system/endb.nim b/lib/system/endb.nim
index b2cc5624b..d4d10a52c 100644
--- a/lib/system/endb.nim
+++ b/lib/system/endb.nim
@@ -329,14 +329,14 @@ proc dbgStackFrame(s: cstring, start: int, currFrame: PFrame) =
 
 proc readLine(f: File, line: var StaticStr): bool =
   while true:
-    var c = fgetc(f)
+    var c = c_fgetc(f)
     if c < 0'i32:
       if line.len > 0: break
       else: return false
     if c == 10'i32: break # LF
     if c == 13'i32:  # CR
-      c = fgetc(f) # is the next char LF?
-      if c != 10'i32: ungetc(c, f) # no, put the character back
+      c = c_fgetc(f) # is the next char LF?
+      if c != 10'i32: discard c_ungetc(c, f) # no, put the character back
       break
     add line, chr(int(c))
   result = true
@@ -475,7 +475,7 @@ proc dbgWriteStackTrace(f: PFrame) =
     it = f
     i = 0
     total = 0
-    tempFrames: array [0..127, PFrame]
+    tempFrames: array[0..127, PFrame]
   # setup long head:
   while it != nil and i <= high(tempFrames)-firstCalls:
     tempFrames[i] = it
diff --git a/lib/system/excpt.nim b/lib/system/excpt.nim
index 948f87410..b89729850 100644
--- a/lib/system/excpt.nim
+++ b/lib/system/excpt.nim
@@ -90,13 +90,13 @@ when defined(nativeStacktrace) and nativeStackTraceSupported:
 
   when not hasThreadSupport:
     var
-      tempAddresses: array [0..127, pointer] # should not be alloc'd on stack
+      tempAddresses: array[0..127, pointer] # should not be alloc'd on stack
       tempDlInfo: TDl_info
 
   proc auxWriteStackTraceWithBacktrace(s: var string) =
     when hasThreadSupport:
       var
-        tempAddresses: array [0..127, pointer] # but better than a threadvar
+        tempAddresses: array[0..127, pointer] # but better than a threadvar
         tempDlInfo: TDl_info
     # This is allowed to be expensive since it only happens during crashes
     # (but this way you don't need manual stack tracing)
@@ -124,12 +124,12 @@ when defined(nativeStacktrace) and nativeStackTraceSupported:
 
 when not hasThreadSupport:
   var
-    tempFrames: array [0..127, PFrame] # should not be alloc'd on stack
+    tempFrames: array[0..127, PFrame] # should not be alloc'd on stack
 
 proc auxWriteStackTrace(f: PFrame, s: var string) =
   when hasThreadSupport:
     var
-      tempFrames: array [0..127, PFrame] # but better than a threadvar
+      tempFrames: array[0..127, PFrame] # but better than a threadvar
   const
     firstCalls = 32
   var
@@ -250,12 +250,12 @@ proc raiseExceptionAux(e: ref Exception) =
             inc L, slen
         template add(buf, s: expr) =
           xadd(buf, s, s.len)
-        var buf: array [0..2000, char]
+        var buf: array[0..2000, char]
         var L = 0
         add(buf, "Error: unhandled exception: ")
         if not isNil(e.msg): add(buf, e.msg)
         add(buf, " [")
-        xadd(buf, e.name, c_strlen(e.name))
+        xadd(buf, e.name, e.name.len)
         add(buf, "]\n")
         showErrorMessage(buf)
       quitOrDebug()
diff --git a/lib/system/gc.nim b/lib/system/gc.nim
index 4f461b5c3..ba6b2fcf9 100644
--- a/lib/system/gc.nim
+++ b/lib/system/gc.nim
@@ -1,7 +1,7 @@
 #
 #
 #            Nim's Runtime Library
-#        (c) Copyright 2015 Andreas Rumpf
+#        (c) Copyright 2016 Andreas Rumpf
 #
 #    See the file "copying.txt", included in this
 #    distribution, for details about the copyright.
@@ -9,13 +9,8 @@
 
 #            Garbage Collector
 #
-# The basic algorithm is *Deferred Reference Counting* with cycle detection.
-# This is achieved by combining a Deutsch-Bobrow garbage collector
-# together with Christoper's partial mark-sweep garbage collector.
-#
-# Special care has been taken to avoid recursion as far as possible to avoid
-# stack overflows when traversing deep datastructures. It is well-suited
-# for soft real time applications (like games).
+# Refcounting + Mark&Sweep. Complex algorithms avoided.
+# Been there, done that, didn't work.
 
 when defined(nimCoroutines):
   import arch
@@ -30,7 +25,7 @@ const
                       # this seems to be a good value
   withRealTime = defined(useRealtimeGC)
   useMarkForDebug = defined(gcGenerational)
-  useBackupGc = false                     # use a simple M&S GC to collect
+  useBackupGc = true                      # use a simple M&S GC to collect
                                           # cycles instead of the complex
                                           # algorithm
 
@@ -55,8 +50,8 @@ type
   WalkOp = enum
     waMarkGlobal,    # part of the backup/debug mark&sweep
     waMarkPrecise,   # part of the backup/debug mark&sweep
-    waZctDecRef, waPush, waCycleDecRef, waMarkGray, waScan, waScanBlack,
-    waCollectWhite #, waDebug
+    waZctDecRef, waPush
+    #, waDebug
 
   Finalizer {.compilerproc.} = proc (self: pointer) {.nimcall, benign.}
     # A ref type can have a finalizer that is called before the object's
@@ -87,7 +82,6 @@ type
       idGenerator: int
     zct: CellSeq             # the zero count table
     decStack: CellSeq        # cells in the stack that are to decref again
-    cycleRoots: CellSet
     tempStack: CellSeq       # temporary stack for recursion elimination
     recGcLock: int           # prevent recursion via finalizers; no thread lock
     when withRealTime:
@@ -96,6 +90,7 @@ type
     stat: GcStat
     when useMarkForDebug or useBackupGc:
       marked: CellSet
+      additionalRoots: CellSeq # dummy roots for GC_ref/unref
     when hasThreadSupport:
       toDispose: SharedList[pointer]
 
@@ -136,9 +131,6 @@ proc usrToCell(usr: pointer): PCell {.inline.} =
   # convert pointer to userdata to object (=pointer to refcount)
   result = cast[PCell](cast[ByteAddress](usr)-%ByteAddress(sizeof(Cell)))
 
-proc canBeCycleRoot(c: PCell): bool {.inline.} =
-  result = ntfAcyclic notin c.typ.flags
-
 proc extGetCellType(c: pointer): PNimType {.compilerproc.} =
   # used for code generation concerning debugging
   result = usrToCell(c).typ
@@ -161,10 +153,10 @@ proc writeCell(msg: cstring, c: PCell) =
   var kind = -1
   if c.typ != nil: kind = ord(c.typ.kind)
   when leakDetector:
-    c_fprintf(c_stdout, "[GC] %s: %p %d rc=%ld from %s(%ld)\n",
+    c_fprintf(stdout, "[GC] %s: %p %d rc=%ld from %s(%ld)\n",
               msg, c, kind, c.refcount shr rcShift, c.filename, c.line)
   else:
-    c_fprintf(c_stdout, "[GC] %s: %p %d rc=%ld; color=%ld\n",
+    c_fprintf(stdout, "[GC] %s: %p %d rc=%ld; color=%ld\n",
               msg, c, kind, c.refcount shr rcShift, c.color)
 
 template gcTrace(cell, state: expr): stmt {.immediate.} =
@@ -200,14 +192,16 @@ proc prepareDealloc(cell: PCell) =
     (cast[Finalizer](cell.typ.finalizer))(cellToUsr(cell))
     dec(gch.recGcLock)
 
+template beforeDealloc(gch: var GcHeap; c: PCell; msg: typed) =
+  when false:
+    for i in 0..gch.decStack.len-1:
+      if gch.decStack.d[i] == c:
+        sysAssert(false, msg)
+
 proc rtlAddCycleRoot(c: PCell) {.rtl, inl.} =
   # we MUST access gch as a global here, because this crosses DLL boundaries!
   when hasThreadSupport and hasSharedHeap:
     acquireSys(HeapLock)
-  when cycleGC:
-    if c.color != rcPurple:
-      c.setColor(rcPurple)
-      incl(gch.cycleRoots, c)
   when hasThreadSupport and hasSharedHeap:
     releaseSys(HeapLock)
 
@@ -224,22 +218,30 @@ proc decRef(c: PCell) {.inline.} =
   gcAssert(c.refcount >=% rcIncrement, "decRef")
   if --c.refcount:
     rtlAddZCT(c)
-  elif canbeCycleRoot(c):
-    # unfortunately this is necessary here too, because a cycle might just
-    # have been broken up and we could recycle it.
-    rtlAddCycleRoot(c)
-    #writeCell("decRef", c)
 
 proc incRef(c: PCell) {.inline.} =
   gcAssert(isAllocatedPtr(gch.region, c), "incRef: interiorPtr")
   c.refcount = c.refcount +% rcIncrement
   # and not colorMask
   #writeCell("incRef", c)
-  if canbeCycleRoot(c):
-    rtlAddCycleRoot(c)
 
-proc nimGCref(p: pointer) {.compilerProc, inline.} = incRef(usrToCell(p))
-proc nimGCunref(p: pointer) {.compilerProc, inline.} = decRef(usrToCell(p))
+proc nimGCref(p: pointer) {.compilerProc.} =
+  # we keep it from being collected by pretending it's not even allocated:
+  add(gch.additionalRoots, usrToCell(p))
+  incRef(usrToCell(p))
+
+proc nimGCunref(p: pointer) {.compilerProc.} =
+  let cell = usrToCell(p)
+  var L = gch.additionalRoots.len-1
+  var i = L
+  let d = gch.additionalRoots.d
+  while i >= 0:
+    if d[i] == cell:
+      d[i] = d[L]
+      dec gch.additionalRoots.len
+      break
+    dec(i)
+  decRef(usrToCell(p))
 
 proc GC_addCycleRoot*[T](p: ref T) {.inline.} =
   ## adds 'p' to the cycle candidate set for the cycle collector. It is
@@ -306,10 +308,10 @@ proc initGC() =
     # init the rt
     init(gch.zct)
     init(gch.tempStack)
-    init(gch.cycleRoots)
     init(gch.decStack)
     when useMarkForDebug or useBackupGc:
       init(gch.marked)
+      init(gch.additionalRoots)
     when hasThreadSupport:
       gch.toDispose = initSharedList[pointer]()
 
@@ -451,10 +453,13 @@ proc rawNewObj(typ: PNimType, size: int, gch: var GcHeap): pointer =
   gcAssert((cast[ByteAddress](res) and (MemAlign-1)) == 0, "newObj: 2")
   # now it is buffered in the ZCT
   res.typ = typ
-  when leakDetector and not hasThreadSupport:
-    if framePtr != nil and framePtr.prev != nil:
-      res.filename = framePtr.prev.filename
-      res.line = framePtr.prev.line
+  when leakDetector:
+    res.filename = nil
+    res.line = 0
+    when not hasThreadSupport:
+      if framePtr != nil and framePtr.prev != nil:
+        res.filename = framePtr.prev.filename
+        res.line = framePtr.prev.line
   # refcount is zero, color is black, but mark it to be in the ZCT
   res.refcount = ZctFlag
   sysAssert(isAllocatedPtr(gch.region, res), "newObj: 3")
@@ -502,10 +507,13 @@ proc newObjRC1(typ: PNimType, size: int): pointer {.compilerRtl.} =
   sysAssert((cast[ByteAddress](res) and (MemAlign-1)) == 0, "newObj: 2")
   # now it is buffered in the ZCT
   res.typ = typ
-  when leakDetector and not hasThreadSupport:
-    if framePtr != nil and framePtr.prev != nil:
-      res.filename = framePtr.prev.filename
-      res.line = framePtr.prev.line
+  when leakDetector:
+    res.filename = nil
+    res.line = 0
+    when not hasThreadSupport:
+      if framePtr != nil and framePtr.prev != nil:
+        res.filename = framePtr.prev.filename
+        res.line = framePtr.prev.line
   res.refcount = rcIncrement # refcount is 1
   sysAssert(isAllocatedPtr(gch.region, res), "newObj: 3")
   when logGC: writeCell("new cell", res)
@@ -563,7 +571,7 @@ proc growObj(old: pointer, newsize: int, gch: var GcHeap): pointer =
             d[j] = res
             break
           dec(j)
-      if canbeCycleRoot(ol): excl(gch.cycleRoots, ol)
+      beforeDealloc(gch, ol, "growObj stack trash")
       rawDealloc(gch.region, ol)
     else:
       # we split the old refcount in 2 parts. XXX This is still not entirely
@@ -597,54 +605,12 @@ proc freeCyclicCell(gch: var GcHeap, c: PCell) =
   when logGC: writeCell("cycle collector dealloc cell", c)
   when reallyDealloc:
     sysAssert(allocInv(gch.region), "free cyclic cell")
+    beforeDealloc(gch, c, "freeCyclicCell: stack trash")
     rawDealloc(gch.region, c)
   else:
     gcAssert(c.typ != nil, "freeCyclicCell")
     zeroMem(c, sizeof(Cell))
 
-proc markGray(s: PCell) =
-  if s.color != rcGray:
-    setColor(s, rcGray)
-    forAllChildren(s, waMarkGray)
-
-proc scanBlack(s: PCell) =
-  s.setColor(rcBlack)
-  forAllChildren(s, waScanBlack)
-
-proc scan(s: PCell) =
-  if s.color == rcGray:
-    if s.refcount >=% rcIncrement:
-      scanBlack(s)
-    else:
-      s.setColor(rcWhite)
-      forAllChildren(s, waScan)
-
-proc collectWhite(s: PCell) =
-  # This is a hacky way to deal with the following problem (bug #1796)
-  # Consider this content in cycleRoots:
-  #   x -> a; y -> a  where 'a' is an acyclic object so not included in
-  # cycleRoots itself. Then 'collectWhite' used to free 'a' twice. The
-  # 'isAllocatedPtr' check prevents this. This also means we do not need
-  # to query 's notin gch.cycleRoots' at all.
-  if isAllocatedPtr(gch.region, s) and s.color == rcWhite:
-    s.setColor(rcBlack)
-    forAllChildren(s, waCollectWhite)
-    freeCyclicCell(gch, s)
-
-proc markRoots(gch: var GcHeap) =
-  var tabSize = 0
-  for s in elements(gch.cycleRoots):
-    #writeCell("markRoot", s)
-    inc tabSize
-    if s.color == rcPurple and s.refcount >=% rcIncrement:
-      markGray(s)
-    else:
-      excl(gch.cycleRoots, s)
-      # (s.color == rcBlack and rc == 0) as 1 condition:
-      if s.refcount == 0:
-        freeCyclicCell(gch, s)
-  gch.stat.cycleTableSize = max(gch.stat.cycleTableSize, tabSize)
-
 when useBackupGc:
   proc sweep(gch: var GcHeap) =
     for x in allObjects(gch.region):
@@ -666,16 +632,8 @@ when useMarkForDebug or useBackupGc:
 
   proc markGlobals(gch: var GcHeap) =
     for i in 0 .. < globalMarkersLen: globalMarkers[i]()
-
-  proc stackMarkS(gch: var GcHeap, p: pointer) {.inline.} =
-    # the addresses are not as cells on the stack, so turn them to cells:
-    var cell = usrToCell(p)
-    var c = cast[ByteAddress](cell)
-    if c >% PageSize:
-      # fast check: does it look like a cell?
-      var objStart = cast[PCell](interiorAllocatedPtr(gch.region, cell))
-      if objStart != nil:
-        markS(gch, objStart)
+    let d = gch.additionalRoots.d
+    for i in 0 .. < gch.additionalRoots.len: markS(gch, d[i])
 
 when logGC:
   var
@@ -697,7 +655,7 @@ when logGC:
     else:
       writeCell("cell {", s)
       forAllChildren(s, waDebug)
-      c_fprintf(c_stdout, "}\n")
+      c_fprintf(stdout, "}\n")
 
 proc doOperation(p: pointer, op: WalkOp) =
   if p == nil: return
@@ -708,7 +666,7 @@ proc doOperation(p: pointer, op: WalkOp) =
   case op
   of waZctDecRef:
     #if not isAllocatedPtr(gch.region, c):
-    #  c_fprintf(c_stdout, "[GC] decref bug: %p", c)
+    #  c_fprintf(stdout, "[GC] decref bug: %p", c)
     gcAssert(isAllocatedPtr(gch.region, c), "decRef: waZctDecRef")
     gcAssert(c.refcount >=% rcIncrement, "doOperation 2")
     #c.refcount = c.refcount -% rcIncrement
@@ -717,19 +675,6 @@ proc doOperation(p: pointer, op: WalkOp) =
     #if c.refcount <% rcIncrement: addZCT(gch.zct, c)
   of waPush:
     add(gch.tempStack, c)
-  of waCycleDecRef:
-    gcAssert(c.refcount >=% rcIncrement, "doOperation 3")
-    c.refcount = c.refcount -% rcIncrement
-  of waMarkGray:
-    gcAssert(c.refcount >=% rcIncrement, "waMarkGray")
-    c.refcount = c.refcount -% rcIncrement
-    markGray(c)
-  of waScan: scan(c)
-  of waScanBlack:
-    c.refcount = c.refcount +% rcIncrement
-    if c.color != rcBlack:
-      scanBlack(c)
-  of waCollectWhite: collectWhite(c)
   of waMarkGlobal:
     when useMarkForDebug or useBackupGc:
       when hasThreadSupport:
@@ -748,14 +693,6 @@ proc nimGCvisit(d: pointer, op: int) {.compilerRtl.} =
 
 proc collectZCT(gch: var GcHeap): bool {.benign.}
 
-when useMarkForDebug or useBackupGc:
-  proc markStackAndRegistersForSweep(gch: var GcHeap) {.noinline, cdecl,
-                                                         benign.}
-
-proc collectRoots(gch: var GcHeap) =
-  for s in elements(gch.cycleRoots):
-    collectWhite(s)
-
 proc collectCycles(gch: var GcHeap) =
   when hasThreadSupport:
     for c in gch.toDispose:
@@ -764,33 +701,12 @@ proc collectCycles(gch: var GcHeap) =
   while gch.zct.len > 0: discard collectZCT(gch)
   when useBackupGc:
     cellsetReset(gch.marked)
-    markStackAndRegistersForSweep(gch)
-    markGlobals(gch)
-    sweep(gch)
-  else:
-    markRoots(gch)
-    # scanRoots:
-    for s in elements(gch.cycleRoots): scan(s)
-    collectRoots(gch)
-
-    cellsetReset(gch.cycleRoots)
-  # alive cycles need to be kept in 'cycleRoots' if they are referenced
-  # from the stack; otherwise the write barrier will add the cycle root again
-  # anyway:
-  when false:
     var d = gch.decStack.d
-    var cycleRootsLen = 0
     for i in 0..gch.decStack.len-1:
-      var c = d[i]
-      gcAssert isAllocatedPtr(gch.region, c), "addBackStackRoots"
-      gcAssert c.refcount >=% rcIncrement, "addBackStackRoots: dead cell"
-      if canBeCycleRoot(c):
-        #if c notin gch.cycleRoots:
-        inc cycleRootsLen
-        incl(gch.cycleRoots, c)
-      gcAssert c.typ != nil, "addBackStackRoots 2"
-    if cycleRootsLen != 0:
-      cfprintf(cstdout, "cycle roots: %ld\n", cycleRootsLen)
+      sysAssert isAllocatedPtr(gch.region, d[i]), "collectCycles"
+      markS(gch, d[i])
+    markGlobals(gch)
+    sweep(gch)
 
 proc gcMark(gch: var GcHeap, p: pointer) {.inline.} =
   # the addresses are not as cells on the stack, so turn them to cells:
@@ -812,31 +728,11 @@ proc gcMark(gch: var GcHeap, p: pointer) {.inline.} =
         add(gch.decStack, cell)
   sysAssert(allocInv(gch.region), "gcMark end")
 
-proc markThreadStacks(gch: var GcHeap) =
-  when hasThreadSupport and hasSharedHeap:
-    {.error: "not fully implemented".}
-    var it = threadList
-    while it != nil:
-      # mark registers:
-      for i in 0 .. high(it.registers): gcMark(gch, it.registers[i])
-      var sp = cast[ByteAddress](it.stackBottom)
-      var max = cast[ByteAddress](it.stackTop)
-      # XXX stack direction?
-      # XXX unroll this loop:
-      while sp <=% max:
-        gcMark(gch, cast[ppointer](sp)[])
-        sp = sp +% sizeof(pointer)
-      it = it.next
-
 include gc_common
 
 proc markStackAndRegisters(gch: var GcHeap) {.noinline, cdecl.} =
   forEachStackSlot(gch, gcMark)
 
-when useMarkForDebug or useBackupGc:
-  proc markStackAndRegistersForSweep(gch: var GcHeap) =
-    forEachStackSlot(gch, stackMarkS)
-
 proc collectZCT(gch: var GcHeap): bool =
   # Note: Freeing may add child objects to the ZCT! So essentially we do
   # deep freeing, which is bad for incremental operation. In order to
@@ -866,8 +762,6 @@ proc collectZCT(gch: var GcHeap): bool =
       # as this might be too slow.
       # In any case, it should be removed from the ZCT. But not
       # freed. **KEEP THIS IN MIND WHEN MAKING THIS INCREMENTAL!**
-      when cycleGC:
-        if canbeCycleRoot(c): excl(gch.cycleRoots, c)
       when logGC: writeCell("zct dealloc cell", c)
       gcTrace(c, csZctFreed)
       # We are about to free the object, call the finalizer BEFORE its
@@ -877,6 +771,7 @@ proc collectZCT(gch: var GcHeap): bool =
       forAllChildren(c, waZctDecRef)
       when reallyDealloc:
         sysAssert(allocInv(gch.region), "collectZCT: rawDealloc")
+        beforeDealloc(gch, c, "collectZCT: stack trash")
         rawDealloc(gch.region, c)
       else:
         sysAssert(c.typ != nil, "collectZCT 2")
@@ -915,7 +810,6 @@ proc collectCTBody(gch: var GcHeap) =
   sysAssert(gch.decStack.len == 0, "collectCT")
   prepareForInteriorPointerChecking(gch.region)
   markStackAndRegisters(gch)
-  markThreadStacks(gch)
   gch.stat.maxStackCells = max(gch.stat.maxStackCells, gch.decStack.len)
   inc(gch.stat.stackScans)
   if collectZCT(gch):
@@ -935,12 +829,7 @@ proc collectCTBody(gch: var GcHeap) =
     gch.stat.maxPause = max(gch.stat.maxPause, duration)
     when defined(reportMissedDeadlines):
       if gch.maxPause > 0 and duration > gch.maxPause:
-        c_fprintf(c_stdout, "[GC] missed deadline: %ld\n", duration)
-
-when useMarkForDebug or useBackupGc:
-  proc markForDebug(gch: var GcHeap) =
-    markStackAndRegistersForSweep(gch)
-    markGlobals(gch)
+        c_fprintf(stdout, "[GC] missed deadline: %ld\n", duration)
 
 when defined(nimCoroutines):
   proc currentStackSizes(): int =
@@ -980,7 +869,19 @@ when withRealTime:
       collectCTBody(gch)
     release(gch)
 
-  proc GC_step*(us: int, strongAdvice = false) = GC_step(gch, us, strongAdvice)
+  proc GC_step*(us: int, strongAdvice = false, stackSize = -1) {.noinline.} =
+    var stackTop {.volatile.}: pointer
+    let prevStackBottom = gch.stackBottom
+    if stackSize >= 0:
+      stackTop = addr(stackTop)
+      when stackIncreases:
+        gch.stackBottom = cast[pointer](
+          cast[ByteAddress](stackTop) - sizeof(pointer) * 6 - stackSize)
+      else:
+        gch.stackBottom = cast[pointer](
+          cast[ByteAddress](stackTop) + sizeof(pointer) * 6 + stackSize)
+    GC_step(gch, us, strongAdvice)
+    gch.stackBottom = prevStackBottom
 
 when not defined(useNimRtl):
   proc GC_disable() =
@@ -1023,7 +924,7 @@ when not defined(useNimRtl):
              "[GC] max threshold: " & $gch.stat.maxThreshold & "\n" &
              "[GC] zct capacity: " & $gch.zct.cap & "\n" &
              "[GC] max cycle table size: " & $gch.stat.cycleTableSize & "\n" &
-             "[GC] max pause time [ms]: " & $(gch.stat.maxPause div 1000_000)
+             "[GC] max pause time [ms]: " & $(gch.stat.maxPause div 1000_000) & "\n"
     when defined(nimCoroutines):
       result = result & "[GC] number of stacks: " & $gch.stack.len & "\n"
       for stack in items(gch.stack):
diff --git a/lib/system/gc2.nim b/lib/system/gc2.nim
index 6c44d509e..089c9c915 100644
--- a/lib/system/gc2.nim
+++ b/lib/system/gc2.nim
@@ -97,6 +97,8 @@ type
     additionalRoots: CellSeq # dummy roots for GC_ref/unref
     spaceIter: ObjectSpaceIter
     dumpHeapFile: File # File that is used for GC_dumpHeap
+    when hasThreadSupport:
+      toDispose: SharedList[pointer]
 
 var
   gch {.rtlThreadVar.}: GcHeap
@@ -119,6 +121,8 @@ proc initGC() =
     init(gch.decStack)
     init(gch.additionalRoots)
     init(gch.greyStack)
+    when hasThreadSupport:
+      gch.toDispose = initSharedList[pointer]()
 
 # Which color to use for new objects is tricky: When we're marking,
 # they have to be *white* so that everything is marked that is only
@@ -185,7 +189,7 @@ proc writeCell(file: File; msg: cstring, c: PCell) =
               msg, id, kind, c.refcount shr rcShift, col)
 
 proc writeCell(msg: cstring, c: PCell) =
-  c_stdout.writeCell(msg, c)
+  stdout.writeCell(msg, c)
 
 proc myastToStr[T](x: T): string {.magic: "AstToStr", noSideEffect.}
 
@@ -259,7 +263,7 @@ proc nimGCunref(p: pointer) {.compilerProc.} =
 template markGrey(x: PCell) =
   if x.color != 1-gch.black and gch.phase == Phase.Marking:
     if not isAllocatedPtr(gch.region, x):
-      c_fprintf(c_stdout, "[GC] markGrey proc: %p\n", x)
+      c_fprintf(stdout, "[GC] markGrey proc: %p\n", x)
       #GC_dumpHeap()
       sysAssert(false, "wtf")
     x.setColor(rcGrey)
@@ -675,7 +679,7 @@ proc sweep(gch: var GcHeap): bool =
   takeStartTime(100)
   #echo "loop start"
   let white = 1-gch.black
-  #cfprintf(cstdout, "black is %d\n", black)
+  #c_fprintf(stdout, "black is %d\n", black)
   while true:
     let x = allObjectsAsProc(gch.region, addr gch.spaceIter)
     if gch.spaceIter.state < 0: break
@@ -715,7 +719,7 @@ proc markIncremental(gch: var GcHeap): bool =
   while L[] > 0:
     var c = gch.greyStack.d[0]
     if not isAllocatedPtr(gch.region, c):
-      c_fprintf(c_stdout, "[GC] not allocated anymore: %p\n", c)
+      c_fprintf(stdout, "[GC] not allocated anymore: %p\n", c)
       #GC_dumpHeap()
       sysAssert(false, "wtf")
 
@@ -760,7 +764,7 @@ proc doOperation(p: pointer, op: WalkOp) =
   case op
   of waZctDecRef:
     #if not isAllocatedPtr(gch.region, c):
-    #  c_fprintf(c_stdout, "[GC] decref bug: %p", c)
+    #  c_fprintf(stdout, "[GC] decref bug: %p", c)
     gcAssert(isAllocatedPtr(gch.region, c), "decRef: waZctDecRef")
     gcAssert(c.refcount >=% rcIncrement, "doOperation 2")
     #c.refcount = c.refcount -% rcIncrement
@@ -779,14 +783,14 @@ proc doOperation(p: pointer, op: WalkOp) =
     else:
       #gcAssert(isAllocatedPtr(gch.region, c), "doOperation: waMarkGlobal")
       if not isAllocatedPtr(gch.region, c):
-        c_fprintf(c_stdout, "[GC] not allocated anymore: MarkGlobal %p\n", c)
+        c_fprintf(stdout, "[GC] not allocated anymore: MarkGlobal %p\n", c)
         #GC_dumpHeap()
         sysAssert(false, "wtf")
       handleRoot()
     discard allocInv(gch.region)
   of waMarkGrey:
     if not isAllocatedPtr(gch.region, c):
-      c_fprintf(c_stdout, "[GC] not allocated anymore: MarkGrey %p\n", c)
+      c_fprintf(stdout, "[GC] not allocated anymore: MarkGrey %p\n", c)
       #GC_dumpHeap()
       sysAssert(false, "wtf")
     if c.color == 1-gch.black:
@@ -800,6 +804,10 @@ proc nimGCvisit(d: pointer, op: int) {.compilerRtl.} =
 proc collectZCT(gch: var GcHeap): bool {.benign.}
 
 proc collectCycles(gch: var GcHeap): bool =
+  when hasThreadSupport:
+    for c in gch.toDispose:
+      nimGCunref(c)
+
   # ensure the ZCT 'color' is not used:
   while gch.zct.len > 0: discard collectZCT(gch)
 
@@ -808,7 +816,7 @@ proc collectCycles(gch: var GcHeap): bool =
     gch.phase = Phase.Marking
     markGlobals(gch)
 
-    cfprintf(stdout, "collectCycles: introduced bug E %ld\n", gch.phase)
+    c_fprintf(stdout, "collectCycles: introduced bug E %ld\n", gch.phase)
     discard allocInv(gch.region)
   of Phase.Marking:
     # since locals do not have a write barrier, we need
@@ -922,7 +930,7 @@ proc collectCTBody(gch: var GcHeap) =
     gch.stat.maxPause = max(gch.stat.maxPause, duration)
     when defined(reportMissedDeadlines):
       if gch.maxPause > 0 and duration > gch.maxPause:
-        c_fprintf(c_stdout, "[GC] missed deadline: %ld\n", duration)
+        c_fprintf(stdout, "[GC] missed deadline: %ld\n", duration)
 
 when defined(nimCoroutines):
   proc currentStackSizes(): int =
@@ -956,7 +964,19 @@ when withRealTime:
         strongAdvice:
       collectCTBody(gch)
 
-  proc GC_step*(us: int, strongAdvice = false) = GC_step(gch, us, strongAdvice)
+  proc GC_step*(us: int, strongAdvice = false, stackSize = -1) {.noinline.} =
+    var stackTop {.volatile.}: pointer
+    let prevStackBottom = gch.stackBottom
+    if stackSize >= 0:
+      stackTop = addr(stackTop)
+      when stackIncreases:
+        gch.stackBottom = cast[pointer](
+          cast[ByteAddress](stackTop) - sizeof(pointer) * 6 - stackSize)
+      else:
+        gch.stackBottom = cast[pointer](
+          cast[ByteAddress](stackTop) + sizeof(pointer) * 6 + stackSize)
+    GC_step(gch, us, strongAdvice)
+    gch.stackBottom = prevStackBottom
 
 when not defined(useNimRtl):
   proc GC_disable() =
diff --git a/lib/system/gc_common.nim b/lib/system/gc_common.nim
index 4807bb6f8..7a1b88c84 100644
--- a/lib/system/gc_common.nim
+++ b/lib/system/gc_common.nim
@@ -63,7 +63,7 @@ when defined(nimCoroutines):
       stack.next = gch.stack
       gch.stack.prev = stack
       gch.stack = stack
-    # c_fprintf(c_stdout, "[GC] added stack 0x%016X\n", starts)
+    # c_fprintf(stdout, "[GC] added stack 0x%016X\n", starts)
 
   proc GC_removeStack*(starts: pointer) {.cdecl, exportc.} =
     var stack = gch.stack
@@ -143,7 +143,7 @@ else:
 when not defined(useNimRtl):
   {.push stack_trace: off.}
   proc setStackBottom(theStackBottom: pointer) =
-    #c_fprintf(c_stdout, "stack bottom: %p;\n", theStackBottom)
+    #c_fprintf(stdout, "stack bottom: %p;\n", theStackBottom)
     # the first init must be the one that defines the stack bottom:
     when defined(nimCoroutines):
       GC_addStack(theStackBottom)
@@ -152,7 +152,7 @@ when not defined(useNimRtl):
       else:
         var a = cast[ByteAddress](theStackBottom) # and not PageMask - PageSize*2
         var b = cast[ByteAddress](gch.stackBottom)
-        #c_fprintf(c_stdout, "old: %p new: %p;\n",gch.stackBottom,theStackBottom)
+        #c_fprintf(stdout, "old: %p new: %p;\n",gch.stackBottom,theStackBottom)
         when stackIncreases:
           gch.stackBottom = cast[pointer](min(a, b))
         else:
@@ -239,7 +239,7 @@ else:
       # We use a jmp_buf buffer that is in the C stack.
       # Used to traverse the stack and registers assuming
       # that 'setjmp' will save registers in the C stack.
-      type PStackSlice = ptr array [0..7, pointer]
+      type PStackSlice = ptr array[0..7, pointer]
       var registers {.noinit.}: Registers
       getRegisters(registers)
       for i in registers.low .. registers.high:
@@ -277,7 +277,7 @@ else:
       # We use a jmp_buf buffer that is in the C stack.
       # Used to traverse the stack and registers assuming
       # that 'setjmp' will save registers in the C stack.
-      type PStackSlice = ptr array [0..7, pointer]
+      type PStackSlice = ptr array[0..7, pointer]
       var registers {.noinit.}: C_JmpBuf
       if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
         var max = cast[ByteAddress](gch.stackBottom)
diff --git a/lib/system/gc_ms.nim b/lib/system/gc_ms.nim
index c764571b1..ec69f6e5f 100644
--- a/lib/system/gc_ms.nim
+++ b/lib/system/gc_ms.nim
@@ -28,6 +28,9 @@ template mulThreshold(x): expr {.immediate.} = x * 2
 
 when defined(memProfiler):
   proc nimProfile(requestedSize: int)
+  
+when hasThreadSupport:
+  import sharedlist
 
 type
   WalkOp = enum
diff --git a/lib/system/gc_stack.nim b/lib/system/gc_stack.nim
index 3a5c5594a..5f72b8959 100644
--- a/lib/system/gc_stack.nim
+++ b/lib/system/gc_stack.nim
@@ -8,7 +8,23 @@
 
 # "Stack GC" for embedded devices or ultra performance requirements.
 
-include osalloc
+when defined(nimphpext):
+  proc roundup(x, v: int): int {.inline.} =
+    result = (x + (v-1)) and not (v-1)
+  proc emalloc(size: int): pointer {.importc: "_emalloc".}
+  proc efree(mem: pointer) {.importc: "_efree".}
+
+  proc osAllocPages(size: int): pointer {.inline.} =
+    emalloc(size)
+
+  proc osTryAllocPages(size: int): pointer {.inline.} =
+    emalloc(size)
+
+  proc osDeallocPages(p: pointer, size: int) {.inline.} =
+    efree(p)
+
+else:
+  include osalloc
 
 # We manage memory as a thread local stack. Since the allocation pointer
 # is detached from the control flow pointer, this model is vastly more
@@ -36,33 +52,34 @@ type
   BaseChunk = object
     next: Chunk
     size: int
-    head, last: ptr ObjHeader # first and last object in chunk that
+    head, tail: ptr ObjHeader # first and last object in chunk that
                               # has a finalizer attached to it
 
 type
   StackPtr = object
-    chunk: pointer
+    bump: pointer
     remaining: int
     current: Chunk
 
   MemRegion* = object
     remaining: int
-    chunk: pointer
-    head, last: Chunk
+    bump: pointer
+    head, tail: Chunk
     nextChunkSize, totalSize: int
     hole: ptr Hole # we support individual freeing
-    lock: SysLock
+    when hasThreadSupport:
+      lock: SysLock
 
 var
-  region {.threadVar.}: MemRegion
+  tlRegion {.threadVar.}: MemRegion
 
 template withRegion*(r: MemRegion; body: untyped) =
-  let oldRegion = region
-  region = r
+  let oldRegion = tlRegion
+  tlRegion = r
   try:
     body
   finally:
-    region = oldRegion
+    tlRegion = oldRegion
 
 template inc(p: pointer, s: int) =
   p = cast[pointer](cast[int](p) +% s)
@@ -71,7 +88,7 @@ template `+!`(p: pointer, s: int): pointer =
   cast[pointer](cast[int](p) +% s)
 
 template `-!`(p: pointer, s: int): pointer =
-  cast[pointer](cast[int](p) +% s)
+  cast[pointer](cast[int](p) -% s)
 
 proc allocSlowPath(r: var MemRegion; size: int) =
   # we need to ensure that the underlying linked list
@@ -84,7 +101,7 @@ proc allocSlowPath(r: var MemRegion; size: int) =
     r.nextChunkSize =
       if r.totalSize < 64 * 1024: PageSize*4
       else: r.nextChunkSize*2
-  var s = align(size+sizeof(BaseChunk), PageSize)
+  var s = roundup(size+sizeof(BaseChunk), PageSize)
   var fresh: Chunk
   if s > r.nextChunkSize:
     fresh = cast[Chunk](osAllocPages(s))
@@ -97,22 +114,26 @@ proc allocSlowPath(r: var MemRegion; size: int) =
     else:
       s = r.nextChunkSize
   fresh.size = s
-  fresh.final = nil
-  r.totalSize += s
-  let old = r.last
+  fresh.head = nil
+  fresh.tail = nil
+  fresh.next = nil
+  inc r.totalSize, s
+  let old = r.tail
   if old == nil:
     r.head = fresh
   else:
-    r.last.next = fresh
-  r.chunk = fresh +! sizeof(BaseChunk)
-  r.last = fresh
+    r.tail.next = fresh
+  r.bump = fresh +! sizeof(BaseChunk)
+  r.tail = fresh
   r.remaining = s - sizeof(BaseChunk)
 
 proc alloc(r: var MemRegion; size: int): pointer {.inline.} =
-  if unlikely(r.remaining < size): allocSlowPath(r, size)
+  if size > r.remaining:
+    allocSlowPath(r, size)
+  sysAssert(size <= r.remaining, "size <= r.remaining")
   dec(r.remaining, size)
-  result = r.chunk
-  inc r.chunk, size
+  result = r.bump
+  inc r.bump, size
 
 proc runFinalizers(c: Chunk) =
   var it = c.head
@@ -120,228 +141,245 @@ proc runFinalizers(c: Chunk) =
     # indivually freed objects with finalizer stay in the list, but
     # their typ is nil then:
     if it.typ != nil and it.typ.finalizer != nil:
-      (cast[Finalizer](cell.typ.finalizer))(cell+!sizeof(ObjHeader))
-    it = it.next
+      (cast[Finalizer](it.typ.finalizer))(it+!sizeof(ObjHeader))
+    it = it.nextFinal
 
-proc dealloc(r: var MemRegion; p: pointer) =
-  let it = p-!sizeof(ObjHeader)
-  if it.typ != nil and it.typ.finalizer != nil:
-    (cast[Finalizer](cell.typ.finalizer))(p)
-  it.typ = nil
+when false:
+  proc dealloc(r: var MemRegion; p: pointer) =
+    let it = cast[ptr ObjHeader](p-!sizeof(ObjHeader))
+    if it.typ != nil and it.typ.finalizer != nil:
+      (cast[Finalizer](it.typ.finalizer))(p)
+    it.typ = nil
 
-proc deallocAll(head: Chunk) =
+proc deallocAll(r: var MemRegion; head: Chunk) =
   var it = head
   while it != nil:
+    let nxt = it.next
     runFinalizers(it)
+    dec r.totalSize, it.size
     osDeallocPages(it, it.size)
-    it = it.next
+    it = nxt
 
 proc deallocAll*(r: var MemRegion) =
-  deallocAll(r.head)
+  deallocAll(r, r.head)
   zeroMem(addr r, sizeof r)
 
 proc obstackPtr*(r: MemRegion): StackPtr =
-  result.chunk = r.chunk
+  result.bump = r.bump
   result.remaining = r.remaining
-  result.current = r.last
+  result.current = r.tail
+
+template computeRemaining(r): untyped =
+  r.tail.size -% (cast[int](r.bump) -% cast[int](r.tail))
 
-proc setObstackPtr*(r: MemRegion; sp: StackPtr) =
+proc setObstackPtr*(r: var MemRegion; sp: StackPtr) =
   # free everything after 'sp':
   if sp.current != nil:
-    deallocAll(sp.current.next)
-  r.chunk = sp.chunk
+    deallocAll(r, sp.current.next)
+    sp.current.next = nil
+  else:
+    deallocAll(r, r.head)
+    r.head = nil
+  r.bump = sp.bump
+  r.tail = sp.current
   r.remaining = sp.remaining
-  r.last = sp.current
+
+proc obstackPtr*(): StackPtr = tlRegion.obstackPtr()
+proc setObstackPtr*(sp: StackPtr) = tlRegion.setObstackPtr(sp)
+proc deallocAll*() = tlRegion.deallocAll()
+
+proc deallocOsPages(r: var MemRegion) = r.deallocAll()
 
 proc joinRegion*(dest: var MemRegion; src: MemRegion) =
   # merging is not hard.
   if dest.head.isNil:
     dest.head = src.head
   else:
-    dest.last.next = src.head
-  dest.last = src.last
-  dest.chunk = src.chunk
+    dest.tail.next = src.head
+  dest.tail = src.tail
+  dest.bump = src.bump
   dest.remaining = src.remaining
   dest.nextChunkSize = max(dest.nextChunkSize, src.nextChunkSize)
-  dest.totalSize += src.totalSize
-  if dest.hole.size < src.hole.size:
-    dest.hole = src.hole
+  inc dest.totalSize, src.totalSize
 
 proc isOnHeap*(r: MemRegion; p: pointer): bool =
-  # the last chunk is the largest, so check it first. It's also special
+  # the tail chunk is the largest, so check it first. It's also special
   # in that contains the current bump pointer:
-  if r.last >= p and p < r.chunk:
+  if r.tail >= p and p < r.bump:
     return true
   var it = r.head
-  while it != r.last:
+  while it != r.tail:
     if it >= p and p <= it+!it.size: return true
     it = it.next
 
-proc isInteriorPointer(r: MemRegion; p: pointer): pointer =
-  discard " we cannot patch stack pointers anyway!"
+when false:
+  # essential feature for later: copy data over from one region to another
 
-type
-  PointerStackChunk = object
-    next, prev: ptr PointerStackChunk
-    len: int
-    data: array[128, pointer]
+  proc isInteriorPointer(r: MemRegion; p: pointer): pointer =
+    discard " we cannot patch stack pointers anyway!"
 
-template head(s: PointerStackChunk): untyped = s.prev
-template tail(s: PointerStackChunk): untyped = s.next
+  type
+    PointerStackChunk = object
+      next, prev: ptr PointerStackChunk
+      len: int
+      data: array[128, pointer]
 
-include chains
+  template head(s: PointerStackChunk): untyped = s.prev
+  template tail(s: PointerStackChunk): untyped = s.next
 
-proc push(r: var MemRegion; s: var PointerStackChunk; x: pointer) =
-  if s.len < high(s.data):
-    s.data[s.len] = x
-    inc s.len
-  else:
-    let fresh = cast[ptr PointerStackChunk](alloc(r, sizeof(PointerStackChunk)))
-    fresh.len = 1
-    fresh.data[0] = x
-    fresh.next = nil
-    fresh.prev = nil
-    append(s, fresh)
-
-
-proc genericDeepCopyAux(dr: var MemRegion; stack: var PointerStackChunk;
-                        dest, src: pointer, mt: PNimType) {.benign.}
-proc genericDeepCopyAux(dr: var MemRegion; stack: var PointerStackChunk;
-                        dest, src: pointer, n: ptr TNimNode) {.benign.} =
-  var
-    d = cast[ByteAddress](dest)
-    s = cast[ByteAddress](src)
-  case n.kind
-  of nkSlot:
-    genericDeepCopyAux(cast[pointer](d +% n.offset),
-                       cast[pointer](s +% n.offset), n.typ)
-  of nkList:
-    for i in 0..n.len-1:
-      genericDeepCopyAux(dest, src, n.sons[i])
-  of nkCase:
-    var dd = selectBranch(dest, n)
-    var m = selectBranch(src, n)
-    # reset if different branches are in use; note different branches also
-    # imply that's not self-assignment (``x = x``)!
-    if m != dd and dd != nil:
-      genericResetAux(dest, dd)
-    copyMem(cast[pointer](d +% n.offset), cast[pointer](s +% n.offset),
-            n.typ.size)
-    if m != nil:
-      genericDeepCopyAux(dest, src, m)
-  of nkNone: sysAssert(false, "genericDeepCopyAux")
-
-proc copyDeepString(dr: var MemRegion; stack: var PointerStackChunk; src: NimString): NimString {.inline.} =
-  result = rawNewStringNoInit(dr, src.len)
-  result.len = src.len
-  c_memcpy(result.data, src.data, src.len + 1)
-
-proc genericDeepCopyAux(dr: var MemRegion; stack: var PointerStackChunk;
-                        dest, src: pointer, mt: PNimType) =
-  var
-    d = cast[ByteAddress](dest)
-    s = cast[ByteAddress](src)
-  sysAssert(mt != nil, "genericDeepCopyAux 2")
-  case mt.kind
-  of tyString:
-    var x = cast[PPointer](dest)
-    var s2 = cast[PPointer](s)[]
-    if s2 == nil:
-      x[] = nil
-    else:
-      x[] = copyDeepString(cast[NimString](s2))
-  of tySequence:
-    var s2 = cast[PPointer](src)[]
-    var seq = cast[PGenericSeq](s2)
-    var x = cast[PPointer](dest)
-    if s2 == nil:
-      x[] = nil
-      return
-    sysAssert(dest != nil, "genericDeepCopyAux 3")
-    x[] = newSeq(mt, seq.len)
-    var dst = cast[ByteAddress](cast[PPointer](dest)[])
-    for i in 0..seq.len-1:
-      genericDeepCopyAux(dr, stack,
-        cast[pointer](dst +% i*% mt.base.size +% GenericSeqSize),
-        cast[pointer](cast[ByteAddress](s2) +% i *% mt.base.size +%
-                     GenericSeqSize),
-        mt.base)
-  of tyObject:
-    # we need to copy m_type field for tyObject, as it could be empty for
-    # sequence reallocations:
-    var pint = cast[ptr PNimType](dest)
-    pint[] = cast[ptr PNimType](src)[]
-    if mt.base != nil:
-      genericDeepCopyAux(dr, stack, dest, src, mt.base)
-    genericDeepCopyAux(dr, stack, dest, src, mt.node)
-  of tyTuple:
-    genericDeepCopyAux(dr, stack, dest, src, mt.node)
-  of tyArray, tyArrayConstr:
-    for i in 0..(mt.size div mt.base.size)-1:
-      genericDeepCopyAux(dr, stack,
-                         cast[pointer](d +% i*% mt.base.size),
-                         cast[pointer](s +% i*% mt.base.size), mt.base)
-  of tyRef:
-    let s2 = cast[PPointer](src)[]
-    if s2 == nil:
-      cast[PPointer](dest)[] = nil
+  include chains
+
+  proc push(r: var MemRegion; s: var PointerStackChunk; x: pointer) =
+    if s.len < high(s.data):
+      s.data[s.len] = x
+      inc s.len
     else:
-      # we modify the header of the cell temporarily; instead of the type
-      # field we store a forwarding pointer. XXX This is bad when the cloning
-      # fails due to OOM etc.
-      let x = usrToCell(s2)
-      let forw = cast[int](x.typ)
-      if (forw and 1) == 1:
-        # we stored a forwarding pointer, so let's use that:
-        let z = cast[pointer](forw and not 1)
-        unsureAsgnRef(cast[PPointer](dest), z)
+      let fresh = cast[ptr PointerStackChunk](alloc(r, sizeof(PointerStackChunk)))
+      fresh.len = 1
+      fresh.data[0] = x
+      fresh.next = nil
+      fresh.prev = nil
+      append(s, fresh)
+
+
+  proc genericDeepCopyAux(dr: var MemRegion; stack: var PointerStackChunk;
+                          dest, src: pointer, mt: PNimType) {.benign.}
+  proc genericDeepCopyAux(dr: var MemRegion; stack: var PointerStackChunk;
+                          dest, src: pointer, n: ptr TNimNode) {.benign.} =
+    var
+      d = cast[ByteAddress](dest)
+      s = cast[ByteAddress](src)
+    case n.kind
+    of nkSlot:
+      genericDeepCopyAux(cast[pointer](d +% n.offset),
+                         cast[pointer](s +% n.offset), n.typ)
+    of nkList:
+      for i in 0..n.len-1:
+        genericDeepCopyAux(dest, src, n.sons[i])
+    of nkCase:
+      var dd = selectBranch(dest, n)
+      var m = selectBranch(src, n)
+      # reset if different branches are in use; note different branches also
+      # imply that's not self-assignment (``x = x``)!
+      if m != dd and dd != nil:
+        genericResetAux(dest, dd)
+      copyMem(cast[pointer](d +% n.offset), cast[pointer](s +% n.offset),
+              n.typ.size)
+      if m != nil:
+        genericDeepCopyAux(dest, src, m)
+    of nkNone: sysAssert(false, "genericDeepCopyAux")
+
+  proc copyDeepString(dr: var MemRegion; stack: var PointerStackChunk; src: NimString): NimString {.inline.} =
+    result = rawNewStringNoInit(dr, src.len)
+    result.len = src.len
+    copyMem(result.data, src.data, src.len + 1)
+
+  proc genericDeepCopyAux(dr: var MemRegion; stack: var PointerStackChunk;
+                          dest, src: pointer, mt: PNimType) =
+    var
+      d = cast[ByteAddress](dest)
+      s = cast[ByteAddress](src)
+    sysAssert(mt != nil, "genericDeepCopyAux 2")
+    case mt.kind
+    of tyString:
+      var x = cast[PPointer](dest)
+      var s2 = cast[PPointer](s)[]
+      if s2 == nil:
+        x[] = nil
       else:
-        let realType = x.typ
-        let z = newObj(realType, realType.base.size)
-
-        unsureAsgnRef(cast[PPointer](dest), z)
-        x.typ = cast[PNimType](cast[int](z) or 1)
-        genericDeepCopyAux(dr, stack, z, s2, realType.base)
-        x.typ = realType
-  else:
-    copyMem(dest, src, mt.size)
-
-proc joinAliveDataFromRegion*(dest: var MemRegion; src: var MemRegion;
-                              root: pointer): pointer =
-  # we mark the alive data and copy only alive data over to 'dest'.
-  # This is O(liveset) but it nicely compacts memory, so it's fine.
-  # We use the 'typ' field as a forwarding pointer. The forwarding
-  # pointers have bit 0 set, so we can disambiguate them.
-  # We allocate a temporary stack in 'src' that we later free:
-  var s: PointerStackChunk
-  s.len = 1
-  s.data[0] = root
-  while s.len > 0:
-    var p: pointer
-    if s.tail == nil:
-      p = s.data[s.len-1]
-      dec s.len
+        x[] = copyDeepString(cast[NimString](s2))
+    of tySequence:
+      var s2 = cast[PPointer](src)[]
+      var seq = cast[PGenericSeq](s2)
+      var x = cast[PPointer](dest)
+      if s2 == nil:
+        x[] = nil
+        return
+      sysAssert(dest != nil, "genericDeepCopyAux 3")
+      x[] = newSeq(mt, seq.len)
+      var dst = cast[ByteAddress](cast[PPointer](dest)[])
+      for i in 0..seq.len-1:
+        genericDeepCopyAux(dr, stack,
+          cast[pointer](dst +% i*% mt.base.size +% GenericSeqSize),
+          cast[pointer](cast[ByteAddress](s2) +% i *% mt.base.size +%
+                       GenericSeqSize),
+          mt.base)
+    of tyObject:
+      # we need to copy m_type field for tyObject, as it could be empty for
+      # sequence reallocations:
+      var pint = cast[ptr PNimType](dest)
+      pint[] = cast[ptr PNimType](src)[]
+      if mt.base != nil:
+        genericDeepCopyAux(dr, stack, dest, src, mt.base)
+      genericDeepCopyAux(dr, stack, dest, src, mt.node)
+    of tyTuple:
+      genericDeepCopyAux(dr, stack, dest, src, mt.node)
+    of tyArray, tyArrayConstr:
+      for i in 0..(mt.size div mt.base.size)-1:
+        genericDeepCopyAux(dr, stack,
+                           cast[pointer](d +% i*% mt.base.size),
+                           cast[pointer](s +% i*% mt.base.size), mt.base)
+    of tyRef:
+      let s2 = cast[PPointer](src)[]
+      if s2 == nil:
+        cast[PPointer](dest)[] = nil
+      else:
+        # we modify the header of the cell temporarily; instead of the type
+        # field we store a forwarding pointer. XXX This is bad when the cloning
+        # fails due to OOM etc.
+        let x = usrToCell(s2)
+        let forw = cast[int](x.typ)
+        if (forw and 1) == 1:
+          # we stored a forwarding pointer, so let's use that:
+          let z = cast[pointer](forw and not 1)
+          unsureAsgnRef(cast[PPointer](dest), z)
+        else:
+          let realType = x.typ
+          let z = newObj(realType, realType.base.size)
+
+          unsureAsgnRef(cast[PPointer](dest), z)
+          x.typ = cast[PNimType](cast[int](z) or 1)
+          genericDeepCopyAux(dr, stack, z, s2, realType.base)
+          x.typ = realType
     else:
-      p = s.tail.data[s.tail.len-1]
-      dec s.tail.len
-      if s.tail.len == 0:
-        unlink(s, s.tail)
+      copyMem(dest, src, mt.size)
+
+  proc joinAliveDataFromRegion*(dest: var MemRegion; src: var MemRegion;
+                                root: pointer): pointer =
+    # we mark the alive data and copy only alive data over to 'dest'.
+    # This is O(liveset) but it nicely compacts memory, so it's fine.
+    # We use the 'typ' field as a forwarding pointer. The forwarding
+    # pointers have bit 0 set, so we can disambiguate them.
+    # We allocate a temporary stack in 'src' that we later free:
+    var s: PointerStackChunk
+    s.len = 1
+    s.data[0] = root
+    while s.len > 0:
+      var p: pointer
+      if s.tail == nil:
+        p = s.data[s.len-1]
+        dec s.len
+      else:
+        p = s.tail.data[s.tail.len-1]
+        dec s.tail.len
+        if s.tail.len == 0:
+          unlink(s, s.tail)
 
 proc rawNewObj(r: var MemRegion, typ: PNimType, size: int): pointer =
   var res = cast[ptr ObjHeader](alloc(r, size + sizeof(ObjHeader)))
   res.typ = typ
   if typ.finalizer != nil:
-    res.nextFinal = r.chunk.head
-    r.chunk.head = res
+    res.nextFinal = r.head.head
+    r.head.head = res
   result = res +! sizeof(ObjHeader)
 
 proc newObj(typ: PNimType, size: int): pointer {.compilerRtl.} =
-  result = rawNewObj(typ, size, region)
+  result = rawNewObj(tlRegion, typ, size)
   zeroMem(result, size)
   when defined(memProfiler): nimProfile(size)
 
 proc newObjNoInit(typ: PNimType, size: int): pointer {.compilerRtl.} =
-  result = rawNewObj(typ, size, region)
+  result = rawNewObj(tlRegion, typ, size)
   when defined(memProfiler): nimProfile(size)
 
 proc newSeq(typ: PNimType, len: int): pointer {.compilerRtl.} =
@@ -351,7 +389,7 @@ proc newSeq(typ: PNimType, len: int): pointer {.compilerRtl.} =
   cast[PGenericSeq](result).reserved = len
 
 proc newObjRC1(typ: PNimType, size: int): pointer {.compilerRtl.} =
-  result = rawNewObj(typ, size, gch)
+  result = rawNewObj(tlRegion, typ, size)
   zeroMem(result, size)
 
 proc newSeqRC1(typ: PNimType, len: int): pointer {.compilerRtl.} =
@@ -360,23 +398,70 @@ proc newSeqRC1(typ: PNimType, len: int): pointer {.compilerRtl.} =
   cast[PGenericSeq](result).len = len
   cast[PGenericSeq](result).reserved = len
 
-proc growObj(old: pointer, newsize: int, gch: var GcHeap): pointer =
-  collectCT(gch)
-  var ol = usrToCell(old)
-  sysAssert(ol.typ != nil, "growObj: 1")
-  gcAssert(ol.typ.kind in {tyString, tySequence}, "growObj: 2")
-
-  var res = cast[PCell](rawAlloc(gch.region, newsize + sizeof(Cell)))
-  var elemSize = 1
-  if ol.typ.kind != tyString: elemSize = ol.typ.base.size
-
-  var oldsize = cast[PGenericSeq](old).len*elemSize + GenericSeqSize
-  copyMem(res, ol, oldsize + sizeof(Cell))
-  zeroMem(cast[pointer](cast[ByteAddress](res)+% oldsize +% sizeof(Cell)),
-          newsize-oldsize)
-  sysAssert((cast[ByteAddress](res) and (MemAlign-1)) == 0, "growObj: 3")
-  result = cellToUsr(res)
+proc growObj(region: var MemRegion; old: pointer, newsize: int): pointer =
+  let typ = cast[ptr ObjHeader](old -! sizeof(ObjHeader)).typ
+  result = rawNewObj(region, typ, newsize)
+  let elemSize = if typ.kind == tyString: 1 else: typ.base.size
+  let oldsize = cast[PGenericSeq](old).len*elemSize + GenericSeqSize
+  copyMem(result, old, oldsize)
+  zeroMem(result +! oldsize, newsize-oldsize)
 
 proc growObj(old: pointer, newsize: int): pointer {.rtl.} =
-  result = growObj(old, newsize, region)
-
+  result = growObj(tlRegion, old, newsize)
+
+proc unsureAsgnRef(dest: PPointer, src: pointer) {.compilerproc, inline.} =
+  dest[] = src
+proc asgnRef(dest: PPointer, src: pointer) {.compilerproc, inline.} =
+  dest[] = src
+proc asgnRefNoCycle(dest: PPointer, src: pointer) {.compilerproc, inline.} =
+  dest[] = src
+
+proc alloc(size: Natural): pointer =
+  result = c_malloc(size)
+  if result == nil: raiseOutOfMem()
+proc alloc0(size: Natural): pointer =
+  result = alloc(size)
+  zeroMem(result, size)
+proc realloc(p: pointer, newsize: Natural): pointer =
+  result = c_realloc(p, newsize)
+  if result == nil: raiseOutOfMem()
+proc dealloc(p: pointer) = c_free(p)
+
+proc alloc0(r: var MemRegion; size: Natural): pointer =
+  # ignore the region. That is correct for the channels module
+  # but incorrect in general. XXX
+  result = alloc0(size)
+
+proc dealloc(r: var MemRegion; p: pointer) = dealloc(p)
+
+proc allocShared(size: Natural): pointer =
+  result = c_malloc(size)
+  if result == nil: raiseOutOfMem()
+proc allocShared0(size: Natural): pointer =
+  result = alloc(size)
+  zeroMem(result, size)
+proc reallocShared(p: pointer, newsize: Natural): pointer =
+  result = c_realloc(p, newsize)
+  if result == nil: raiseOutOfMem()
+proc deallocShared(p: pointer) = c_free(p)
+
+when hasThreadSupport:
+  proc getFreeSharedMem(): int = 0
+  proc getTotalSharedMem(): int = 0
+  proc getOccupiedSharedMem(): int = 0
+
+proc GC_disable() = discard
+proc GC_enable() = discard
+proc GC_fullCollect() = discard
+proc GC_setStrategy(strategy: GC_Strategy) = discard
+proc GC_enableMarkAndSweep() = discard
+proc GC_disableMarkAndSweep() = discard
+proc GC_getStatistics(): string = return ""
+
+proc getOccupiedMem(): int =
+  result = tlRegion.totalSize - tlRegion.remaining
+proc getFreeMem(): int = tlRegion.remaining
+proc getTotalMem(): int =
+  result = tlRegion.totalSize
+
+proc setStackBottom(theStackBottom: pointer) = discard
diff --git a/lib/system/hti.nim b/lib/system/hti.nim
index bfb13059e..984f888cb 100644
--- a/lib/system/hti.nim
+++ b/lib/system/hti.nim
@@ -71,7 +71,7 @@ type
     typ: ptr TNimType
     name: cstring
     len: int
-    sons: ptr array [0..0x7fff, ptr TNimNode]
+    sons: ptr array[0..0x7fff, ptr TNimNode]
 
   TNimTypeFlag = enum
     ntfNoRefs = 0,     # type contains no tyRef, tySequence, tyString
diff --git a/lib/system/inclrtl.nim b/lib/system/inclrtl.nim
index 3caeefcbc..f9e6754ef 100644
--- a/lib/system/inclrtl.nim
+++ b/lib/system/inclrtl.nim
@@ -19,6 +19,11 @@
 when not defined(nimNewShared):
   {.pragma: gcsafe.}
 
+when not defined(nimImmediateDeprecated):
+  {.pragma: oldimmediate, immediate.}
+else:
+  {.pragma: oldimmediate.}
+
 when defined(createNimRtl):
   when defined(useNimRtl):
     {.error: "Cannot create and use nimrtl at the same time!".}
diff --git a/lib/system/jssys.nim b/lib/system/jssys.nim
index 1b98883b9..9c8a18bfe 100644
--- a/lib/system/jssys.nim
+++ b/lib/system/jssys.nim
@@ -65,7 +65,7 @@ proc auxWriteStackTrace(f: PCallFrame): string =
     it = f
     i = 0
     total = 0
-    tempFrames: array [0..63, TempFrame]
+    tempFrames: array[0..63, TempFrame]
   while it != nil and i <= high(tempFrames):
     tempFrames[i].procname = it.procname
     tempFrames[i].line = it.line
@@ -97,6 +97,8 @@ proc rawWriteStackTrace(): string =
   else:
     result = "No stack traceback available\n"
 
+proc getStackTrace*(): string = rawWriteStackTrace()
+
 proc unhandledException(e: ref Exception) {.
     compilerproc, asmNoStackFrame.} =
   when NimStackTrace:
@@ -119,7 +121,10 @@ proc raiseException(e: ref Exception, ename: cstring) {.
   when not defined(noUnhandledHandler):
     if excHandler == 0:
       unhandledException(e)
-  asm "throw `e`;"
+  when defined(nimphp):
+    asm """throw new Exception($`e`["message"]);"""
+  else:
+    asm "throw `e`;"
 
 proc reraiseException() {.compilerproc, asmNoStackFrame.} =
   if lastJSError == nil:
@@ -243,8 +248,12 @@ proc toJSStr(s: string): cstring {.asmNoStackFrame, compilerproc.} =
     for (var i = 0; i < len; ++i) {
       if (nonAsciiPart !== null) {
         var offset = (i - nonAsciiOffset) * 2;
+        var code = `s`[i].toString(16);
+        if (code.length == 1) {
+          code = "0"+code;
+        }
         nonAsciiPart[offset] = "%";
-        nonAsciiPart[offset + 1] = `s`[i].toString(16);
+        nonAsciiPart[offset + 1] = code;
       }
       else if (`s`[i] < 128)
         asciiPart[i] = fcc(`s`[i]);
@@ -729,16 +738,19 @@ proc genericReset(x: JSRef, ti: PNimType): JSRef {.compilerproc.} =
   else:
     discard
 
-proc arrayConstr(len: int, value: JSRef, typ: PNimType): JSRef {.
-                 asmNoStackFrame, compilerproc.} =
-  # types are fake
-  when defined(nimphp):
+when defined(nimphp):
+  proc arrayConstr(len: int, value: string, typ: string): JSRef {.
+                  asmNoStackFrame, compilerproc.} =
+    # types are fake
     asm """
       $result = array();
       for ($i = 0; $i < `len`; $i++) $result[] = `value`;
       return $result;
     """
-  else:
+else:
+  proc arrayConstr(len: int, value: JSRef, typ: PNimType): JSRef {.
+                  asmNoStackFrame, compilerproc.} =
+  # types are fake
     asm """
       var result = new Array(`len`);
       for (var i = 0; i < `len`; ++i) result[i] = nimCopy(null, `value`, `typ`);
diff --git a/lib/system/mmdisp.nim b/lib/system/mmdisp.nim
index 5e576f0a3..186349152 100644
--- a/lib/system/mmdisp.nim
+++ b/lib/system/mmdisp.nim
@@ -300,7 +300,7 @@ elif defined(gogc):
   proc setStackBottom(theStackBottom: pointer) = discard
 
   proc alloc(size: Natural): pointer =
-    result = cmalloc(size)
+    result = c_malloc(size)
     if result == nil: raiseOutOfMem()
 
   proc alloc0(size: Natural): pointer =
@@ -308,13 +308,13 @@ elif defined(gogc):
     zeroMem(result, size)
 
   proc realloc(p: pointer, newsize: Natural): pointer =
-    result = crealloc(p, newsize)
+    result = c_realloc(p, newsize)
     if result == nil: raiseOutOfMem()
 
-  proc dealloc(p: pointer) = cfree(p)
+  proc dealloc(p: pointer) = c_free(p)
 
   proc allocShared(size: Natural): pointer =
-    result = cmalloc(size)
+    result = c_malloc(size)
     if result == nil: raiseOutOfMem()
 
   proc allocShared0(size: Natural): pointer =
@@ -322,10 +322,10 @@ elif defined(gogc):
     zeroMem(result, size)
 
   proc reallocShared(p: pointer, newsize: Natural): pointer =
-    result = crealloc(p, newsize)
+    result = c_realloc(p, newsize)
     if result == nil: raiseOutOfMem()
 
-  proc deallocShared(p: pointer) = cfree(p)
+  proc deallocShared(p: pointer) = c_free(p)
 
   when hasThreadSupport:
     proc getFreeSharedMem(): int = discard
@@ -354,6 +354,12 @@ elif defined(gogc):
     cast[PGenericSeq](result).reserved = len
     cast[PGenericSeq](result).elemSize = typ.base.size
 
+  proc nimNewSeqOfCap(typ: PNimType, cap: int): pointer {.compilerproc.} =
+    result = newObj(typ, cap * typ.base.size + GenericSeqSize)
+    cast[PGenericSeq](result).len = 0
+    cast[PGenericSeq](result).reserved = cap
+    cast[PGenericSeq](result).elemSize = typ.base.size
+
   proc growObj(old: pointer, newsize: int): pointer =
     # the Go GC doesn't have a realloc
     var
@@ -389,26 +395,41 @@ elif defined(nogc) and defined(useMalloc):
 
   when not defined(useNimRtl):
     proc alloc(size: Natural): pointer =
-      result = cmalloc(size)
-      if result == nil: raiseOutOfMem()
+      var x = c_malloc(size + sizeof(size))
+      if x == nil: raiseOutOfMem()
+
+      cast[ptr int](x)[] = size
+      result = cast[pointer](cast[int](x) + sizeof(size))
+
     proc alloc0(size: Natural): pointer =
       result = alloc(size)
       zeroMem(result, size)
     proc realloc(p: pointer, newsize: Natural): pointer =
-      result = crealloc(p, newsize)
-      if result == nil: raiseOutOfMem()
-    proc dealloc(p: pointer) = cfree(p)
+      var x = cast[pointer](cast[int](p) - sizeof(newsize))
+      let oldsize = cast[ptr int](x)[]
+
+      x = c_realloc(x, newsize + sizeof(newsize))
+
+      if x == nil: raiseOutOfMem()
+
+      cast[ptr int](x)[] = newsize
+      result = cast[pointer](cast[int](x) + sizeof(newsize))
+
+      if newsize > oldsize:
+        zeroMem(cast[pointer](cast[int](result) + oldsize), newsize - oldsize)
+
+    proc dealloc(p: pointer) = c_free(cast[pointer](cast[int](p) - sizeof(int)))
 
     proc allocShared(size: Natural): pointer =
-      result = cmalloc(size)
+      result = c_malloc(size)
       if result == nil: raiseOutOfMem()
     proc allocShared0(size: Natural): pointer =
       result = alloc(size)
       zeroMem(result, size)
     proc reallocShared(p: pointer, newsize: Natural): pointer =
-      result = crealloc(p, newsize)
+      result = c_realloc(p, newsize)
       if result == nil: raiseOutOfMem()
-    proc deallocShared(p: pointer) = cfree(p)
+    proc deallocShared(p: pointer) = c_free(p)
 
     proc GC_disable() = discard
     proc GC_enable() = discard
@@ -432,6 +453,7 @@ elif defined(nogc) and defined(useMalloc):
     result = newObj(typ, addInt(mulInt(len, typ.base.size), GenericSeqSize))
     cast[PGenericSeq](result).len = len
     cast[PGenericSeq](result).reserved = len
+
   proc newObjNoInit(typ: PNimType, size: int): pointer =
     result = alloc(size)
 
@@ -491,6 +513,7 @@ elif defined(nogc):
     result = newObj(typ, addInt(mulInt(len, typ.base.size), GenericSeqSize))
     cast[PGenericSeq](result).len = len
     cast[PGenericSeq](result).reserved = len
+
   proc growObj(old: pointer, newsize: int): pointer =
     result = realloc(old, newsize)
 
@@ -511,11 +534,12 @@ elif defined(nogc):
   include "system/cellsets"
 
 else:
-  include "system/alloc"
+  when not defined(gcStack):
+    include "system/alloc"
 
-  include "system/cellsets"
-  when not leakDetector and not useCellIds:
-    sysAssert(sizeof(Cell) == sizeof(FreeCell), "sizeof FreeCell")
+    include "system/cellsets"
+    when not leakDetector and not useCellIds:
+      sysAssert(sizeof(Cell) == sizeof(FreeCell), "sizeof FreeCell")
   when compileOption("gc", "v2"):
     include "system/gc2"
   elif defined(gcStack):
@@ -529,4 +553,10 @@ else:
   else:
     include "system/gc"
 
+when not declared(nimNewSeqOfCap):
+  proc nimNewSeqOfCap(typ: PNimType, cap: int): pointer {.compilerproc.} =
+    result = newObj(typ, addInt(mulInt(cap, typ.base.size), GenericSeqSize))
+    cast[PGenericSeq](result).len = 0
+    cast[PGenericSeq](result).reserved = cap
+
 {.pop.}
diff --git a/lib/system/nimscript.nim b/lib/system/nimscript.nim
index d587d772f..fc6b8c99d 100644
--- a/lib/system/nimscript.nim
+++ b/lib/system/nimscript.nim
@@ -39,6 +39,9 @@ proc getCurrentDir(): string = builtin
 proc rawExec(cmd: string): int {.tags: [ExecIOEffect], raises: [OSError].} =
   builtin
 
+proc warningImpl(arg, orig: string) = discard
+proc hintImpl(arg, orig: string) = discard
+
 proc paramStr*(i: int): string =
   ## Retrieves the ``i``'th command line parameter.
   builtin
@@ -52,6 +55,31 @@ proc switch*(key: string, val="") =
   ## example ``switch("checks", "on")``.
   builtin
 
+proc warning*(name: string; val: bool) =
+  ## Disables or enables a specific warning.
+  let v = if val: "on" else: "off"
+  warningImpl(name & "]:" & v, "warning[" & name & "]:" & v)
+
+proc hint*(name: string; val: bool) =
+  ## Disables or enables a specific hint.
+  let v = if val: "on" else: "off"
+  hintImpl(name & "]:" & v, "hint[" & name & "]:" & v)
+
+proc patchFile*(package, filename, replacement: string) =
+  ## Overrides the location of a given file belonging to the
+  ## passed package.
+  ## If the ``replacement`` is not an absolute path, the path
+  ## is interpreted to be local to the Nimscript file that contains
+  ## the call to ``patchFile``, Nim's ``--path`` is not used at all
+  ## to resolve the filename!
+  ##
+  ## Example:
+  ##
+  ## .. code-block:: nim
+  ##
+  ##   patchFile("stdlib", "asyncdispatch", "patches/replacement")
+  discard
+
 proc getCommand*(): string =
   ## Gets the Nim command that the compiler has been invoked with, for example
   ## "c", "js", "build", "help".
@@ -94,6 +122,10 @@ proc existsDir*(dir: string): bool =
   ## An alias for ``dirExists``.
   dirExists(dir)
 
+proc selfExe*(): string =
+  ## Returns the currently running nim or nimble executable.
+  builtin
+
 proc toExe*(filename: string): string =
   ## On Windows adds ".exe" to `filename`, else returns `filename` unmodified.
   (when defined(windows): filename & ".exe" else: filename)
@@ -180,6 +212,15 @@ proc exec*(command: string, input: string, cache = "") {.
   log "exec: " & command:
     echo staticExec(command, input, cache)
 
+proc selfExec*(command: string) =
+  ## Executes an external command with the current nim/nimble executable.
+  ## ``Command`` must not contain the "nim " part.
+  let c = selfExe() & " " & command
+  log "exec: " & c:
+    if rawExec(c) != 0:
+      raise newException(OSError, "FAILED: " & c)
+    checkOsError()
+
 proc put*(key, value: string) =
   ## Sets a configuration 'key' like 'gcc.options.always' to its value.
   builtin
@@ -206,7 +247,7 @@ proc cd*(dir: string) {.raises: [OSError].} =
   ##
   ## The change is permanent for the rest of the execution, since this is just
   ## a shortcut for `os.setCurrentDir()
-  ## <http://nim-lang.org/os.html#setCurrentDir,string>`_ . Use the `withDir()
+  ## <http://nim-lang.org/docs/os.html#setCurrentDir,string>`_ . Use the `withDir()
   ## <#withDir>`_ template if you want to perform a temporary change only.
   setCurrentDir(dir)
   checkOsError()
diff --git a/lib/system/osalloc.nim b/lib/system/osalloc.nim
index 78410d716..b07a362a0 100644
--- a/lib/system/osalloc.nim
+++ b/lib/system/osalloc.nim
@@ -68,11 +68,11 @@ when defined(emscripten):
     mmapDescr.realSize = realSize
     mmapDescr.realPointer = realPointer
 
-    c_fprintf(c_stdout, "[Alloc] size %d %d realSize:%d realPos:%d\n", block_size, cast[int](result), realSize, cast[int](realPointer))
+    #c_fprintf(stdout, "[Alloc] size %d %d realSize:%d realPos:%d\n", block_size, cast[int](result), realSize, cast[int](realPointer))
 
   proc osTryAllocPages(size: int): pointer = osAllocPages(size)
 
-  proc osDeallocPages(p: pointer, size: int) {.inline} =
+  proc osDeallocPages(p: pointer, size: int) {.inline.} =
     var mmapDescrPos = cast[ByteAddress](p) -% sizeof(EmscriptenMMapBlock)
     var mmapDescr = cast[EmscriptenMMapBlock](mmapDescrPos)
     munmap(mmapDescr.realPointer, mmapDescr.realSize)
@@ -87,6 +87,8 @@ elif defined(posix):
     const MAP_ANONYMOUS = 0x1000
   elif defined(solaris):
     const MAP_ANONYMOUS = 0x100
+  elif defined(linux):
+    const MAP_ANONYMOUS = 0x20
   else:
     var
       MAP_ANONYMOUS {.importc: "MAP_ANONYMOUS", header: "<sys/mman.h>".}: cint
@@ -107,7 +109,7 @@ elif defined(posix):
                              MAP_PRIVATE or MAP_ANONYMOUS, -1, 0)
     if result == cast[pointer](-1): result = nil
 
-  proc osDeallocPages(p: pointer, size: int) {.inline} =
+  proc osDeallocPages(p: pointer, size: int) {.inline.} =
     when reallyOsDealloc: discard munmap(p, size)
 
 elif defined(windows):
@@ -148,8 +150,9 @@ elif defined(windows):
     #VirtualFree(p, size, MEM_DECOMMIT)
 
 elif hostOS == "standalone":
+  const StandaloneHeapSize {.intdefine.}: int = 1024 * PageSize
   var
-    theHeap: array[1024*PageSize, float64] # 'float64' for alignment
+    theHeap: array[StandaloneHeapSize, float64] # 'float64' for alignment
     bumpPointer = cast[int](addr theHeap)
 
   proc osAllocPages(size: int): pointer {.inline.} =
diff --git a/lib/system/profiler.nim b/lib/system/profiler.nim
index ae8ff4e19..7146500d9 100644
--- a/lib/system/profiler.nim
+++ b/lib/system/profiler.nim
@@ -19,10 +19,14 @@ const
   MaxTraceLen = 20 # tracking the last 20 calls is enough
 
 type
-  StackTrace* = array [0..MaxTraceLen-1, cstring]
+  StackTrace* = object
+    lines*: array[0..MaxTraceLen-1, cstring]
+    files*: array[0..MaxTraceLen-1, cstring]
   ProfilerHook* = proc (st: StackTrace) {.nimcall.}
 {.deprecated: [TStackTrace: StackTrace, TProfilerHook: ProfilerHook].}
 
+proc `[]`*(st: StackTrace, i: int): cstring = st.lines[i]
+
 proc captureStackTrace(f: PFrame, st: var StackTrace) =
   const
     firstCalls = 5
@@ -30,9 +34,10 @@ proc captureStackTrace(f: PFrame, st: var StackTrace) =
     it = f
     i = 0
     total = 0
-  while it != nil and i <= high(st)-(firstCalls-1):
+  while it != nil and i <= high(st.lines)-(firstCalls-1):
     # the (-1) is for the "..." entry
-    st[i] = it.procname
+    st.lines[i] = it.procname
+    st.files[i] = it.filename
     inc(i)
     inc(total)
     it = it.prev
@@ -43,10 +48,12 @@ proc captureStackTrace(f: PFrame, st: var StackTrace) =
   for j in 1..total-i-(firstCalls-1):
     if b != nil: b = b.prev
   if total != i:
-    st[i] = "..."
+    st.lines[i] = "..."
+    st.files[i] = "..."
     inc(i)
-  while b != nil and i <= high(st):
-    st[i] = b.procname
+  while b != nil and i <= high(st.lines):
+    st.lines[i] = b.procname
+    st.files[i] = b.filename
     inc(i)
     b = b.prev
 
diff --git a/lib/system/repr.nim b/lib/system/repr.nim
index 4da4781ef..cf7d6d7a9 100644
--- a/lib/system/repr.nim
+++ b/lib/system/repr.nim
@@ -16,7 +16,7 @@ proc reprInt(x: int64): string {.compilerproc.} = return $x
 proc reprFloat(x: float): string {.compilerproc.} = return $x
 
 proc reprPointer(x: pointer): string {.compilerproc.} =
-  var buf: array [0..59, char]
+  var buf: array[0..59, char]
   discard c_sprintf(buf, "%p", x)
   return $buf
 
@@ -24,7 +24,7 @@ proc `$`(x: uint64): string =
   if x == 0:
     result = "0"
   else:
-    var buf: array [60, char]
+    var buf: array[60, char]
     var i = 0
     var n = x
     while n != 0:
@@ -73,23 +73,20 @@ proc reprChar(x: char): string {.compilerRtl.} =
   add result, "\'"
 
 proc reprEnum(e: int, typ: PNimType): string {.compilerRtl.} =
-  # we read an 'int' but this may have been too large, so mask the other bits:
-  let b = (sizeof(int)-typ.size)*8 # bits
-  let m = 1 shl (b-1) # mask
-  var o = e and ((1 shl b)-1) # clear upper bits
-  o = (o xor m) - m # sign extend
-  # XXX we need a proper narrowing based on signedness here
-  #e and ((1 shl (typ.size*8)) - 1)
+  ## Return string representation for enumeration values
+  var n = typ.node
   if ntfEnumHole notin typ.flags:
-    if o <% typ.node.len:
-      return $typ.node.sons[o].name
+    let o = e - n.sons[0].offset
+    if o >= 0 and o <% typ.node.len:
+      return $n.sons[o].name
   else:
     # ugh we need a slow linear search:
-    var n = typ.node
     var s = n.sons
     for i in 0 .. n.len-1:
-      if s[i].offset == o: return $s[i].name
-  result = $o & " (invalid data!)"
+      if s[i].offset == e:
+        return $s[i].name
+
+  result = $e & " (invalid data!)"
 
 type
   PByteArray = ptr array[0.. 0xffff, int8]
diff --git a/lib/system/sets.nim b/lib/system/sets.nim
index 66877de30..53d222468 100644
--- a/lib/system/sets.nim
+++ b/lib/system/sets.nim
@@ -10,7 +10,7 @@
 # set handling
 
 type
-  NimSet = array [0..4*2048-1, uint8]
+  NimSet = array[0..4*2048-1, uint8]
 {.deprecated: [TNimSet: NimSet].}
 
 proc countBits32(n: int32): int {.compilerproc.} =
diff --git a/lib/system/sysio.nim b/lib/system/sysio.nim
index d0bba6775..3e9657ce0 100644
--- a/lib/system/sysio.nim
+++ b/lib/system/sysio.nim
@@ -16,54 +16,56 @@
                        # of the standard library!
 
 
-proc fputs(c: cstring, f: File) {.importc: "fputs", header: "<stdio.h>",
-  tags: [WriteIOEffect].}
-proc fgets(c: cstring, n: int, f: File): cstring {.
+proc c_fdopen(filehandle: cint, mode: cstring): File {.
+  importc: "fdopen", header: "<stdio.h>".}
+proc c_fputs(c: cstring, f: File): cint {.
+  importc: "fputs", header: "<stdio.h>", tags: [WriteIOEffect].}
+proc c_fgets(c: cstring, n: cint, f: File): cstring {.
   importc: "fgets", header: "<stdio.h>", tags: [ReadIOEffect].}
-proc fgetc(stream: File): cint {.importc: "fgetc", header: "<stdio.h>",
-  tags: [ReadIOEffect].}
-proc ungetc(c: cint, f: File) {.importc: "ungetc", header: "<stdio.h>",
-  tags: [].}
-proc putc(c: char, stream: File) {.importc: "putc", header: "<stdio.h>",
-  tags: [WriteIOEffect].}
-proc fprintf(f: File, frmt: cstring) {.importc: "fprintf",
-  header: "<stdio.h>", varargs, tags: [WriteIOEffect].}
-proc strlen(c: cstring): int {.
-  importc: "strlen", header: "<string.h>", tags: [].}
+proc c_fgetc(stream: File): cint {.
+  importc: "fgetc", header: "<stdio.h>", tags: [ReadIOEffect].}
+proc c_ungetc(c: cint, f: File): cint {.
+  importc: "ungetc", header: "<stdio.h>", tags: [].}
+proc c_putc(c: cint, stream: File): cint {.
+  importc: "putc", header: "<stdio.h>", tags: [WriteIOEffect].}
+proc c_fflush(f: File): cint {.
+  importc: "fflush", header: "<stdio.h>".}
+proc c_fclose(f: File): cint {.
+  importc: "fclose", header: "<stdio.h>".}
 
 # C routine that is used here:
-proc fread(buf: pointer, size, n: int, f: File): int {.
+proc c_fread(buf: pointer, size, n: csize, f: File): csize {.
   importc: "fread", header: "<stdio.h>", tags: [ReadIOEffect].}
-proc fseek(f: File, offset: clong, whence: int): int {.
+proc c_fseek(f: File, offset: clong, whence: cint): cint {.
   importc: "fseek", header: "<stdio.h>", tags: [].}
-proc ftell(f: File): int {.importc: "ftell", header: "<stdio.h>", tags: [].}
-proc ferror(f: File): int {.importc: "ferror", header: "<stdio.h>", tags: [].}
-proc setvbuf(stream: File, buf: pointer, typ, size: cint): cint {.
-  importc, header: "<stdio.h>", tags: [].}
-proc memchr(s: pointer, c: cint, n: csize): pointer {.
-  importc: "memchr", header: "<string.h>", tags: [].}
-proc memset(s: pointer, c: cint, n: csize) {.
-  header: "<string.h>", importc: "memset", tags: [].}
-proc fwrite(buf: pointer, size, n: int, f: File): int {.
-  importc: "fwrite", noDecl.}
+proc c_ftell(f: File): clong {.
+  importc: "ftell", header: "<stdio.h>", tags: [].}
+proc c_ferror(f: File): cint {.
+  importc: "ferror", header: "<stdio.h>", tags: [].}
+proc c_setvbuf(f: File, buf: pointer, mode: cint, size: csize): cint {.
+  importc: "setvbuf", header: "<stdio.h>", tags: [].}
+proc c_fwrite(buf: pointer, size, n: csize, f: File): cint {.
+  importc: "fwrite", header: "<stdio.h>".}
 
 proc raiseEIO(msg: string) {.noinline, noreturn.} =
   sysFatal(IOError, msg)
 
 {.push stackTrace:off, profiler:off.}
 proc readBuffer(f: File, buffer: pointer, len: Natural): int =
-  result = fread(buffer, 1, len, f)
+  result = c_fread(buffer, 1, len, f)
 
 proc readBytes(f: File, a: var openArray[int8|uint8], start, len: Natural): int =
   result = readBuffer(f, addr(a[start]), len)
 
 proc readChars(f: File, a: var openArray[char], start, len: Natural): int =
+  if (start + len) > len(a):
+    raiseEIO("buffer overflow: (start+len) > length of openarray buffer")
   result = readBuffer(f, addr(a[start]), len)
 
-proc write(f: File, c: cstring) = fputs(c, f)
+proc write(f: File, c: cstring) = discard c_fputs(c, f)
 
 proc writeBuffer(f: File, buffer: pointer, len: Natural): int =
-  result = fwrite(buffer, 1, len, f)
+  result = c_fwrite(buffer, 1, len, f)
 
 proc writeBytes(f: File, a: openArray[int8|uint8], start, len: Natural): int =
   var x = cast[ptr array[0..1000_000_000, int8]](a)
@@ -95,23 +97,28 @@ else:
 const
   BufSize = 4000
 
+proc close*(f: File) = discard c_fclose(f)
+proc readChar*(f: File): char = result = char(c_fgetc(f))
+proc flushFile*(f: File) = discard c_fflush(f)
+proc getFileHandle*(f: File): FileHandle = c_fileno(f)
+
 proc readLine(f: File, line: var TaintedString): bool =
   var pos = 0
   # Use the currently reserved space for a first try
   when defined(nimscript):
-    var space = 80
+    var space: cint = 80
   else:
-    var space = cast[PGenericSeq](line.string).space
+    var space: cint = cint(cast[PGenericSeq](line.string).space)
   line.string.setLen(space)
 
   while true:
     # memset to \l so that we can tell how far fgets wrote, even on EOF, where
     # fgets doesn't append an \l
-    memset(addr line.string[pos], '\l'.ord, space)
-    if fgets(addr line.string[pos], space, f) == nil:
+    c_memset(addr line.string[pos], '\l'.ord, space)
+    if c_fgets(addr line.string[pos], space, f) == nil:
       line.string.setLen(0)
       return false
-    let m = memchr(addr line.string[pos], '\l'.ord, space)
+    let m = c_memchr(addr line.string[pos], '\l'.ord, space)
     if m != nil:
       # \l found: Could be our own or the one by fgets, in any case, we're done
       var last = cast[ByteAddress](m) - cast[ByteAddress](addr line.string[0])
@@ -140,23 +147,23 @@ proc readLine(f: File): TaintedString =
 
 proc write(f: File, i: int) =
   when sizeof(int) == 8:
-    fprintf(f, "%lld", i)
+    c_fprintf(f, "%lld", i)
   else:
-    fprintf(f, "%ld", i)
+    c_fprintf(f, "%ld", i)
 
 proc write(f: File, i: BiggestInt) =
   when sizeof(BiggestInt) == 8:
-    fprintf(f, "%lld", i)
+    c_fprintf(f, "%lld", i)
   else:
-    fprintf(f, "%ld", i)
+    c_fprintf(f, "%ld", i)
 
 proc write(f: File, b: bool) =
   if b: write(f, "true")
   else: write(f, "false")
-proc write(f: File, r: float32) = fprintf(f, "%g", r)
-proc write(f: File, r: BiggestFloat) = fprintf(f, "%g", r)
+proc write(f: File, r: float32) = c_fprintf(f, "%g", r)
+proc write(f: File, r: BiggestFloat) = c_fprintf(f, "%g", r)
 
-proc write(f: File, c: char) = putc(c, f)
+proc write(f: File, c: char) = discard c_putc(ord(c), f)
 proc write(f: File, a: varargs[string, `$`]) =
   for x in items(a): write(f, x)
 
@@ -176,15 +183,15 @@ proc readAllBuffer(file: File): string =
 
 proc rawFileSize(file: File): int =
   # this does not raise an error opposed to `getFileSize`
-  var oldPos = ftell(file)
-  discard fseek(file, 0, 2) # seek the end of the file
-  result = ftell(file)
-  discard fseek(file, clong(oldPos), 0)
+  var oldPos = c_ftell(file)
+  discard c_fseek(file, 0, 2) # seek the end of the file
+  result = c_ftell(file)
+  discard c_fseek(file, clong(oldPos), 0)
 
 proc endOfFile(f: File): bool =
   # do not blame me; blame the ANSI C standard this is so brain-damaged
-  var c = fgetc(f)
-  ungetc(c, f)
+  var c = c_fgetc(f)
+  discard c_ungetc(c, f)
   return c < 0'i32
 
 proc readAllFile(file: File, len: int): string =
@@ -195,7 +202,7 @@ proc readAllFile(file: File, len: int): string =
   if endOfFile(file):
     if bytes < len:
       result.setLen(bytes)
-  elif ferror(file) != 0:
+  elif c_ferror(file) != 0:
     raiseEIO("error while reading from file")
   else:
     # We read all the bytes but did not reach the EOF
@@ -234,8 +241,7 @@ when declared(stdout):
 
 # interface to the C procs:
 
-when (defined(windows) and not defined(useWinAnsi)) or defined(nimdoc):
-  include "system/widestrs"
+include "system/widestrs"
 
 when defined(windows) and not defined(useWinAnsi):
   when defined(cpp):
@@ -265,23 +271,40 @@ else:
     importc: "freopen", nodecl.}
 
 const
-  FormatOpen: array [FileMode, string] = ["rb", "wb", "w+b", "r+b", "ab"]
+  FormatOpen: array[FileMode, string] = ["rb", "wb", "w+b", "r+b", "ab"]
     #"rt", "wt", "w+t", "r+t", "at"
     # we always use binary here as for Nim the OS line ending
     # should not be translated.
 
 when defined(posix) and not defined(nimscript):
-  type
-    Mode {.importc: "mode_t", header: "<sys/types.h>".} = cint
+  when defined(linux) and defined(amd64):
+    type
+      Mode {.importc: "mode_t", header: "<sys/types.h>".} = cint
+
+      # fillers ensure correct size & offsets
+      Stat {.importc: "struct stat",
+              header: "<sys/stat.h>", final, pure.} = object ## struct stat
+        filler_1: array[24, char]
+        st_mode: Mode        ## Mode of file
+        filler_2: array[144 - 24 - 4, char]
+
+    proc S_ISDIR(m: Mode): bool =
+      ## Test for a directory.
+      (m and 0o170000) == 0o40000
+
+  else:
+    type
+      Mode {.importc: "mode_t", header: "<sys/types.h>".} = cint
 
-    Stat {.importc: "struct stat",
-             header: "<sys/stat.h>", final, pure.} = object ## struct stat
-      st_mode: Mode        ## Mode of file
+      Stat {.importc: "struct stat",
+               header: "<sys/stat.h>", final, pure.} = object ## struct stat
+        st_mode: Mode        ## Mode of file
 
-  proc S_ISDIR(m: Mode): bool {.importc, header: "<sys/stat.h>".}
-    ## Test for a directory.
+    proc S_ISDIR(m: Mode): bool {.importc, header: "<sys/stat.h>".}
+      ## Test for a directory.
 
-  proc fstat(a1: cint, a2: var Stat): cint {.importc, header: "<sys/stat.h>".}
+  proc c_fstat(a1: cint, a2: var Stat): cint {.
+    importc: "fstat", header: "<sys/stat.h>".}
 
 proc open(f: var File, filename: string,
           mode: FileMode = fmRead,
@@ -294,45 +317,38 @@ proc open(f: var File, filename: string,
       # be opened.
       var f2 = cast[File](p)
       var res: Stat
-      if fstat(getFileHandle(f2), res) >= 0'i32 and S_ISDIR(res.st_mode):
+      if c_fstat(getFileHandle(f2), res) >= 0'i32 and S_ISDIR(res.st_mode):
         close(f2)
         return false
     result = true
     f = cast[File](p)
     if bufSize > 0 and bufSize <= high(cint).int:
-      discard setvbuf(f, nil, IOFBF, bufSize.cint)
+      discard c_setvbuf(f, nil, IOFBF, bufSize.cint)
     elif bufSize == 0:
-      discard setvbuf(f, nil, IONBF, 0)
+      discard c_setvbuf(f, nil, IONBF, 0)
 
 proc reopen(f: File, filename: string, mode: FileMode = fmRead): bool =
   var p: pointer = freopen(filename, FormatOpen[mode], f)
   result = p != nil
 
-proc fdopen(filehandle: FileHandle, mode: cstring): File {.
-  importc: "fdopen", header: "<stdio.h>".}
-
 proc open(f: var File, filehandle: FileHandle, mode: FileMode): bool =
-  f = fdopen(filehandle, FormatOpen[mode])
+  f = c_fdopen(filehandle, FormatOpen[mode])
   result = f != nil
 
 proc setFilePos(f: File, pos: int64) =
-  if fseek(f, clong(pos), 0) != 0:
+  if c_fseek(f, clong(pos), 0) != 0:
     raiseEIO("cannot set file position")
 
 proc getFilePos(f: File): int64 =
-  result = ftell(f)
+  result = c_ftell(f)
   if result < 0: raiseEIO("cannot retrieve file position")
 
 proc getFileSize(f: File): int64 =
   var oldPos = getFilePos(f)
-  discard fseek(f, 0, 2) # seek the end of the file
+  discard c_fseek(f, 0, 2) # seek the end of the file
   result = getFilePos(f)
   setFilePos(f, oldPos)
 
-when not declared(close):
-  proc close(f: File) {.
-    importc: "fclose", header: "<stdio.h>", tags: [].}
-
 proc readFile(filename: string): TaintedString =
   var f: File
   if open(f, filename):
@@ -353,4 +369,12 @@ proc writeFile(filename, content: string) =
   else:
     sysFatal(IOError, "cannot open: ", filename)
 
+proc setStdIoUnbuffered() =
+  when declared(stdout):
+    discard c_setvbuf(stdout, nil, IONBF, 0)
+  when declared(stderr):
+    discard c_setvbuf(stderr, nil, IONBF, 0)
+  when declared(stdin):
+    discard c_setvbuf(stdin, nil, IONBF, 0)
+
 {.pop.}
diff --git a/lib/system/syslocks.nim b/lib/system/syslocks.nim
index 6dcdfff0d..c3e23052b 100644
--- a/lib/system/syslocks.nim
+++ b/lib/system/syslocks.nim
@@ -14,39 +14,41 @@
 when defined(Windows):
   type
     Handle = int
-    SysLock {.final, pure.} = object # CRITICAL_SECTION in WinApi
+
+    SysLock {.importc: "CRITICAL_SECTION",
+              header: "<windows.h>", final, pure.} = object # CRITICAL_SECTION in WinApi
       DebugInfo: pointer
       LockCount: int32
       RecursionCount: int32
       OwningThread: int
       LockSemaphore: int
-      Reserved: int32
+      SpinCount: int
 
     SysCond = Handle
 
   {.deprecated: [THandle: Handle, TSysLock: SysLock, TSysCond: SysCond].}
 
-  proc initSysLock(L: var SysLock) {.stdcall, noSideEffect,
-    dynlib: "kernel32", importc: "InitializeCriticalSection".}
+  proc initSysLock(L: var SysLock) {.importc: "InitializeCriticalSection",
+                                     header: "<windows.h>".}
     ## Initializes the lock `L`.
 
-  proc tryAcquireSysAux(L: var SysLock): int32 {.stdcall, noSideEffect,
-    dynlib: "kernel32", importc: "TryEnterCriticalSection".}
+  proc tryAcquireSysAux(L: var SysLock): int32 {.importc: "TryEnterCriticalSection",
+                                                 header: "<windows.h>".}
     ## Tries to acquire the lock `L`.
 
   proc tryAcquireSys(L: var SysLock): bool {.inline.} =
     result = tryAcquireSysAux(L) != 0'i32
 
-  proc acquireSys(L: var SysLock) {.stdcall, noSideEffect,
-    dynlib: "kernel32", importc: "EnterCriticalSection".}
+  proc acquireSys(L: var SysLock) {.importc: "EnterCriticalSection",
+                                    header: "<windows.h>".}
     ## Acquires the lock `L`.
 
-  proc releaseSys(L: var SysLock) {.stdcall, noSideEffect,
-    dynlib: "kernel32", importc: "LeaveCriticalSection".}
+  proc releaseSys(L: var SysLock) {.importc: "LeaveCriticalSection",
+                                    header: "<windows.h>".}
     ## Releases the lock `L`.
 
-  proc deinitSys(L: var SysLock) {.stdcall, noSideEffect,
-    dynlib: "kernel32", importc: "DeleteCriticalSection".}
+  proc deinitSys(L: var SysLock) {.importc: "DeleteCriticalSection",
+                                   header: "<windows.h>".}
 
   proc createEvent(lpEventAttributes: pointer,
                    bManualReset, bInitialState: int32,
@@ -86,17 +88,16 @@ else:
                           #include <pthread.h>""".} = object
     SysLockType = distinct cint
 
-  proc SysLockType_Reentrant: SysLockType =
-    {.emit: "`result` = PTHREAD_MUTEX_RECURSIVE;".}
-
   proc initSysLock(L: var SysLock, attr: ptr SysLockAttr = nil) {.
     importc: "pthread_mutex_init", header: "<pthread.h>", noSideEffect.}
 
-  proc initSysLockAttr(a: var SysLockAttr) {.
-    importc: "pthread_mutexattr_init", header: "<pthread.h>", noSideEffect.}
-
-  proc setSysLockType(a: var SysLockAttr, t: SysLockType) {.
-    importc: "pthread_mutexattr_settype", header: "<pthread.h>", noSideEffect.}
+  when insideRLocksModule:
+    proc SysLockType_Reentrant: SysLockType =
+      {.emit: "`result` = PTHREAD_MUTEX_RECURSIVE;".}
+    proc initSysLockAttr(a: var SysLockAttr) {.
+      importc: "pthread_mutexattr_init", header: "<pthread.h>", noSideEffect.}
+    proc setSysLockType(a: var SysLockAttr, t: SysLockType) {.
+      importc: "pthread_mutexattr_settype", header: "<pthread.h>", noSideEffect.}
 
   proc acquireSys(L: var SysLock) {.noSideEffect,
     importc: "pthread_mutex_lock", header: "<pthread.h>".}
@@ -111,14 +112,14 @@ else:
   proc deinitSys(L: var SysLock) {.noSideEffect,
     importc: "pthread_mutex_destroy", header: "<pthread.h>".}
 
-  proc initSysCond(cond: var SysCond, cond_attr: pointer = nil) {.
-    importc: "pthread_cond_init", header: "<pthread.h>", noSideEffect.}
-  proc waitSysCond(cond: var SysCond, lock: var SysLock) {.
-    importc: "pthread_cond_wait", header: "<pthread.h>", noSideEffect.}
-  proc signalSysCond(cond: var SysCond) {.
-    importc: "pthread_cond_signal", header: "<pthread.h>", noSideEffect.}
-
-  proc deinitSysCond(cond: var SysCond) {.noSideEffect,
-    importc: "pthread_cond_destroy", header: "<pthread.h>".}
+  when not insideRLocksModule:
+    proc initSysCond(cond: var SysCond, cond_attr: pointer = nil) {.
+      importc: "pthread_cond_init", header: "<pthread.h>", noSideEffect.}
+    proc waitSysCond(cond: var SysCond, lock: var SysLock) {.
+      importc: "pthread_cond_wait", header: "<pthread.h>", noSideEffect.}
+    proc signalSysCond(cond: var SysCond) {.
+      importc: "pthread_cond_signal", header: "<pthread.h>", noSideEffect.}
+    proc deinitSysCond(cond: var SysCond) {.noSideEffect,
+      importc: "pthread_cond_destroy", header: "<pthread.h>".}
 
 {.pop.}
diff --git a/lib/system/sysstr.nim b/lib/system/sysstr.nim
index e2137e8f4..eb3d276e0 100644
--- a/lib/system/sysstr.nim
+++ b/lib/system/sysstr.nim
@@ -30,7 +30,7 @@ proc eqStrings(a, b: NimString): bool {.inline, compilerProc.} =
   if a == b: return true
   if a == nil or b == nil: return false
   return a.len == b.len and
-    c_memcmp(a.data, b.data, a.len) == 0'i32
+    equalMem(addr(a.data), addr(b.data), a.len)
 
 when declared(allocAtomic):
   template allocStr(size: expr): expr =
@@ -71,7 +71,7 @@ proc copyStrLast(s: NimString, start, last: int): NimString {.compilerProc.} =
   if len > 0:
     result = rawNewStringNoInit(len)
     result.len = len
-    c_memcpy(result.data, addr(s.data[start]), len)
+    copyMem(addr(result.data), addr(s.data[start]), len)
     result.data[len] = '\0'
   else:
     result = rawNewString(len)
@@ -82,10 +82,11 @@ proc copyStr(s: NimString, start: int): NimString {.compilerProc.} =
 proc toNimStr(str: cstring, len: int): NimString {.compilerProc.} =
   result = rawNewStringNoInit(len)
   result.len = len
-  c_memcpy(result.data, str, len + 1)
+  copyMem(addr(result.data), str, len + 1)
 
 proc cstrToNimstr(str: cstring): NimString {.compilerRtl.} =
-  result = toNimStr(str, c_strlen(str))
+  if str == nil: NimString(nil)
+  else: toNimStr(str, str.len)
 
 proc copyString(src: NimString): NimString {.compilerRtl.} =
   if src != nil:
@@ -94,7 +95,7 @@ proc copyString(src: NimString): NimString {.compilerRtl.} =
     else:
       result = rawNewStringNoInit(src.len)
       result.len = src.len
-      c_memcpy(result.data, src.data, src.len + 1)
+      copyMem(addr(result.data), addr(src.data), src.len + 1)
 
 proc copyStringRC1(src: NimString): NimString {.compilerRtl.} =
   if src != nil:
@@ -107,7 +108,7 @@ proc copyStringRC1(src: NimString): NimString {.compilerRtl.} =
     else:
       result = rawNewStringNoInit(src.len)
     result.len = src.len
-    c_memcpy(result.data, src.data, src.len + 1)
+    copyMem(addr(result.data), addr(src.data), src.len + 1)
 
 
 proc hashString(s: string): int {.compilerproc.} =
@@ -177,7 +178,7 @@ proc resizeString(dest: NimString, addlen: int): NimString {.compilerRtl.} =
     # DO NOT UPDATE LEN YET: dest.len = newLen
 
 proc appendString(dest, src: NimString) {.compilerproc, inline.} =
-  c_memcpy(addr(dest.data[dest.len]), src.data, src.len + 1)
+  copyMem(addr(dest.data[dest.len]), addr(src.data), src.len + 1)
   inc(dest.len, src.len)
 
 proc appendChar(dest: NimString, c: char) {.compilerproc, inline.} =
@@ -228,7 +229,8 @@ proc setLengthSeq(seq: PGenericSeq, elemSize, newLen: int): PGenericSeq {.
   elif newLen < result.len:
     # we need to decref here, otherwise the GC leaks!
     when not defined(boehmGC) and not defined(nogc) and
-         not defined(gcMarkAndSweep) and not defined(gogc):
+         not defined(gcMarkAndSweep) and not defined(gogc) and
+         not defined(gcStack):
       when false: # compileOption("gc", "v2"):
         for i in newLen..result.len-1:
           let len0 = gch.tempStack.len
@@ -300,46 +302,42 @@ proc nimFloatToStr(f: float): string {.compilerproc.} =
   else:
     result = $buf
 
-proc strtod(buf: cstring, endptr: ptr cstring): float64 {.importc,
-  header: "<stdlib.h>", noSideEffect.}
-
-var decimalPoint: char
-
-proc getDecimalPoint(): char =
-  result = decimalPoint
-  if result == '\0':
-    if strtod("0,5", nil) == 0.5: result = ','
-    else: result = '.'
-    # yes this is threadsafe in practice, spare me:
-    decimalPoint = result
+proc c_strtod(buf: cstring, endptr: ptr cstring): float64 {.
+  importc: "strtod", header: "<stdlib.h>", noSideEffect.}
 
 const
   IdentChars = {'a'..'z', 'A'..'Z', '0'..'9', '_'}
+  powtens =   [ 1e0,   1e1,  1e2,  1e3,  1e4,  1e5,  1e6,  1e7,  1e8,  1e9,
+                1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19,
+                1e20, 1e21, 1e22]
 
 proc nimParseBiggestFloat(s: string, number: var BiggestFloat,
                           start = 0): int {.compilerProc.} =
-  # This routine leverages `strtod()` for the non-trivial task of
-  # parsing floating point numbers correctly. Because `strtod()` is
-  # locale-dependent with respect to the radix character, we create
-  # a copy where the decimal point is replaced with the locale's
-  # radix character.
+  # This routine attempt to parse float that can parsed quickly.
+  # ie whose integer part can fit inside a 53bits integer.
+  # their real exponent must also be <= 22. If the float doesn't follow
+  # these restrictions, transform the float into this form:
+  #  INTEGER * 10 ^ exponent and leave the work to standard `strtod()`.
+  # This avoid the problems of decimal character portability.
+  # see: http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/
   var
     i = start
     sign = 1.0
-    t: array[500, char] # flaviu says: 325 is the longest reasonable literal
-    ti = 0
-    hasdigits = false
-
-  template addToBuf(c) =
-    if ti < t.high:
-      t[ti] = c; inc(ti)
+    kdigits, fdigits = 0
+    exponent: int
+    integer: uint64
+    fraction: uint64
+    frac_exponent= 0
+    exp_sign = 1
+    first_digit = -1
+    has_sign = false
 
   # Sign?
   if s[i] == '+' or s[i] == '-':
+    has_sign = true
     if s[i] == '-':
       sign = -1.0
-    t[ti] = s[i]
-    inc(i); inc(ti)
+    inc(i)
 
   # NaN?
   if s[i] == 'N' or s[i] == 'n':
@@ -359,40 +357,111 @@ proc nimParseBiggestFloat(s: string, number: var BiggestFloat,
           return i+3 - start
     return 0
 
+  if s[i] in {'0'..'9'}:
+      first_digit = (s[i].ord - '0'.ord)
   # Integer part?
   while s[i] in {'0'..'9'}:
-    hasdigits = true
-    addToBuf(s[i])
-    inc(i);
+    inc(kdigits)
+    integer = integer * 10'u64 + (s[i].ord - '0'.ord).uint64
+    inc(i)
     while s[i] == '_': inc(i)
 
   # Fractional part?
   if s[i] == '.':
-    addToBuf(getDecimalPoint())
     inc(i)
+    # if no integer part, Skip leading zeros
+    if kdigits <= 0:
+      while s[i] == '0':
+        inc(frac_exponent)
+        inc(i)
+        while s[i] == '_': inc(i)
+
+    if first_digit == -1 and s[i] in {'0'..'9'}:
+      first_digit = (s[i].ord - '0'.ord)
+    # get fractional part
     while s[i] in {'0'..'9'}:
-      hasdigits = true
-      addToBuf(s[i])
+      inc(fdigits)
+      inc(frac_exponent)
+      integer = integer * 10'u64 + (s[i].ord - '0'.ord).uint64
       inc(i)
       while s[i] == '_': inc(i)
-  if not hasdigits:
+
+  # if has no digits: return error
+  if kdigits + fdigits <= 0 and
+     (i == start or # no char consumed (empty string).
+     (i == start + 1 and has_sign)): # or only '+' or '-
     return 0
 
-  # Exponent?
   if s[i] in {'e', 'E'}:
-    addToBuf(s[i])
     inc(i)
-    if s[i] in {'+', '-'}:
-      addToBuf(s[i])
+    if s[i] == '+' or s[i] == '-':
+      if s[i] == '-':
+        exp_sign = -1
+
       inc(i)
     if s[i] notin {'0'..'9'}:
       return 0
     while s[i] in {'0'..'9'}:
-      addToBuf(s[i])
+      exponent = exponent * 10 + (ord(s[i]) - ord('0'))
       inc(i)
-      while s[i] == '_': inc(i)
-  number = strtod(t, nil)
+      while s[i] == '_': inc(i) # underscores are allowed and ignored
+
+  var real_exponent = exp_sign*exponent - frac_exponent
+  let exp_negative = real_exponent < 0
+  var abs_exponent = abs(real_exponent)
+
+  # if exponent greater than can be represented: +/- zero or infinity
+  if abs_exponent > 999:
+    if exp_negative:
+      number = 0.0*sign
+    else:
+      number = Inf*sign
+    return i - start
+
+  # if integer is representable in 53 bits:  fast path
+  # max fast path integer is  1<<53 - 1 or  8999999999999999 (16 digits)
+  if kdigits + fdigits <= 16 and first_digit <= 8:
+    # max float power of ten with set bits above the 53th bit is 10^22
+    if abs_exponent <= 22:
+      if exp_negative:
+        number = sign * integer.float / powtens[abs_exponent]
+      else:
+        number = sign * integer.float * powtens[abs_exponent]
+      return i - start
+
+    # if exponent is greater try to fit extra exponent above 22 by multiplying
+    # integer part is there is space left.
+    let slop = 15 - kdigits - fdigits
+    if  abs_exponent <= 22 + slop and not exp_negative:
+      number = sign * integer.float * powtens[slop] * powtens[abs_exponent-slop]
+      return i - start
+
+  # if failed: slow path with strtod.
+  var t: array[500, char] # flaviu says: 325 is the longest reasonable literal
+  var ti = 0
+  let maxlen = t.high - "e+000".len # reserve enough space for exponent
+
   result = i - start
+  i = start
+  # re-parse without error checking, any error should be handled by the code above.
+  while s[i] in {'0'..'9','+','-'}:
+    if ti < maxlen:
+      t[ti] = s[i]; inc(ti)
+    inc(i)
+    while s[i] in {'.', '_'}: # skip underscore and decimal point
+      inc(i)
+
+  # insert exponent
+  t[ti] = 'E'; inc(ti)
+  t[ti] = if exp_negative: '-' else: '+'; inc(ti)
+  inc(ti, 3)
+
+  # insert adjusted exponent
+  t[ti-1] = ('0'.ord + abs_exponent mod 10).char; abs_exponent = abs_exponent div 10
+  t[ti-2] = ('0'.ord + abs_exponent mod 10).char; abs_exponent = abs_exponent div 10
+  t[ti-3] = ('0'.ord + abs_exponent mod 10).char
+
+  number = c_strtod(t, nil)
 
 proc nimInt64ToStr(x: int64): string {.compilerRtl.} =
   result = newString(sizeof(x)*4)
diff --git a/lib/system/threads.nim b/lib/system/threads.nim
index bdb737e35..583e3ae86 100644
--- a/lib/system/threads.nim
+++ b/lib/system/threads.nim
@@ -24,7 +24,7 @@
 ##  import locks
 ##
 ##  var
-##    thr: array [0..4, Thread[tuple[a,b: int]]]
+##    thr: array[0..4, Thread[tuple[a,b: int]]]
 ##    L: Lock
 ##
 ##  proc threadFunc(interval: tuple[a,b: int]) {.thread.} =
@@ -51,7 +51,7 @@ const
 
 when defined(windows):
   type
-    SysThread = Handle
+    SysThread* = Handle
     WinThreadProc = proc (x: pointer): int32 {.stdcall.}
   {.deprecated: [TSysThread: SysThread, TWinThreadProc: WinThreadProc].}
 
@@ -117,16 +117,21 @@ else:
     schedh = "#define _GNU_SOURCE\n#include <sched.h>"
     pthreadh = "#define _GNU_SOURCE\n#include <pthread.h>"
 
+  when defined(linux):
+    type Time = clong
+  else:
+    type Time = int
+
   type
-    SysThread {.importc: "pthread_t", header: "<sys/types.h>",
+    SysThread* {.importc: "pthread_t", header: "<sys/types.h>",
                  final, pure.} = object
     Pthread_attr {.importc: "pthread_attr_t",
                      header: "<sys/types.h>", final, pure.} = object
 
     Timespec {.importc: "struct timespec",
                 header: "<time.h>", final, pure.} = object
-      tv_sec: int
-      tv_nsec: int
+      tv_sec: Time
+      tv_nsec: clong
   {.deprecated: [TSysThread: SysThread, Tpthread_attr: PThreadAttr,
                 Ttimespec: Timespec].}
 
@@ -193,7 +198,7 @@ when emulatedThreadVars:
 # allocations are needed. Currently less than 7K are used on a 64bit machine.
 # We use ``float`` for proper alignment:
 type
-  ThreadLocalStorage = array [0..1_000, float]
+  ThreadLocalStorage = array[0..1_000, float]
 
   PGcThread = ptr GcThread
   GcThread {.pure, inheritable.} = object
@@ -301,7 +306,7 @@ type
                                        ## a pointer as a thread ID.
 {.deprecated: [TThread: Thread, TThreadId: ThreadId].}
 
-when not defined(boehmgc) and not hasSharedHeap and not defined(gogc):
+when not defined(boehmgc) and not hasSharedHeap and not defined(gogc) and not defined(gcstack):
   proc deallocOsPages()
 
 when defined(boehmgc):
@@ -331,7 +336,7 @@ else:
 proc threadProcWrapStackFrame[TArg](thrd: ptr Thread[TArg]) =
   when defined(boehmgc):
     boehmGC_call_with_stack_base(threadProcWrapDispatch[TArg], thrd)
-  elif not defined(nogc) and not defined(gogc):
+  elif not defined(nogc) and not defined(gogc) and not defined(gcstack):
     var p {.volatile.}: proc(a: ptr Thread[TArg]) {.nimcall.} =
       threadProcWrapDispatch[TArg]
     when not hasSharedHeap:
@@ -374,6 +379,10 @@ proc running*[TArg](t: Thread[TArg]): bool {.inline.} =
   ## returns true if `t` is running.
   result = t.dataFn != nil
 
+proc handle*[TArg](t: Thread[TArg]): SysThread {.inline.} =
+  ## returns the thread handle of `t`.
+  result = t.sys
+
 when hostOS == "windows":
   proc joinThread*[TArg](t: Thread[TArg]) {.inline.} =
     ## waits for the thread `t` to finish.
diff --git a/lib/system/timers.nim b/lib/system/timers.nim
index ac8418824..129a7d092 100644
--- a/lib/system/timers.nim
+++ b/lib/system/timers.nim
@@ -61,7 +61,7 @@ elif defined(posixRealtime):
                final, pure.} = object ## struct timespec
       tv_sec: int  ## Seconds.
       tv_nsec: int ## Nanoseconds.
-  {.deprecated: [TClockid: Clickid, TTimeSpec: TimeSpec].}
+  {.deprecated: [TClockid: Clockid, TTimeSpec: TimeSpec].}
 
   var
     CLOCK_REALTIME {.importc: "CLOCK_REALTIME", header: "<time.h>".}: Clockid
@@ -78,11 +78,16 @@ elif defined(posixRealtime):
 
 else:
   # fallback Posix implementation:
+  when defined(linux):
+    type Time = clong
+  else:
+    type Time = int
+
   type
     Timeval {.importc: "struct timeval", header: "<sys/select.h>",
                final, pure.} = object ## struct timeval
-      tv_sec: int  ## Seconds.
-      tv_usec: int ## Microseconds.
+      tv_sec: Time  ## Seconds.
+      tv_usec: clong ## Microseconds.
   {.deprecated: [Ttimeval: Timeval].}
   proc posix_gettimeofday(tp: var Timeval, unused: pointer = nil) {.
     importc: "gettimeofday", header: "<sys/time.h>".}
diff --git a/lib/system/widestrs.nim b/lib/system/widestrs.nim
index 5a30a7c0f..578bebe80 100644
--- a/lib/system/widestrs.nim
+++ b/lib/system/widestrs.nim
@@ -73,11 +73,11 @@ template fastRuneAt(s: cstring, i: int, result: expr, doInc = true) =
     result = 0xFFFD
     when doInc: inc(i)
 
-iterator runes(s: cstring): int =
+iterator runes(s: cstring, L: int): int =
   var
     i = 0
     result: int
-  while s[i] != '\0':
+  while i < L:
     fastRuneAt(s, i, result, true)
     yield result
 
@@ -85,7 +85,7 @@ proc newWideCString*(source: cstring, L: int): WideCString =
   unsafeNew(result, L * 4 + 2)
   #result = cast[wideCString](alloc(L * 4 + 2))
   var d = 0
-  for ch in runes(source):
+  for ch in runes(source, L):
     if ch <=% UNI_MAX_BMP:
       if ch >=% UNI_SUR_HIGH_START and ch <=% UNI_SUR_LOW_END:
         result[d] = UNI_REPLACEMENT_CHAR
@@ -104,12 +104,7 @@ proc newWideCString*(source: cstring, L: int): WideCString =
 proc newWideCString*(s: cstring): WideCString =
   if s.isNil: return nil
 
-  when not declared(c_strlen):
-    proc c_strlen(a: cstring): int {.
-      header: "<string.h>", noSideEffect, importc: "strlen".}
-
-  let L = c_strlen(s)
-  result = newWideCString(s, L)
+  result = newWideCString(s, s.len)
 
 proc newWideCString*(s: string): WideCString =
   result = newWideCString(s, s.len)