Diffstat (limited to 'lib/system')
-rw-r--r--  lib/system/alloc.nim        433
-rw-r--r--  lib/system/ansi_c.nim        22
-rw-r--r--  lib/system/arithm.nim       184
-rw-r--r--  lib/system/assign.nim        34
-rw-r--r--  lib/system/atomics.nim       49
-rw-r--r--  lib/system/avltree.nim        8
-rw-r--r--  lib/system/cellsets.nim      63
-rw-r--r--  lib/system/cgprocs.nim       16
-rw-r--r--  lib/system/channels.nim     528
-rw-r--r--  lib/system/chcks.nim          6
-rw-r--r--  lib/system/debugger.nim      62
-rw-r--r--  lib/system/deepcopy.nim      10
-rw-r--r--  lib/system/dyncalls.nim      41
-rw-r--r--  lib/system/endb.nim          98
-rw-r--r--  lib/system/excpt.nim         97
-rw-r--r--  lib/system/gc.nim           312
-rw-r--r--  lib/system/gc2.nim          283
-rw-r--r--  lib/system/gc_common.nim    277
-rw-r--r--  lib/system/gc_ms.nim        255
-rw-r--r--  lib/system/hti.nim           10
-rw-r--r--  lib/system/inclrtl.nim       10
-rw-r--r--  lib/system/jssys.nim        195
-rw-r--r--  lib/system/mmdisp.nim       299
-rw-r--r--  lib/system/nimscript.nim    277
-rw-r--r--  lib/system/platforms.nim      8
-rw-r--r--  lib/system/profiler.nim      24
-rw-r--r--  lib/system/repr.nim          86
-rw-r--r--  lib/system/sets.nim           9
-rw-r--r--  lib/system/sysio.nim        124
-rw-r--r--  lib/system/syslocks.nim      76
-rw-r--r--  lib/system/sysspawn.nim       6
-rw-r--r--  lib/system/sysstr.nim        14
-rw-r--r--  lib/system/threads.nim      333
-rw-r--r--  lib/system/timers.nim        73
-rw-r--r--  lib/system/widestrs.nim      65
35 files changed, 2629 insertions, 1758 deletions
diff --git a/lib/system/alloc.nim b/lib/system/alloc.nim
index fd3ced832..3ebbc8c1e 100644
--- a/lib/system/alloc.nim
+++ b/lib/system/alloc.nim
@@ -8,50 +8,109 @@
 #
 
 # Low level allocator for Nim. Has been designed to support the GC.
-# TODO: 
+# TODO:
 # - eliminate "used" field
 # - make searching for block O(1)
 {.push profiler:off.}
 
+proc roundup(x, v: int): int {.inline.} =
+  result = (x + (v-1)) and not (v-1)
+  sysAssert(result >= x, "roundup: result < x")
+  #return ((-x) and (v-1)) +% x
+
+sysAssert(roundup(14, PageSize) == PageSize, "invalid PageSize")
+sysAssert(roundup(15, 8) == 16, "roundup broken")
+sysAssert(roundup(65, 8) == 72, "roundup broken 2")
+
 # ------------ platform specific chunk allocation code -----------------------
 
 # some platforms have really weird unmap behaviour: unmap(blockStart, PageSize)
 # really frees the whole block. Happens for Linux/PowerPC for example. Amd64
 # and x86 are safe though; Windows is special because MEM_RELEASE can only be
-# used with a size of 0:
-const weirdUnmap = not (defined(amd64) or defined(i386)) or defined(windows)
+# used with a size of 0. We also allow unmapping to be turned off with
+# -d:nimAllocNoUnmap:
+const doNotUnmap = not (defined(amd64) or defined(i386)) or
+                   defined(windows) or defined(nimAllocNoUnmap)
+
 
-when defined(posix): 
+when defined(emscripten):
   const
-    PROT_READ  = 1             # page can be read 
-    PROT_WRITE = 2             # page can be written 
-    MAP_PRIVATE = 2'i32        # Changes are private 
-  
+    PROT_READ  = 1             # page can be read
+    PROT_WRITE = 2             # page can be written
+    MAP_PRIVATE = 2'i32        # Changes are private
+
+  var MAP_ANONYMOUS {.importc: "MAP_ANONYMOUS", header: "<sys/mman.h>".}: cint
+  type 
+    PEmscriptenMMapBlock = ptr EmscriptenMMapBlock
+    EmscriptenMMapBlock {.pure, inheritable.} = object
+      realSize: int        # size of previous chunk; for coalescing
+      realPointer: pointer     # if < PageSize it is a small chunk
+
+  proc mmap(adr: pointer, len: int, prot, flags, fildes: cint,
+            off: int): pointer {.header: "<sys/mman.h>".}
+
+  proc munmap(adr: pointer, len: int) {.header: "<sys/mman.h>".}
+
+  proc osAllocPages(block_size: int): pointer {.inline.} =
+    let realSize = block_size + sizeof(EmscriptenMMapBlock) + PageSize + 1
+    result = mmap(nil, realSize, PROT_READ or PROT_WRITE,
+                             MAP_PRIVATE or MAP_ANONYMOUS, -1, 0)
+    if result == nil or result == cast[pointer](-1):
+      raiseOutOfMem()
+
+    let realPointer = result
+    let pos = cast[int](result)
+
+    # Convert pointer to PageSize correct one.
+    var new_pos = cast[ByteAddress](pos) +% (PageSize - (pos %% PageSize))
+    if (new_pos-pos)< sizeof(EmscriptenMMapBlock):
+      new_pos = new_pos +% PageSize
+    result = cast[pointer](new_pos)
+
+    var mmapDescrPos = cast[ByteAddress](result) -% sizeof(EmscriptenMMapBlock)
+
+    var mmapDescr = cast[EmscriptenMMapBlock](mmapDescrPos)
+    mmapDescr.realSize = realSize
+    mmapDescr.realPointer = realPointer
+
+    c_fprintf(c_stdout, "[Alloc] size %d %d realSize:%d realPos:%d\n", block_size, cast[int](result), realSize, cast[int](realPointer))
+
+  proc osDeallocPages(p: pointer, size: int) {.inline} =
+    var mmapDescrPos = cast[ByteAddress](p) -% sizeof(EmscriptenMMapBlock)
+    var mmapDescr = cast[EmscriptenMMapBlock](mmapDescrPos)
+    munmap(mmapDescr.realPointer, mmapDescr.realSize)
+
+elif defined(posix):
+  const
+    PROT_READ  = 1             # page can be read
+    PROT_WRITE = 2             # page can be written
+    MAP_PRIVATE = 2'i32        # Changes are private
+
   when defined(macosx) or defined(bsd):
     const MAP_ANONYMOUS = 0x1000
-  elif defined(solaris): 
+  elif defined(solaris):
     const MAP_ANONYMOUS = 0x100
   else:
     var
       MAP_ANONYMOUS {.importc: "MAP_ANONYMOUS", header: "<sys/mman.h>".}: cint
-    
+
   proc mmap(adr: pointer, len: int, prot, flags, fildes: cint,
             off: int): pointer {.header: "<sys/mman.h>".}
 
   proc munmap(adr: pointer, len: int) {.header: "<sys/mman.h>".}
-  
-  proc osAllocPages(size: int): pointer {.inline.} = 
-    result = mmap(nil, size, PROT_READ or PROT_WRITE, 
+
+  proc osAllocPages(size: int): pointer {.inline.} =
+    result = mmap(nil, size, PROT_READ or PROT_WRITE,
                              MAP_PRIVATE or MAP_ANONYMOUS, -1, 0)
     if result == nil or result == cast[pointer](-1):
       raiseOutOfMem()
-      
+
   proc osDeallocPages(p: pointer, size: int) {.inline} =
     when reallyOsDealloc: munmap(p, size)
-  
-elif defined(windows): 
+
+elif defined(windows):
   const
-    MEM_RESERVE = 0x2000 
+    MEM_RESERVE = 0x2000
     MEM_COMMIT = 0x1000
     MEM_TOP_DOWN = 0x100000
     PAGE_READWRITE = 0x04
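
Note: the roundup helper added in this hunk is the standard power-of-two alignment trick: adding v-1 and masking with not(v-1) rounds x up to the next multiple of v, provided v is a power of two. A minimal standalone sketch (hypothetical name roundupSketch, illustrative values only):

    proc roundupSketch(x, v: int): int =
      # v must be a power of two, otherwise the mask does not describe a multiple of v
      result = (x + (v - 1)) and not (v - 1)

    echo roundupSketch(13, 8)     # 16
    echo roundupSketch(16, 8)     # 16
    echo roundupSketch(1, 4096)   # 4096, i.e. one page on a 4 KiB PageSize build
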
@@ -62,12 +121,12 @@ elif defined(windows):
   proc virtualAlloc(lpAddress: pointer, dwSize: int, flAllocationType,
                     flProtect: int32): pointer {.
                     header: "<windows.h>", stdcall, importc: "VirtualAlloc".}
-  
-  proc virtualFree(lpAddress: pointer, dwSize: int, 
+
+  proc virtualFree(lpAddress: pointer, dwSize: int,
                    dwFreeType: int32) {.header: "<windows.h>", stdcall,
                    importc: "VirtualFree".}
-  
-  proc osAllocPages(size: int): pointer {.inline.} = 
+
+  proc osAllocPages(size: int): pointer {.inline.} =
     result = virtualAlloc(nil, size, MEM_RESERVE or MEM_COMMIT,
                           PAGE_READWRITE)
     if result == nil: raiseOutOfMem()
@@ -82,7 +141,22 @@ elif defined(windows):
     when reallyOsDealloc: virtualFree(p, 0, MEM_RELEASE)
     #VirtualFree(p, size, MEM_DECOMMIT)
 
-else: 
+elif hostOS == "standalone":
+  var
+    theHeap: array[1024*PageSize, float64] # 'float64' for alignment
+    bumpPointer = cast[int](addr theHeap)
+
+  proc osAllocPages(size: int): pointer {.inline.} =
+    if size+bumpPointer < cast[int](addr theHeap) + sizeof(theHeap):
+      result = cast[pointer](bumpPointer)
+      inc bumpPointer, size
+    else:
+      raiseOutOfMem()
+
+  proc osDeallocPages(p: pointer, size: int) {.inline.} =
+    if bumpPointer-size == cast[int](p):
+      dec bumpPointer, size
+else:
   {.error: "Port memory manager to your platform".}
 
 # --------------------- end of non-portable code -----------------------------
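
Note: the new hostOS == "standalone" branch above falls back to a bump allocator over a fixed buffer: allocation just advances a pointer, and only the most recently allocated block can ever be given back. A self-contained sketch of the same idea (hypothetical buffer size and names):

    const heapSize = 4096
    var
      buf: array[heapSize, byte]
      bump = 0

    proc bumpAlloc(size: int): pointer =
      # carve `size` bytes off the front of the remaining buffer
      if size > 0 and bump + size <= heapSize:
        result = addr buf[bump]
        inc bump, size
      else:
        result = nil               # out of memory in this sketch

    proc bumpDealloc(p: pointer, size: int) =
      # undo the allocation only if it is the topmost one (LIFO discipline)
      if bump >= size and cast[pointer](addr buf[bump - size]) == p:
        dec bump, size
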
@@ -97,90 +171,85 @@ const
   InitialMemoryRequest = ChunkOsReturn div 2 # < ChunkOsReturn!
   SmallChunkSize = PageSize
 
-type 
-  PTrunk = ptr TTrunk
-  TTrunk {.final.} = object 
+type
+  PTrunk = ptr Trunk
+  Trunk {.final.} = object
     next: PTrunk         # all nodes are connected with this pointer
     key: int             # start address at bit 0
     bits: array[0..IntsPerTrunk-1, int] # a bit vector
-  
-  TTrunkBuckets = array[0..255, PTrunk]
-  TIntSet {.final.} = object 
-    data: TTrunkBuckets
-  
+
+  TrunkBuckets = array[0..255, PTrunk]
+  IntSet {.final.} = object
+    data: TrunkBuckets
+{.deprecated: [TIntSet: IntSet, TTrunk: Trunk, TTrunkBuckets: TrunkBuckets].}
+
 type
-  TAlignType = BiggestFloat
-  TFreeCell {.final, pure.} = object
-    next: ptr TFreeCell  # next free cell in chunk (overlaid with refcount)
+  AlignType = BiggestFloat
+  FreeCell {.final, pure.} = object
+    next: ptr FreeCell  # next free cell in chunk (overlaid with refcount)
     zeroField: int       # 0 means cell is not used (overlaid with typ field)
                          # 1 means cell is manually managed pointer
                          # otherwise a PNimType is stored in there
 
-  PChunk = ptr TBaseChunk
-  PBigChunk = ptr TBigChunk
-  PSmallChunk = ptr TSmallChunk
-  TBaseChunk {.pure, inheritable.} = object
+  PChunk = ptr BaseChunk
+  PBigChunk = ptr BigChunk
+  PSmallChunk = ptr SmallChunk
+  BaseChunk {.pure, inheritable.} = object
     prevSize: int        # size of previous chunk; for coalescing
     size: int            # if < PageSize it is a small chunk
     used: bool           # later will be optimized into prevSize...
-  
-  TSmallChunk = object of TBaseChunk
+
+  SmallChunk = object of BaseChunk
     next, prev: PSmallChunk  # chunks of the same size
-    freeList: ptr TFreeCell
-    free: int            # how many bytes remain    
+    freeList: ptr FreeCell
+    free: int            # how many bytes remain
     acc: int             # accumulator for small object allocation
-    data: TAlignType     # start of usable memory
-  
-  TBigChunk = object of TBaseChunk # not necessarily > PageSize!
+    data: AlignType      # start of usable memory
+
+  BigChunk = object of BaseChunk # not necessarily > PageSize!
     next, prev: PBigChunk    # chunks of the same (or bigger) size
     align: int
-    data: TAlignType     # start of usable memory
-
-template smallChunkOverhead(): expr = sizeof(TSmallChunk)-sizeof(TAlignType)
-template bigChunkOverhead(): expr = sizeof(TBigChunk)-sizeof(TAlignType)
+    data: AlignType      # start of usable memory
+{.deprecated: [TAlignType: AlignType, TFreeCell: FreeCell, TBaseChunk: BaseChunk,
+              TBigChunk: BigChunk, TSmallChunk: SmallChunk].}
 
-proc roundup(x, v: int): int {.inline.} = 
-  result = (x + (v-1)) and not (v-1)
-  sysAssert(result >= x, "roundup: result < x")
-  #return ((-x) and (v-1)) +% x
-
-sysAssert(roundup(14, PageSize) == PageSize, "invalid PageSize")
-sysAssert(roundup(15, 8) == 16, "roundup broken")
-sysAssert(roundup(65, 8) == 72, "roundup broken 2")
+template smallChunkOverhead(): expr = sizeof(SmallChunk)-sizeof(AlignType)
+template bigChunkOverhead(): expr = sizeof(BigChunk)-sizeof(AlignType)
 
 # ------------- chunk table ---------------------------------------------------
 # We use a PtrSet of chunk starts and a table[Page, chunksize] for chunk
 # endings of big chunks. This is needed by the merging operation. The only
 # remaining operation is best-fit for big chunks. Since there is a size-limit
 # for big chunks (because greater than the limit means they are returned back
-# to the OS), a fixed size array can be used. 
+# to the OS), a fixed size array can be used.
 
 type
-  PLLChunk = ptr TLLChunk
-  TLLChunk {.pure.} = object ## *low-level* chunk
+  PLLChunk = ptr LLChunk
+  LLChunk {.pure.} = object ## *low-level* chunk
     size: int                # remaining size
     acc: int                 # accumulator
     next: PLLChunk           # next low-level chunk; only needed for dealloc
 
-  PAvlNode = ptr TAvlNode
-  TAvlNode {.pure, final.} = object 
-    link: array[0..1, PAvlNode] # Left (0) and right (1) links 
+  PAvlNode = ptr AvlNode
+  AvlNode {.pure, final.} = object
+    link: array[0..1, PAvlNode] # Left (0) and right (1) links
     key, upperBound: int
     level: int
-    
-  TMemRegion {.final, pure.} = object
+
+  MemRegion {.final, pure.} = object
     minLargeObj, maxLargeObj: int
     freeSmallChunks: array[0..SmallChunkSize div MemAlign-1, PSmallChunk]
     llmem: PLLChunk
     currMem, maxMem, freeMem: int # memory sizes (allocated from OS)
-    lastSize: int # needed for the case that OS gives us pages linearly 
+    lastSize: int # needed for the case that OS gives us pages linearly
     freeChunksList: PBigChunk # XXX make this a datastructure with O(1) access
-    chunkStarts: TIntSet
+    chunkStarts: IntSet
     root, deleted, last, freeAvlNodes: PAvlNode
-  
+{.deprecated: [TLLChunk: LLChunk, TAvlNode: AvlNode, TMemRegion: MemRegion].}
+
 # shared:
 var
-  bottomData: TAvlNode
+  bottomData: AvlNode
   bottom: PAvlNode
 
 {.push stack_trace: off.}
@@ -191,44 +260,44 @@ proc initAllocator() =
     bottom.link[1] = bottom
 {.pop.}
 
-proc incCurrMem(a: var TMemRegion, bytes: int) {.inline.} = 
+proc incCurrMem(a: var MemRegion, bytes: int) {.inline.} =
   inc(a.currMem, bytes)
 
-proc decCurrMem(a: var TMemRegion, bytes: int) {.inline.} =
+proc decCurrMem(a: var MemRegion, bytes: int) {.inline.} =
   a.maxMem = max(a.maxMem, a.currMem)
   dec(a.currMem, bytes)
 
-proc getMaxMem(a: var TMemRegion): int =
-  # Since we update maxPagesCount only when freeing pages, 
+proc getMaxMem(a: var MemRegion): int =
+  # Since we update maxPagesCount only when freeing pages,
   # maxPagesCount may not be up to date. Thus we use the
   # maximum of these both values here:
   result = max(a.currMem, a.maxMem)
-  
-proc llAlloc(a: var TMemRegion, size: int): pointer =
+
+proc llAlloc(a: var MemRegion, size: int): pointer =
   # *low-level* alloc for the memory managers data structures. Deallocation
-  # is done at he end of the allocator's life time.
+  # is done at the end of the allocator's life time.
   if a.llmem == nil or size > a.llmem.size:
-    # the requested size is ``roundup(size+sizeof(TLLChunk), PageSize)``, but
+    # the requested size is ``roundup(size+sizeof(LLChunk), PageSize)``, but
     # since we know ``size`` is a (small) constant, we know the requested size
     # is one page:
-    sysAssert roundup(size+sizeof(TLLChunk), PageSize) == PageSize, "roundup 6"
+    sysAssert roundup(size+sizeof(LLChunk), PageSize) == PageSize, "roundup 6"
     var old = a.llmem # can be nil and is correct with nil
     a.llmem = cast[PLLChunk](osAllocPages(PageSize))
     incCurrMem(a, PageSize)
-    a.llmem.size = PageSize - sizeof(TLLChunk)
-    a.llmem.acc = sizeof(TLLChunk)
+    a.llmem.size = PageSize - sizeof(LLChunk)
+    a.llmem.acc = sizeof(LLChunk)
     a.llmem.next = old
   result = cast[pointer](cast[ByteAddress](a.llmem) + a.llmem.acc)
   dec(a.llmem.size, size)
   inc(a.llmem.acc, size)
   zeroMem(result, size)
 
-proc allocAvlNode(a: var TMemRegion, key, upperBound: int): PAvlNode =
+proc allocAvlNode(a: var MemRegion, key, upperBound: int): PAvlNode =
   if a.freeAvlNodes != nil:
     result = a.freeAvlNodes
     a.freeAvlNodes = a.freeAvlNodes.link[0]
   else:
-    result = cast[PAvlNode](llAlloc(a, sizeof(TAvlNode)))
+    result = cast[PAvlNode](llAlloc(a, sizeof(AvlNode)))
   result.key = key
   result.upperBound = upperBound
   result.link[0] = bottom
@@ -238,28 +307,28 @@ proc allocAvlNode(a: var TMemRegion, key, upperBound: int): PAvlNode =
   sysAssert(bottom.link[0] == bottom, "bottom link[0]")
   sysAssert(bottom.link[1] == bottom, "bottom link[1]")
 
-proc deallocAvlNode(a: var TMemRegion, n: PAvlNode) {.inline.} =
+proc deallocAvlNode(a: var MemRegion, n: PAvlNode) {.inline.} =
   n.link[0] = a.freeAvlNodes
   a.freeAvlNodes = n
 
 include "system/avltree"
 
-proc llDeallocAll(a: var TMemRegion) =
+proc llDeallocAll(a: var MemRegion) =
   var it = a.llmem
   while it != nil:
     # we know each block in the list has the size of 1 page:
     var next = it.next
     osDeallocPages(it, PageSize)
     it = next
-  
-proc intSetGet(t: TIntSet, key: int): PTrunk = 
+
+proc intSetGet(t: IntSet, key: int): PTrunk =
   var it = t.data[key and high(t.data)]
-  while it != nil: 
+  while it != nil:
     if it.key == key: return it
     it = it.next
   result = nil
 
-proc intSetPut(a: var TMemRegion, t: var TIntSet, key: int): PTrunk = 
+proc intSetPut(a: var MemRegion, t: var IntSet, key: int): PTrunk =
   result = intSetGet(t, key)
   if result == nil:
     result = cast[PTrunk](llAlloc(a, sizeof(result[])))
@@ -267,27 +336,27 @@ proc intSetPut(a: var TMemRegion, t: var TIntSet, key: int): PTrunk =
     t.data[key and high(t.data)] = result
     result.key = key
 
-proc contains(s: TIntSet, key: int): bool = 
+proc contains(s: IntSet, key: int): bool =
   var t = intSetGet(s, key shr TrunkShift)
-  if t != nil: 
+  if t != nil:
     var u = key and TrunkMask
     result = (t.bits[u shr IntShift] and (1 shl (u and IntMask))) != 0
-  else: 
+  else:
     result = false
-  
-proc incl(a: var TMemRegion, s: var TIntSet, key: int) = 
+
+proc incl(a: var MemRegion, s: var IntSet, key: int) =
   var t = intSetPut(a, s, key shr TrunkShift)
   var u = key and TrunkMask
   t.bits[u shr IntShift] = t.bits[u shr IntShift] or (1 shl (u and IntMask))
 
-proc excl(s: var TIntSet, key: int) = 
+proc excl(s: var IntSet, key: int) =
   var t = intSetGet(s, key shr TrunkShift)
   if t != nil:
     var u = key and TrunkMask
     t.bits[u shr IntShift] = t.bits[u shr IntShift] and not
         (1 shl (u and IntMask))
 
-iterator elements(t: TIntSet): int {.inline.} =
+iterator elements(t: IntSet): int {.inline.} =
   # while traversing it is forbidden to change the set!
   for h in 0..high(t.data):
     var r = t.data[h]
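
Note: contains, incl and excl above all decompose a page index the same way: the high bits select a Trunk (hashed into t.data), the low bits select a word and a bit inside that trunk's bit vector. A sketch of that decomposition with illustrative constants (the real IntShift/TrunkShift values are defined elsewhere in the runtime):

    const
      IntShift   = 6                       # 64-bit words: 2^6 bits per word
      TrunkShift = 9                       # 2^9 = 512 bits per trunk
      IntMask    = (1 shl IntShift) - 1
      TrunkMask  = (1 shl TrunkShift) - 1

    proc bitPosition(key: int): tuple[trunkKey, word, mask: int] =
      let u = key and TrunkMask            # bit index within the trunk
      result = (trunkKey: key shr TrunkShift,   # which trunk (looked up in t.data)
                word: u shr IntShift,           # which word of trunk.bits
                mask: 1 shl (u and IntMask))    # which bit of that word
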
@@ -304,14 +373,14 @@ iterator elements(t: TIntSet): int {.inline.} =
           w = w shr 1
         inc(i)
       r = r.next
-  
-proc isSmallChunk(c: PChunk): bool {.inline.} = 
+
+proc isSmallChunk(c: PChunk): bool {.inline.} =
   return c.size <= SmallChunkSize-smallChunkOverhead()
-  
-proc chunkUnused(c: PChunk): bool {.inline.} = 
+
+proc chunkUnused(c: PChunk): bool {.inline.} =
   result = not c.used
 
-iterator allObjects(m: TMemRegion): pointer {.inline.} =
+iterator allObjects(m: MemRegion): pointer {.inline.} =
   for s in elements(m.chunkStarts):
     # we need to check here again as it could have been modified:
     if s in m.chunkStarts:
@@ -319,7 +388,7 @@ iterator allObjects(m: TMemRegion): pointer {.inline.} =
       if not chunkUnused(c):
         if isSmallChunk(c):
           var c = cast[PSmallChunk](c)
-          
+
           let size = c.size
           var a = cast[ByteAddress](addr(c.data))
           let limit = a + c.acc
@@ -331,20 +400,20 @@ iterator allObjects(m: TMemRegion): pointer {.inline.} =
           yield addr(c.data)
 
 proc isCell(p: pointer): bool {.inline.} =
-  result = cast[ptr TFreeCell](p).zeroField >% 1
+  result = cast[ptr FreeCell](p).zeroField >% 1
 
 # ------------- chunk management ----------------------------------------------
-proc pageIndex(c: PChunk): int {.inline.} = 
+proc pageIndex(c: PChunk): int {.inline.} =
   result = cast[ByteAddress](c) shr PageShift
 
-proc pageIndex(p: pointer): int {.inline.} = 
+proc pageIndex(p: pointer): int {.inline.} =
   result = cast[ByteAddress](p) shr PageShift
 
-proc pageAddr(p: pointer): PChunk {.inline.} = 
+proc pageAddr(p: pointer): PChunk {.inline.} =
   result = cast[PChunk](cast[ByteAddress](p) and not PageMask)
   #sysAssert(Contains(allocator.chunkStarts, pageIndex(result)))
 
-proc requestOsChunks(a: var TMemRegion, size: int): PBigChunk = 
+proc requestOsChunks(a: var MemRegion, size: int): PBigChunk =
   incCurrMem(a, size)
   inc(a.freeMem, size)
   result = cast[PBigChunk](osAllocPages(size))
@@ -373,7 +442,7 @@ proc requestOsChunks(a: var TMemRegion, size: int): PBigChunk =
     result.prevSize = 0 # unknown
   a.lastSize = size # for next request
 
-proc freeOsChunks(a: var TMemRegion, p: pointer, size: int) = 
+proc freeOsChunks(a: var MemRegion, p: pointer, size: int) =
   # update next.prevSize:
   var c = cast[PChunk](p)
   var nxt = cast[ByteAddress](p) +% c.size
@@ -387,36 +456,36 @@ proc freeOsChunks(a: var TMemRegion, p: pointer, size: int) =
   dec(a.freeMem, size)
   #c_fprintf(c_stdout, "[Alloc] back to OS: %ld\n", size)
 
-proc isAccessible(a: TMemRegion, p: pointer): bool {.inline.} = 
+proc isAccessible(a: MemRegion, p: pointer): bool {.inline.} =
   result = contains(a.chunkStarts, pageIndex(p))
 
-proc contains[T](list, x: T): bool = 
+proc contains[T](list, x: T): bool =
   var it = list
   while it != nil:
     if it == x: return true
     it = it.next
-    
-proc writeFreeList(a: TMemRegion) =
+
+proc writeFreeList(a: MemRegion) =
   var it = a.freeChunksList
   c_fprintf(c_stdout, "freeChunksList: %p\n", it)
-  while it != nil: 
-    c_fprintf(c_stdout, "it: %p, next: %p, prev: %p\n", 
+  while it != nil:
+    c_fprintf(c_stdout, "it: %p, next: %p, prev: %p\n",
               it, it.next, it.prev)
     it = it.next
 
-proc listAdd[T](head: var T, c: T) {.inline.} = 
+proc listAdd[T](head: var T, c: T) {.inline.} =
   sysAssert(c notin head, "listAdd 1")
   sysAssert c.prev == nil, "listAdd 2"
   sysAssert c.next == nil, "listAdd 3"
   c.next = head
-  if head != nil: 
+  if head != nil:
     sysAssert head.prev == nil, "listAdd 4"
     head.prev = c
   head = c
 
 proc listRemove[T](head: var T, c: T) {.inline.} =
   sysAssert(c in head, "listRemove")
-  if c == head: 
+  if c == head:
     head = c.next
     sysAssert c.prev == nil, "listRemove 2"
     if head != nil: head.prev = nil
@@ -426,15 +495,15 @@ proc listRemove[T](head: var T, c: T) {.inline.} =
     if c.next != nil: c.next.prev = c.prev
   c.next = nil
   c.prev = nil
-  
-proc updatePrevSize(a: var TMemRegion, c: PBigChunk, 
-                    prevSize: int) {.inline.} = 
+
+proc updatePrevSize(a: var MemRegion, c: PBigChunk,
+                    prevSize: int) {.inline.} =
   var ri = cast[PChunk](cast[ByteAddress](c) +% c.size)
   sysAssert((cast[ByteAddress](ri) and PageMask) == 0, "updatePrevSize")
   if isAccessible(a, ri):
     ri.prevSize = prevSize
-  
-proc freeBigChunk(a: var TMemRegion, c: PBigChunk) = 
+
+proc freeBigChunk(a: var MemRegion, c: PBigChunk) =
   var c = c
   sysAssert(c.size >= PageSize, "freeBigChunk")
   inc(a.freeMem, c.size)
@@ -448,7 +517,7 @@ proc freeBigChunk(a: var TMemRegion, c: PBigChunk) =
         inc(c.size, ri.size)
         excl(a.chunkStarts, pageIndex(ri))
   when coalescLeft:
-    if c.prevSize != 0: 
+    if c.prevSize != 0:
       var le = cast[PChunk](cast[ByteAddress](c) -% c.prevSize)
       sysAssert((cast[ByteAddress](le) and PageMask) == 0, "freeBigChunk 4")
       if isAccessible(a, le) and chunkUnused(le):
@@ -459,7 +528,7 @@ proc freeBigChunk(a: var TMemRegion, c: PBigChunk) =
           excl(a.chunkStarts, pageIndex(c))
           c = cast[PBigChunk](le)
 
-  if c.size < ChunkOsReturn or weirdUnmap:
+  if c.size < ChunkOsReturn or doNotUnmap:
     incl(a, a.chunkStarts, pageIndex(c))
     updatePrevSize(a, c, c.size)
     listAdd(a.freeChunksList, c)
@@ -467,7 +536,7 @@ proc freeBigChunk(a: var TMemRegion, c: PBigChunk) =
   else:
     freeOsChunks(a, c, c.size)
 
-proc splitChunk(a: var TMemRegion, c: PBigChunk, size: int) = 
+proc splitChunk(a: var MemRegion, c: PBigChunk, size: int) =
   var rest = cast[PBigChunk](cast[ByteAddress](c) +% size)
   sysAssert(rest notin a.freeChunksList, "splitChunk")
   rest.size = c.size - size
@@ -480,7 +549,7 @@ proc splitChunk(a: var TMemRegion, c: PBigChunk, size: int) =
   incl(a, a.chunkStarts, pageIndex(rest))
   listAdd(a.freeChunksList, rest)
 
-proc getBigChunk(a: var TMemRegion, size: int): PBigChunk = 
+proc getBigChunk(a: var MemRegion, size: int): PBigChunk =
   # use first fit for now:
   sysAssert((size and PageMask) == 0, "getBigChunk 1")
   sysAssert(size > 0, "getBigChunk 2")
@@ -488,7 +557,7 @@ proc getBigChunk(a: var TMemRegion, size: int): PBigChunk =
   block search:
     while result != nil:
       sysAssert chunkUnused(result), "getBigChunk 3"
-      if result.size == size: 
+      if result.size == size:
         listRemove(a.freeChunksList, result)
         break search
       elif result.size > size:
@@ -497,7 +566,7 @@ proc getBigChunk(a: var TMemRegion, size: int): PBigChunk =
         break search
       result = result.next
       sysAssert result != a.freeChunksList, "getBigChunk 4"
-    if size < InitialMemoryRequest: 
+    if size < InitialMemoryRequest:
       result = requestOsChunks(a, InitialMemoryRequest)
       splitChunk(a, result, size)
     else:
@@ -507,29 +576,29 @@ proc getBigChunk(a: var TMemRegion, size: int): PBigChunk =
   incl(a, a.chunkStarts, pageIndex(result))
   dec(a.freeMem, size)
 
-proc getSmallChunk(a: var TMemRegion): PSmallChunk = 
+proc getSmallChunk(a: var MemRegion): PSmallChunk =
   var res = getBigChunk(a, PageSize)
   sysAssert res.prev == nil, "getSmallChunk 1"
   sysAssert res.next == nil, "getSmallChunk 2"
   result = cast[PSmallChunk](res)
 
 # -----------------------------------------------------------------------------
-proc isAllocatedPtr(a: TMemRegion, p: pointer): bool {.benign.}
+proc isAllocatedPtr(a: MemRegion, p: pointer): bool {.benign.}
 
-proc allocInv(a: TMemRegion): bool =
+proc allocInv(a: MemRegion): bool =
   ## checks some (not all yet) invariants of the allocator's data structures.
   for s in low(a.freeSmallChunks)..high(a.freeSmallChunks):
     var c = a.freeSmallChunks[s]
     while c != nil:
-      if c.next == c: 
+      if c.next == c:
         echo "[SYSASSERT] c.next == c"
         return false
-      if c.size != s * MemAlign: 
+      if c.size != s * MemAlign:
         echo "[SYSASSERT] c.size != s * MemAlign"
         return false
       var it = c.freeList
       while it != nil:
-        if it.zeroField != 0: 
+        if it.zeroField != 0:
           echo "[SYSASSERT] it.zeroField != 0"
           c_printf("%ld %p\n", it.zeroField, it)
           return false
@@ -537,18 +606,18 @@ proc allocInv(a: TMemRegion): bool =
       c = c.next
   result = true
 
-proc rawAlloc(a: var TMemRegion, requestedSize: int): pointer =
+proc rawAlloc(a: var MemRegion, requestedSize: int): pointer =
   sysAssert(allocInv(a), "rawAlloc: begin")
-  sysAssert(roundup(65, 8) == 72, "rawAlloc 1")
-  sysAssert requestedSize >= sizeof(TFreeCell), "rawAlloc 2"
+  sysAssert(roundup(65, 8) == 72, "rawAlloc: roundup broken")
+  sysAssert(requestedSize >= sizeof(FreeCell), "rawAlloc: requested size too small")
   var size = roundup(requestedSize, MemAlign)
   sysAssert(size >= requestedSize, "insufficient allocated size!")
   #c_fprintf(c_stdout, "alloc; size: %ld; %ld\n", requestedSize, size)
-  if size <= SmallChunkSize-smallChunkOverhead(): 
+  if size <= SmallChunkSize-smallChunkOverhead():
     # allocate a small block: for small chunks, we use only its next pointer
     var s = size div MemAlign
     var c = a.freeSmallChunks[s]
-    if c == nil: 
+    if c == nil:
       c = getSmallChunk(a)
       c.freeList = nil
       sysAssert c.size == PageSize, "rawAlloc 3"
@@ -567,7 +636,7 @@ proc rawAlloc(a: var TMemRegion, requestedSize: int): pointer =
       #  c_fprintf(c_stdout, "csize: %lld; size %lld\n", c.size, size)
       sysAssert c.size == size, "rawAlloc 6"
       if c.freeList == nil:
-        sysAssert(c.acc + smallChunkOverhead() + size <= SmallChunkSize, 
+        sysAssert(c.acc + smallChunkOverhead() + size <= SmallChunkSize,
                   "rawAlloc 7")
         result = cast[pointer](cast[ByteAddress](addr(c.data)) +% c.acc)
         inc(c.acc, size)
@@ -601,11 +670,11 @@ proc rawAlloc(a: var TMemRegion, requestedSize: int): pointer =
   sysAssert(allocInv(a), "rawAlloc: end")
   when logAlloc: cprintf("rawAlloc: %ld %p\n", requestedSize, result)
 
-proc rawAlloc0(a: var TMemRegion, requestedSize: int): pointer =
+proc rawAlloc0(a: var MemRegion, requestedSize: int): pointer =
   result = rawAlloc(a, requestedSize)
   zeroMem(result, requestedSize)
 
-proc rawDealloc(a: var TMemRegion, p: pointer) =
+proc rawDealloc(a: var MemRegion, p: pointer) =
   #sysAssert(isAllocatedPtr(a, p), "rawDealloc: no allocated pointer")
   sysAssert(allocInv(a), "rawDealloc: begin")
   var c = pageAddr(p)
@@ -615,16 +684,16 @@ proc rawDealloc(a: var TMemRegion, p: pointer) =
     var s = c.size
     sysAssert(((cast[ByteAddress](p) and PageMask) - smallChunkOverhead()) %%
                s == 0, "rawDealloc 3")
-    var f = cast[ptr TFreeCell](p)
+    var f = cast[ptr FreeCell](p)
     #echo("setting to nil: ", $cast[TAddress](addr(f.zeroField)))
     sysAssert(f.zeroField != 0, "rawDealloc 1")
     f.zeroField = 0
     f.next = c.freeList
     c.freeList = f
-    when overwriteFree: 
+    when overwriteFree:
       # set to 0xff to check for usage after free bugs:
-      c_memset(cast[pointer](cast[int](p) +% sizeof(TFreeCell)), -1'i32, 
-               s -% sizeof(TFreeCell))
+      c_memset(cast[pointer](cast[int](p) +% sizeof(FreeCell)), -1'i32,
+               s -% sizeof(FreeCell))
     # check if it is not in the freeSmallChunks[s] list:
     if c.free < s:
       # add it to the freeSmallChunks[s] array:
@@ -649,36 +718,36 @@ proc rawDealloc(a: var TMemRegion, p: pointer) =
   sysAssert(allocInv(a), "rawDealloc: end")
   when logAlloc: cprintf("rawDealloc: %p\n", p)
 
-proc isAllocatedPtr(a: TMemRegion, p: pointer): bool = 
+proc isAllocatedPtr(a: MemRegion, p: pointer): bool =
   if isAccessible(a, p):
     var c = pageAddr(p)
     if not chunkUnused(c):
       if isSmallChunk(c):
         var c = cast[PSmallChunk](c)
-        var offset = (cast[ByteAddress](p) and (PageSize-1)) -% 
+        var offset = (cast[ByteAddress](p) and (PageSize-1)) -%
                      smallChunkOverhead()
         result = (c.acc >% offset) and (offset %% c.size == 0) and
-          (cast[ptr TFreeCell](p).zeroField >% 1)
+          (cast[ptr FreeCell](p).zeroField >% 1)
       else:
         var c = cast[PBigChunk](c)
-        result = p == addr(c.data) and cast[ptr TFreeCell](p).zeroField >% 1
+        result = p == addr(c.data) and cast[ptr FreeCell](p).zeroField >% 1
 
-proc prepareForInteriorPointerChecking(a: var TMemRegion) {.inline.} =
+proc prepareForInteriorPointerChecking(a: var MemRegion) {.inline.} =
   a.minLargeObj = lowGauge(a.root)
   a.maxLargeObj = highGauge(a.root)
 
-proc interiorAllocatedPtr(a: TMemRegion, p: pointer): pointer =
+proc interiorAllocatedPtr(a: MemRegion, p: pointer): pointer =
   if isAccessible(a, p):
     var c = pageAddr(p)
     if not chunkUnused(c):
       if isSmallChunk(c):
         var c = cast[PSmallChunk](c)
-        var offset = (cast[ByteAddress](p) and (PageSize-1)) -% 
+        var offset = (cast[ByteAddress](p) and (PageSize-1)) -%
                      smallChunkOverhead()
         if c.acc >% offset:
           sysAssert(cast[ByteAddress](addr(c.data)) +% offset ==
                     cast[ByteAddress](p), "offset is not what you think it is")
-          var d = cast[ptr TFreeCell](cast[ByteAddress](addr(c.data)) +% 
+          var d = cast[ptr FreeCell](cast[ByteAddress](addr(c.data)) +%
                     offset -% (offset %% c.size))
           if d.zeroField >% 1:
             result = d
@@ -686,7 +755,7 @@ proc interiorAllocatedPtr(a: TMemRegion, p: pointer): pointer =
       else:
         var c = cast[PBigChunk](c)
         var d = addr(c.data)
-        if p >= d and cast[ptr TFreeCell](d).zeroField >% 1:
+        if p >= d and cast[ptr FreeCell](d).zeroField >% 1:
           result = d
           sysAssert isAllocatedPtr(a, result), " result wrong pointer!"
   else:
@@ -699,38 +768,38 @@ proc interiorAllocatedPtr(a: TMemRegion, p: pointer): pointer =
         var k = cast[pointer](avlNode.key)
         var c = cast[PBigChunk](pageAddr(k))
         sysAssert(addr(c.data) == k, " k is not the same as addr(c.data)!")
-        if cast[ptr TFreeCell](k).zeroField >% 1:
+        if cast[ptr FreeCell](k).zeroField >% 1:
           result = k
           sysAssert isAllocatedPtr(a, result), " result wrong pointer!"
 
 proc ptrSize(p: pointer): int =
-  var x = cast[pointer](cast[ByteAddress](p) -% sizeof(TFreeCell))
+  var x = cast[pointer](cast[ByteAddress](p) -% sizeof(FreeCell))
   var c = pageAddr(p)
   sysAssert(not chunkUnused(c), "ptrSize")
-  result = c.size -% sizeof(TFreeCell)
+  result = c.size -% sizeof(FreeCell)
   if not isSmallChunk(c):
     dec result, bigChunkOverhead()
 
-proc alloc(allocator: var TMemRegion, size: int): pointer =
-  result = rawAlloc(allocator, size+sizeof(TFreeCell))
-  cast[ptr TFreeCell](result).zeroField = 1 # mark it as used
+proc alloc(allocator: var MemRegion, size: Natural): pointer =
+  result = rawAlloc(allocator, size+sizeof(FreeCell))
+  cast[ptr FreeCell](result).zeroField = 1 # mark it as used
   sysAssert(not isAllocatedPtr(allocator, result), "alloc")
-  result = cast[pointer](cast[ByteAddress](result) +% sizeof(TFreeCell))
+  result = cast[pointer](cast[ByteAddress](result) +% sizeof(FreeCell))
 
-proc alloc0(allocator: var TMemRegion, size: int): pointer =
+proc alloc0(allocator: var MemRegion, size: Natural): pointer =
   result = alloc(allocator, size)
   zeroMem(result, size)
 
-proc dealloc(allocator: var TMemRegion, p: pointer) =
+proc dealloc(allocator: var MemRegion, p: pointer) =
   sysAssert(p != nil, "dealloc 0")
-  var x = cast[pointer](cast[ByteAddress](p) -% sizeof(TFreeCell))
+  var x = cast[pointer](cast[ByteAddress](p) -% sizeof(FreeCell))
   sysAssert(x != nil, "dealloc 1")
   sysAssert(isAccessible(allocator, x), "is not accessible")
-  sysAssert(cast[ptr TFreeCell](x).zeroField == 1, "dealloc 2")
+  sysAssert(cast[ptr FreeCell](x).zeroField == 1, "dealloc 2")
   rawDealloc(allocator, x)
   sysAssert(not isAllocatedPtr(allocator, x), "dealloc 3")
 
-proc realloc(allocator: var TMemRegion, p: pointer, newsize: int): pointer =
+proc realloc(allocator: var MemRegion, p: pointer, newsize: Natural): pointer =
   if newsize > 0:
     result = alloc0(allocator, newsize)
     if p != nil:
@@ -739,11 +808,11 @@ proc realloc(allocator: var TMemRegion, p: pointer, newsize: int): pointer =
   elif p != nil:
     dealloc(allocator, p)
 
-proc deallocOsPages(a: var TMemRegion) =
+proc deallocOsPages(a: var MemRegion) =
   # we free every 'ordinarily' allocated page by iterating over the page bits:
   for p in elements(a.chunkStarts):
     var page = cast[PChunk](p shl PageShift)
-    when not weirdUnmap:
+    when not doNotUnmap:
       var size = if page.size < PageSize: PageSize else: page.size
       osDeallocPages(page, size)
     else:
@@ -756,9 +825,9 @@ proc deallocOsPages(a: var TMemRegion) =
   # And then we free the pages that are in use for the page bits:
   llDeallocAll(a)
 
-proc getFreeMem(a: TMemRegion): int {.inline.} = result = a.freeMem
-proc getTotalMem(a: TMemRegion): int {.inline.} = result = a.currMem
-proc getOccupiedMem(a: TMemRegion): int {.inline.} = 
+proc getFreeMem(a: MemRegion): int {.inline.} = result = a.freeMem
+proc getTotalMem(a: MemRegion): int {.inline.} = result = a.currMem
+proc getOccupiedMem(a: MemRegion): int {.inline.} =
   result = a.currMem - a.freeMem
 
 # ---------------------- thread memory region -------------------------------
@@ -769,21 +838,21 @@ template instantiateForRegion(allocator: expr) =
       result = interiorAllocatedPtr(allocator, p)
 
     proc isAllocatedPtr*(p: pointer): bool =
-      let p = cast[pointer](cast[ByteAddress](p)-%ByteAddress(sizeof(TCell)))
+      let p = cast[pointer](cast[ByteAddress](p)-%ByteAddress(sizeof(Cell)))
       result = isAllocatedPtr(allocator, p)
 
   proc deallocOsPages = deallocOsPages(allocator)
 
-  proc alloc(size: int): pointer =
+  proc alloc(size: Natural): pointer =
     result = alloc(allocator, size)
 
-  proc alloc0(size: int): pointer =
+  proc alloc0(size: Natural): pointer =
     result = alloc0(allocator, size)
 
   proc dealloc(p: pointer) =
     dealloc(allocator, p)
 
-  proc realloc(p: pointer, newsize: int): pointer =
+  proc realloc(p: pointer, newsize: Natural): pointer =
     result = realloc(allocator, p, newSize)
 
   when false:
@@ -794,7 +863,7 @@ template instantiateForRegion(allocator: expr) =
         inc(result, it.size)
         it = it.next
 
-  proc getFreeMem(): int = 
+  proc getFreeMem(): int =
     result = allocator.freeMem
     #sysAssert(result == countFreeMem())
 
@@ -803,11 +872,11 @@ template instantiateForRegion(allocator: expr) =
 
   # -------------------- shared heap region ----------------------------------
   when hasThreadSupport:
-    var sharedHeap: TMemRegion
-    var heapLock: TSysLock
+    var sharedHeap: MemRegion
+    var heapLock: SysLock
     initSysLock(heapLock)
 
-  proc allocShared(size: int): pointer =
+  proc allocShared(size: Natural): pointer =
     when hasThreadSupport:
       acquireSys(heapLock)
       result = alloc(sharedHeap, size)
@@ -815,20 +884,20 @@ template instantiateForRegion(allocator: expr) =
     else:
       result = alloc(size)
 
-  proc allocShared0(size: int): pointer =
+  proc allocShared0(size: Natural): pointer =
     result = allocShared(size)
     zeroMem(result, size)
 
   proc deallocShared(p: pointer) =
-    when hasThreadSupport: 
+    when hasThreadSupport:
       acquireSys(heapLock)
       dealloc(sharedHeap, p)
       releaseSys(heapLock)
     else:
       dealloc(p)
 
-  proc reallocShared(p: pointer, newsize: int): pointer =
-    when hasThreadSupport: 
+  proc reallocShared(p: pointer, newsize: Natural): pointer =
+    when hasThreadSupport:
       acquireSys(heapLock)
       result = realloc(sharedHeap, p, newsize)
       releaseSys(heapLock)
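
Note: with hasThreadSupport, the allocShared family above guards a single shared MemRegion with one SysLock. These procs are part of the public system API; a small usage example:

    # allocate one zeroed int on the shared heap, visible to all threads,
    # and free it manually when done
    var p = cast[ptr int](allocShared0(sizeof(int)))
    p[] = 42
    deallocShared(p)
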
diff --git a/lib/system/ansi_c.nim b/lib/system/ansi_c.nim
index 9406f26c9..702559034 100644
--- a/lib/system/ansi_c.nim
+++ b/lib/system/ansi_c.nim
@@ -13,20 +13,20 @@
 
 {.push hints:off}
 
-proc c_strcmp(a, b: cstring): cint {.header: "<string.h>", 
+proc c_strcmp(a, b: cstring): cint {.header: "<string.h>",
   noSideEffect, importc: "strcmp".}
-proc c_memcmp(a, b: cstring, size: int): cint {.header: "<string.h>", 
+proc c_memcmp(a, b: cstring, size: int): cint {.header: "<string.h>",
   noSideEffect, importc: "memcmp".}
 proc c_memcpy(a, b: cstring, size: int) {.header: "<string.h>", importc: "memcpy".}
-proc c_strlen(a: cstring): int {.header: "<string.h>", 
+proc c_strlen(a: cstring): int {.header: "<string.h>",
   noSideEffect, importc: "strlen".}
 proc c_memset(p: pointer, value: cint, size: int) {.
   header: "<string.h>", importc: "memset".}
 
 type
-  C_TextFile {.importc: "FILE", header: "<stdio.h>", 
+  C_TextFile {.importc: "FILE", header: "<stdio.h>",
                final, incompleteStruct.} = object
-  C_BinaryFile {.importc: "FILE", header: "<stdio.h>", 
+  C_BinaryFile {.importc: "FILE", header: "<stdio.h>",
                  final, incompleteStruct.} = object
   C_TextFileStar = ptr C_TextFile
   C_BinaryFileStar = ptr C_BinaryFile
@@ -101,15 +101,15 @@ proc c_signal(sig: cint, handler: proc (a: cint) {.noconv.}) {.
   importc: "signal", header: "<signal.h>".}
 proc c_raise(sig: cint) {.importc: "raise", header: "<signal.h>".}
 
-proc c_fputs(c: cstring, f: C_TextFileStar) {.importc: "fputs", 
+proc c_fputs(c: cstring, f: C_TextFileStar) {.importc: "fputs",
   header: "<stdio.h>".}
 proc c_fgets(c: cstring, n: int, f: C_TextFileStar): cstring  {.
   importc: "fgets", header: "<stdio.h>".}
-proc c_fgetc(stream: C_TextFileStar): int {.importc: "fgetc", 
+proc c_fgetc(stream: C_TextFileStar): int {.importc: "fgetc",
   header: "<stdio.h>".}
-proc c_ungetc(c: int, f: C_TextFileStar) {.importc: "ungetc", 
+proc c_ungetc(c: int, f: C_TextFileStar) {.importc: "ungetc",
   header: "<stdio.h>".}
-proc c_putc(c: char, stream: C_TextFileStar) {.importc: "putc", 
+proc c_putc(c: char, stream: C_TextFileStar) {.importc: "putc",
   header: "<stdio.h>".}
 proc c_fprintf(f: C_TextFileStar, frmt: cstring) {.
   importc: "fprintf", header: "<stdio.h>", varargs.}
@@ -120,7 +120,7 @@ proc c_fopen(filename, mode: cstring): C_TextFileStar {.
   importc: "fopen", header: "<stdio.h>".}
 proc c_fclose(f: C_TextFileStar) {.importc: "fclose", header: "<stdio.h>".}
 
-proc c_sprintf(buf, frmt: cstring): cint {.header: "<stdio.h>", 
+proc c_sprintf(buf, frmt: cstring): cint {.header: "<stdio.h>",
   importc: "sprintf", varargs, noSideEffect.}
   # we use it only in a way that cannot lead to security issues
 
@@ -149,7 +149,7 @@ when hostOS != "standalone":
   when not declared(errno):
     when defined(NimrodVM):
       var vmErrnoWrapper {.importc.}: ptr cint
-      template errno: expr = 
+      template errno: expr =
         bind vmErrnoWrapper
         vmErrnoWrapper[]
     else:
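
Note: everything in ansi_c.nim follows the same binding pattern: declare the C routine with importc plus the header it lives in, and the compiler emits a direct call to the C symbol. The same pattern works outside the runtime, e.g. (hypothetical wrapper name):

    proc cStrcmp(a, b: cstring): cint {.importc: "strcmp", header: "<string.h>",
                                        noSideEffect.}

    echo cStrcmp("abc", "abd") < 0   # true: "abc" sorts before "abd"
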
diff --git a/lib/system/arithm.nim b/lib/system/arithm.nim
index f68e2dcd9..69c558799 100644
--- a/lib/system/arithm.nim
+++ b/lib/system/arithm.nim
@@ -10,24 +10,121 @@
 
 # simple integer arithmetic with overflow checking
 
-proc raiseOverflow {.compilerproc, noinline, noreturn.} =
+proc raiseOverflow {.compilerproc, noinline.} =
   # a single proc to reduce code size to a minimum
   sysFatal(OverflowError, "over- or underflow")
 
-proc raiseDivByZero {.compilerproc, noinline, noreturn.} =
+proc raiseDivByZero {.compilerproc, noinline.} =
   sysFatal(DivByZeroError, "division by zero")
 
-proc addInt64(a, b: int64): int64 {.compilerProc, inline.} =
-  result = a +% b
-  if (result xor a) >= int64(0) or (result xor b) >= int64(0):
-    return result
-  raiseOverflow()
+when defined(builtinOverflow):
+  # Builtin compiler functions for improved performance
+  when sizeof(clong) == 8:
+    proc addInt64Overflow[T: int64|int](a, b: T, c: var T): bool {.
+      importc: "__builtin_saddl_overflow", nodecl, nosideeffect.}
 
-proc subInt64(a, b: int64): int64 {.compilerProc, inline.} =
-  result = a -% b
-  if (result xor a) >= int64(0) or (result xor not b) >= int64(0):
-    return result
-  raiseOverflow()
+    proc subInt64Overflow[T: int64|int](a, b: T, c: var T): bool {.
+      importc: "__builtin_ssubl_overflow", nodecl, nosideeffect.}
+
+    proc mulInt64Overflow[T: int64|int](a, b: T, c: var T): bool {.
+      importc: "__builtin_smull_overflow", nodecl, nosideeffect.}
+
+  elif sizeof(clonglong) == 8:
+    proc addInt64Overflow[T: int64|int](a, b: T, c: var T): bool {.
+      importc: "__builtin_saddll_overflow", nodecl, nosideeffect.}
+
+    proc subInt64Overflow[T: int64|int](a, b: T, c: var T): bool {.
+      importc: "__builtin_ssubll_overflow", nodecl, nosideeffect.}
+
+    proc mulInt64Overflow[T: int64|int](a, b: T, c: var T): bool {.
+      importc: "__builtin_smulll_overflow", nodecl, nosideeffect.}
+
+  when sizeof(int) == 8:
+    proc addIntOverflow(a, b: int, c: var int): bool {.inline.} =
+      addInt64Overflow(a, b, c)
+
+    proc subIntOverflow(a, b: int, c: var int): bool {.inline.} =
+      subInt64Overflow(a, b, c)
+
+    proc mulIntOverflow(a, b: int, c: var int): bool {.inline.} =
+      mulInt64Overflow(a, b, c)
+
+  elif sizeof(int) == 4 and sizeof(cint) == 4:
+    proc addIntOverflow(a, b: int, c: var int): bool {.
+      importc: "__builtin_sadd_overflow", nodecl, nosideeffect.}
+
+    proc subIntOverflow(a, b: int, c: var int): bool {.
+      importc: "__builtin_ssub_overflow", nodecl, nosideeffect.}
+
+    proc mulIntOverflow(a, b: int, c: var int): bool {.
+      importc: "__builtin_smul_overflow", nodecl, nosideeffect.}
+
+  proc addInt64(a, b: int64): int64 {.compilerProc, inline.} =
+    if addInt64Overflow(a, b, result):
+      raiseOverflow()
+
+  proc subInt64(a, b: int64): int64 {.compilerProc, inline.} =
+    if subInt64Overflow(a, b, result):
+      raiseOverflow()
+
+  proc mulInt64(a, b: int64): int64 {.compilerproc, inline.} =
+    if mulInt64Overflow(a, b, result):
+      raiseOverflow()
+else:
+  proc addInt64(a, b: int64): int64 {.compilerProc, inline.} =
+    result = a +% b
+    if (result xor a) >= int64(0) or (result xor b) >= int64(0):
+      return result
+    raiseOverflow()
+
+  proc subInt64(a, b: int64): int64 {.compilerProc, inline.} =
+    result = a -% b
+    if (result xor a) >= int64(0) or (result xor not b) >= int64(0):
+      return result
+    raiseOverflow()
+
+  #
+  # This code has been inspired by Python's source code.
+  # The native int product x*y is either exactly right or *way* off, being
+  # just the last n bits of the true product, where n is the number of bits
+  # in an int (the delivered product is the true product plus i*2**n for
+  # some integer i).
+  #
+  # The native float64 product x*y is subject to three
+  # rounding errors: on a sizeof(int)==8 box, each cast to double can lose
+  # info, and even on a sizeof(int)==4 box, the multiplication can lose info.
+  # But, unlike the native int product, it's not in *range* trouble:  even
+  # if sizeof(int)==32 (256-bit ints), the product easily fits in the
+  # dynamic range of a float64. So the leading 50 (or so) bits of the float64
+  # product are correct.
+  #
+  # We check these two ways against each other, and declare victory if they're
+  # approximately the same. Else, because the native int product is the only
+  # one that can lose catastrophic amounts of information, it's the native int
+  # product that must have overflowed.
+  #
+  proc mulInt64(a, b: int64): int64 {.compilerproc.} =
+    var
+      resAsFloat, floatProd: float64
+    result = a *% b
+    floatProd = toBiggestFloat(a) # conversion
+    floatProd = floatProd * toBiggestFloat(b)
+    resAsFloat = toBiggestFloat(result)
+
+    # Fast path for normal case: small multiplicands, and no info
+    # is lost in either method.
+    if resAsFloat == floatProd: return result
+
+    # Somebody somewhere lost info. Close enough, or way off? Note
+    # that a != 0 and b != 0 (else resAsFloat == floatProd == 0).
+    # The difference either is or isn't significant compared to the
+    # true value (of which floatProd is a good approximation).
+
+    # abs(diff)/abs(prod) <= 1/32 iff
+    #   32 * abs(diff) <= abs(prod) -- 5 good bits is "close enough"
+    if 32.0 * abs(resAsFloat - floatProd) <= abs(floatProd):
+      return result
+    raiseOverflow()
 
 proc negInt64(a: int64): int64 {.compilerProc, inline.} =
   if a != low(int64): return -a
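
Note: the new builtinOverflow path maps Nim's checked arithmetic onto the GCC/Clang __builtin_*_overflow intrinsics: they perform the operation, write the (possibly wrapped) result through the out parameter, and return true iff it overflowed. A minimal sketch of the same binding outside the runtime, assuming a GCC-compatible backend where int64 is C long long:

    proc addOverflows(a, b: int64, res: var int64): bool {.
      importc: "__builtin_saddll_overflow", nodecl, nosideeffect.}

    var sum: int64
    if addOverflows(high(int64), 1'i64, sum):
      echo "overflow detected"        # this branch is taken
    else:
      echo sum
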
@@ -51,50 +148,6 @@ proc modInt64(a, b: int64): int64 {.compilerProc, inline.} =
     raiseDivByZero()
   return a mod b
 
-#
-# This code has been inspired by Python's source code.
-# The native int product x*y is either exactly right or *way* off, being
-# just the last n bits of the true product, where n is the number of bits
-# in an int (the delivered product is the true product plus i*2**n for
-# some integer i).
-#
-# The native float64 product x*y is subject to three
-# rounding errors: on a sizeof(int)==8 box, each cast to double can lose
-# info, and even on a sizeof(int)==4 box, the multiplication can lose info.
-# But, unlike the native int product, it's not in *range* trouble:  even
-# if sizeof(int)==32 (256-bit ints), the product easily fits in the
-# dynamic range of a float64. So the leading 50 (or so) bits of the float64
-# product are correct.
-#
-# We check these two ways against each other, and declare victory if they're
-# approximately the same. Else, because the native int product is the only
-# one that can lose catastrophic amounts of information, it's the native int
-# product that must have overflowed.
-#
-proc mulInt64(a, b: int64): int64 {.compilerproc.} =
-  var
-    resAsFloat, floatProd: float64
-  result = a *% b
-  floatProd = toBiggestFloat(a) # conversion
-  floatProd = floatProd * toBiggestFloat(b)
-  resAsFloat = toBiggestFloat(result)
-
-  # Fast path for normal case: small multiplicands, and no info
-  # is lost in either method.
-  if resAsFloat == floatProd: return result
-
-  # Somebody somewhere lost info. Close enough, or way off? Note
-  # that a != 0 and b != 0 (else resAsFloat == floatProd == 0).
-  # The difference either is or isn't significant compared to the
-  # true value (of which floatProd is a good approximation).
-
-  # abs(diff)/abs(prod) <= 1/32 iff
-  #   32 * abs(diff) <= abs(prod) -- 5 good bits is "close enough"
-  if 32.0 * abs(resAsFloat - floatProd) <= abs(floatProd):
-    return result
-  raiseOverflow()
-
-
 proc absInt(a: int): int {.compilerProc, inline.} =
   if a != low(int):
     if a >= 0: return a
@@ -246,6 +299,21 @@ elif false: # asmVersion and (defined(gcc) or defined(llvm_gcc)):
             :"%edx"
     """
 
+when not declared(addInt) and defined(builtinOverflow):
+  proc addInt(a, b: int): int {.compilerProc, inline.} =
+    if addIntOverflow(a, b, result):
+      raiseOverflow()
+
+when not declared(subInt) and defined(builtinOverflow):
+  proc subInt(a, b: int): int {.compilerProc, inline.} =
+    if subIntOverflow(a, b, result):
+      raiseOverflow()
+
+when not declared(mulInt) and defined(builtinOverflow):
+  proc mulInt(a, b: int): int {.compilerProc, inline.} =
+    if mulIntOverflow(a, b, result):
+      raiseOverflow()
+
 # Platform independent versions of the above (slower!)
 when not declared(addInt):
   proc addInt(a, b: int): int {.compilerProc, inline.} =
@@ -327,13 +395,13 @@ when not declared(mulInt):
 # We avoid setting the FPU control word here for compatibility with libraries
 # written in other languages.
 
-proc raiseFloatInvalidOp {.noinline, noreturn.} =
+proc raiseFloatInvalidOp {.noinline.} =
   sysFatal(FloatInvalidOpError, "FPU operation caused a NaN result")
 
 proc nanCheck(x: float64) {.compilerProc, inline.} =
   if x != x: raiseFloatInvalidOp()
 
-proc raiseFloatOverflow(x: float64) {.noinline, noreturn.} =
+proc raiseFloatOverflow(x: float64) {.noinline.} =
   if x > 0.0:
     sysFatal(FloatOverflowError, "FPU operation caused an overflow")
   else:
diff --git a/lib/system/assign.nim b/lib/system/assign.nim
index 78995954f..55d7572e2 100644
--- a/lib/system/assign.nim
+++ b/lib/system/assign.nim
@@ -17,7 +17,7 @@ proc genericAssignAux(dest, src: pointer, n: ptr TNimNode,
     s = cast[ByteAddress](src)
   case n.kind
   of nkSlot:
-    genericAssignAux(cast[pointer](d +% n.offset), 
+    genericAssignAux(cast[pointer](d +% n.offset),
                      cast[pointer](s +% n.offset), n.typ, shallow)
   of nkList:
     for i in 0..n.len-1:
@@ -54,7 +54,7 @@ proc genericAssignAux(dest, src: pointer, mt: PNimType, shallow: bool) =
       unsureAsgnRef(x, copyString(cast[NimString](s2)))
   of tySequence:
     var s2 = cast[PPointer](src)[]
-    var seq = cast[PGenericSeq](s2)      
+    var seq = cast[PGenericSeq](s2)
     var x = cast[PPointer](dest)
     if s2 == nil or shallow or (seq.reserved and seqShallowFlag) != 0:
       # this can happen! nil sequences are allowed
@@ -100,7 +100,7 @@ proc genericShallowAssign(dest, src: pointer, mt: PNimType) {.compilerProc.} =
 
 when false:
   proc debugNimType(t: PNimType) =
-    if t.isNil: 
+    if t.isNil:
       cprintf("nil!")
       return
     var k: cstring
@@ -170,21 +170,13 @@ proc objectInit(dest: pointer, typ: PNimType) =
     for i in 0..(typ.size div typ.base.size)-1:
       objectInit(cast[pointer](d +% i * typ.base.size), typ.base)
   else: discard # nothing to do
-  
+
 # ---------------------- assign zero -----------------------------------------
 
-when not defined(nimmixin):
-  proc destroy(x: int) = discard
-  proc nimDestroyRange*[T](r: T) =
-    # internal proc used for destroying sequences and arrays
-    for i in countup(0, r.len - 1): destroy(r[i])
-else:
-  # XXX Why is this exported and no compilerproc? -> compilerprocs cannot be
-  # generic for now
-  proc nimDestroyRange*[T](r: T) =
-    # internal proc used for destroying sequences and arrays
-    mixin destroy
-    for i in countup(0, r.len - 1): destroy(r[i])
+proc nimDestroyRange[T](r: T) {.compilerProc.} =
+  # internal proc used for destroying sequences and arrays
+  mixin `=destroy`
+  for i in countup(0, r.len - 1): `=destroy`(r[i])
 
 proc genericReset(dest: pointer, mt: PNimType) {.compilerProc, benign.}
 proc genericResetAux(dest: pointer, n: ptr TNimNode) =
@@ -198,7 +190,7 @@ proc genericResetAux(dest: pointer, n: ptr TNimNode) =
     var m = selectBranch(dest, n)
     if m != nil: genericResetAux(dest, m)
     zeroMem(cast[pointer](d +% n.offset), n.typ.size)
-  
+
 proc genericReset(dest: pointer, mt: PNimType) =
   var d = cast[ByteAddress](dest)
   sysAssert(mt != nil, "genericReset 2")
@@ -218,15 +210,15 @@ proc genericReset(dest: pointer, mt: PNimType) =
   else:
     zeroMem(dest, mt.size) # set raw bits to zero
 
-proc selectBranch(discVal, L: int, 
+proc selectBranch(discVal, L: int,
                   a: ptr array [0..0x7fff, ptr TNimNode]): ptr TNimNode =
   result = a[L] # a[L] contains the ``else`` part (but may be nil)
   if discVal <% L:
     var x = a[discVal]
     if x != nil: result = x
-  
-proc FieldDiscriminantCheck(oldDiscVal, newDiscVal: int, 
-                            a: ptr array [0..0x7fff, ptr TNimNode], 
+
+proc FieldDiscriminantCheck(oldDiscVal, newDiscVal: int,
+                            a: ptr array [0..0x7fff, ptr TNimNode],
                             L: int) {.compilerProc.} =
   var oldBranch = selectBranch(oldDiscVal, L, a)
   var newBranch = selectBranch(newDiscVal, L, a)
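
Note: genericAssignAux above is the machinery behind assignment of GC'ed values; for a seq, unless the shallow flag is set, the destination gets its own copy of the payload. The effect is observable from user code (behaviour under the refc GC this file targets):

    var a = @[1, 2, 3]
    var b = a          # deep copy, as the tySequence branch above does for seqs inside objects
    b[0] = 99
    echo a             # @[1, 2, 3]  (a is unaffected)
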
diff --git a/lib/system/atomics.nim b/lib/system/atomics.nim
index 300fa85f3..158fe91bc 100644
--- a/lib/system/atomics.nim
+++ b/lib/system/atomics.nim
@@ -37,39 +37,40 @@ when someGcc and hasThreadSupport:
     ## and release stores in all threads.
 
   type
-    TAtomType* = SomeNumber|pointer|ptr|char|bool
+    AtomType* = SomeNumber|pointer|ptr|char|bool
       ## Type Class representing valid types for use with atomic procs
+  {.deprecated: [TAtomType: AtomType].}
 
-  proc atomicLoadN*[T: TAtomType](p: ptr T, mem: AtomMemModel): T {.
+  proc atomicLoadN*[T: AtomType](p: ptr T, mem: AtomMemModel): T {.
     importc: "__atomic_load_n", nodecl.}
     ## This proc implements an atomic load operation. It returns the contents at p.
     ## ATOMIC_RELAXED, ATOMIC_SEQ_CST, ATOMIC_ACQUIRE, ATOMIC_CONSUME.
 
-  proc atomicLoad*[T: TAtomType](p, ret: ptr T, mem: AtomMemModel) {.
+  proc atomicLoad*[T: AtomType](p, ret: ptr T, mem: AtomMemModel) {.
     importc: "__atomic_load", nodecl.}
     ## This is the generic version of an atomic load. It returns the contents at p in ret.
 
-  proc atomicStoreN*[T: TAtomType](p: ptr T, val: T, mem: AtomMemModel) {.
+  proc atomicStoreN*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel) {.
     importc: "__atomic_store_n", nodecl.}
     ## This proc implements an atomic store operation. It writes val at p.
     ## ATOMIC_RELAXED, ATOMIC_SEQ_CST, and ATOMIC_RELEASE.
 
-  proc atomicStore*[T: TAtomType](p, val: ptr T, mem: AtomMemModel) {.
+  proc atomicStore*[T: AtomType](p, val: ptr T, mem: AtomMemModel) {.
     importc: "__atomic_store", nodecl.}
     ## This is the generic version of an atomic store. It stores the value of val at p
 
-  proc atomicExchangeN*[T: TAtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
+  proc atomicExchangeN*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
     importc: "__atomic_exchange_n", nodecl.}
     ## This proc implements an atomic exchange operation. It writes val at p,
     ## and returns the previous contents at p.
     ## ATOMIC_RELAXED, ATOMIC_SEQ_CST, ATOMIC_ACQUIRE, ATOMIC_RELEASE, ATOMIC_ACQ_REL
 
-  proc atomicExchange*[T: TAtomType](p, val, ret: ptr T, mem: AtomMemModel) {.
+  proc atomicExchange*[T: AtomType](p, val, ret: ptr T, mem: AtomMemModel) {.
     importc: "__atomic_exchange", nodecl.}
     ## This is the generic version of an atomic exchange. It stores the contents at val at p.
     ## The original value at p is copied into ret.
 
-  proc atomicCompareExchangeN*[T: TAtomType](p, expected: ptr T, desired: T,
+  proc atomicCompareExchangeN*[T: AtomType](p, expected: ptr T, desired: T,
     weak: bool, success_memmodel: AtomMemModel, failure_memmodel: AtomMemModel): bool {.
     importc: "__atomic_compare_exchange_n ", nodecl.}
     ## This proc implements an atomic compare and exchange operation. This compares the
@@ -85,39 +86,39 @@ when someGcc and hasThreadSupport:
     ## cannot be __ATOMIC_RELEASE nor __ATOMIC_ACQ_REL. It also cannot be a stronger model
     ## than that specified by success_memmodel.
 
-  proc atomicCompareExchange*[T: TAtomType](p, expected, desired: ptr T,
+  proc atomicCompareExchange*[T: AtomType](p, expected, desired: ptr T,
     weak: bool, success_memmodel: AtomMemModel, failure_memmodel: AtomMemModel): bool {.
-    importc: "__atomic_compare_exchange_n ", nodecl.}
+    importc: "__atomic_compare_exchange", nodecl.}
     ## This proc implements the generic version of atomic_compare_exchange.
     ## The proc is virtually identical to atomic_compare_exchange_n, except the desired
     ## value is also a pointer.
 
   ## Perform the operation return the new value, all memory models are valid
-  proc atomicAddFetch*[T: TAtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
+  proc atomicAddFetch*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
     importc: "__atomic_add_fetch", nodecl.}
-  proc atomicSubFetch*[T: TAtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
+  proc atomicSubFetch*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
     importc: "__atomic_sub_fetch", nodecl.}
-  proc atomicOrFetch*[T: TAtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
+  proc atomicOrFetch*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
     importc: "__atomic_or_fetch ", nodecl.}
-  proc atomicAndFetch*[T: TAtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
+  proc atomicAndFetch*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
     importc: "__atomic_and_fetch", nodecl.}
-  proc atomicXorFetch*[T: TAtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
+  proc atomicXorFetch*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
     importc: "__atomic_xor_fetch", nodecl.}
-  proc atomicNandFetch*[T: TAtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
+  proc atomicNandFetch*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
     importc: "__atomic_nand_fetch ", nodecl.}
 
   ## Perform the operation return the old value, all memory models are valid
-  proc atomicFetchAdd*[T: TAtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
+  proc atomicFetchAdd*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
     importc: "__atomic_fetch_add", nodecl.}
-  proc atomicFetchSub*[T: TAtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
+  proc atomicFetchSub*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
     importc: "__atomic_fetch_sub", nodecl.}
-  proc atomicFetchOr*[T: TAtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
+  proc atomicFetchOr*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
     importc: "__atomic_fetch_or", nodecl.}
-  proc atomicFetchAnd*[T: TAtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
+  proc atomicFetchAnd*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
     importc: "__atomic_fetch_and", nodecl.}
-  proc atomicFetchXor*[T: TAtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
+  proc atomicFetchXor*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
     importc: "__atomic_fetch_xor", nodecl.}
-  proc atomicFetchNand*[T: TAtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
+  proc atomicFetchNand*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
     importc: "__atomic_fetch_nand", nodecl.}
 
   proc atomicTestAndSet*(p: pointer, mem: AtomMemModel): bool {.
@@ -190,11 +191,11 @@ proc atomicDec*(memLoc: var int, x: int = 1): int =
     result = memLoc
 
 when defined(windows) and not someGcc:
-  proc interlockedCompareExchange(p: pointer; exchange, comparand: int32): int32
+  proc interlockedCompareExchange(p: pointer; exchange, comparand: int): int
     {.importc: "InterlockedCompareExchange", header: "<windows.h>", cdecl.}
 
   proc cas*[T: bool|int|ptr](p: ptr T; oldValue, newValue: T): bool =
-    interlockedCompareExchange(p, newValue.int32, oldValue.int32) != 0
+    interlockedCompareExchange(p, cast[int](newValue), cast[int](oldValue)) != 0
   # XXX fix for 64 bit build
 else:
   # this is valid for GCC and Intel C++
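
A minimal usage sketch for the renamed AtomType procs above (assuming a GCC-like
backend and --threads:on, so the __atomic_* wrappers and the ATOMIC_* memory-model
constants are available from the system module):

    var counter: int
    discard atomicAddFetch(addr counter, 1, ATOMIC_SEQ_CST)   # counter is now 1
    let snapshot = atomicLoadN(addr counter, ATOMIC_SEQ_CST)  # sequentially consistent read
    echo snapshot
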
diff --git a/lib/system/avltree.nim b/lib/system/avltree.nim
index 292097062..d5c901542 100644
--- a/lib/system/avltree.nim
+++ b/lib/system/avltree.nim
@@ -16,7 +16,7 @@ proc lowGauge(n: PAvlNode): int =
   while not isBottom(it):
     result = it.key
     it = it.link[0]
-  
+
 proc highGauge(n: PAvlNode): int =
   result = -1
   var it = n
@@ -24,7 +24,7 @@ proc highGauge(n: PAvlNode): int =
     result = it.upperBound
     it = it.link[1]
 
-proc find(root: PAvlNode, key: int): PAvlNode = 
+proc find(root: PAvlNode, key: int): PAvlNode =
   var it = root
   while not isBottom(it):
     if it.key == key: return it
@@ -51,7 +51,7 @@ proc split(t: var PAvlNode) =
     t.link[0] = temp
     inc t.level
 
-proc add(a: var TMemRegion, t: var PAvlNode, key, upperBound: int) {.benign.} =
+proc add(a: var MemRegion, t: var PAvlNode, key, upperBound: int) {.benign.} =
   if t == bottom:
     t = allocAvlNode(a, key, upperBound)
   else:
@@ -64,7 +64,7 @@ proc add(a: var TMemRegion, t: var PAvlNode, key, upperBound: int) {.benign.} =
     skew(t)
     split(t)
 
-proc del(a: var TMemRegion, t: var PAvlNode, x: int) {.benign.} =
+proc del(a: var MemRegion, t: var PAvlNode, x: int) {.benign.} =
   if t == bottom: return
   a.last = t
   if x <% t.key:
diff --git a/lib/system/cellsets.nim b/lib/system/cellsets.nim
index 0e3a01eba..bb5de6f42 100644
--- a/lib/system/cellsets.nim
+++ b/lib/system/cellsets.nim
@@ -10,10 +10,10 @@
 # Efficient set of pointers for the GC (and repr)
 
 type
-  TRefCount = int
+  RefCount = int
 
-  TCell {.pure.} = object
-    refcount: TRefCount  # the refcount and some flags
+  Cell {.pure.} = object
+    refcount: RefCount  # the refcount and some flags
     typ: PNimType
     when trackAllocationSource:
       filename: cstring
@@ -21,34 +21,35 @@ type
     when useCellIds:
       id: int
 
-  PCell = ptr TCell
+  PCell = ptr Cell
 
-  PPageDesc = ptr TPageDesc
-  TBitIndex = range[0..UnitsPerPage-1]
-  TPageDesc {.final, pure.} = object
+  PPageDesc = ptr PageDesc
+  BitIndex = range[0..UnitsPerPage-1]
+  PageDesc {.final, pure.} = object
     next: PPageDesc # all nodes are connected with this pointer
     key: ByteAddress   # start address at bit 0
-    bits: array[TBitIndex, int] # a bit vector
+    bits: array[BitIndex, int] # a bit vector
 
   PPageDescArray = ptr array[0..1000_000, PPageDesc]
-  TCellSet {.final, pure.} = object
+  CellSet {.final, pure.} = object
     counter, max: int
     head: PPageDesc
     data: PPageDescArray
 
   PCellArray = ptr array[0..100_000_000, PCell]
-  TCellSeq {.final, pure.} = object
+  CellSeq {.final, pure.} = object
     len, cap: int
     d: PCellArray
-
+{.deprecated: [TCell: Cell, TBitIndex: BitIndex, TPageDesc: PageDesc,
+              TRefCount: RefCount, TCellSet: CellSet, TCellSeq: CellSeq].}
 # ------------------- cell seq handling ---------------------------------------
 
-proc contains(s: TCellSeq, c: PCell): bool {.inline.} =
+proc contains(s: CellSeq, c: PCell): bool {.inline.} =
   for i in 0 .. s.len-1:
     if s.d[i] == c: return true
   return false
 
-proc add(s: var TCellSeq, c: PCell) {.inline.} =
+proc add(s: var CellSeq, c: PCell) {.inline.} =
   if s.len >= s.cap:
     s.cap = s.cap * 3 div 2
     var d = cast[PCellArray](alloc(s.cap * sizeof(PCell)))
@@ -59,12 +60,12 @@ proc add(s: var TCellSeq, c: PCell) {.inline.} =
   s.d[s.len] = c
   inc(s.len)
 
-proc init(s: var TCellSeq, cap: int = 1024) =
+proc init(s: var CellSeq, cap: int = 1024) =
   s.len = 0
   s.cap = cap
   s.d = cast[PCellArray](alloc0(cap * sizeof(PCell)))
 
-proc deinit(s: var TCellSeq) = 
+proc deinit(s: var CellSeq) =
   dealloc(s.d)
   s.d = nil
   s.len = 0
@@ -75,13 +76,13 @@ proc deinit(s: var TCellSeq) =
 const
   InitCellSetSize = 1024 # must be a power of two!
 
-proc init(s: var TCellSet) =
+proc init(s: var CellSet) =
   s.data = cast[PPageDescArray](alloc0(InitCellSetSize * sizeof(PPageDesc)))
   s.max = InitCellSetSize-1
   s.counter = 0
   s.head = nil
 
-proc deinit(s: var TCellSet) =
+proc deinit(s: var CellSet) =
   var it = s.head
   while it != nil:
     var n = it.next
@@ -97,15 +98,15 @@ proc nextTry(h, maxHash: int): int {.inline.} =
   # For any initial h in range(maxHash), repeating that maxHash times
   # generates each int in range(maxHash) exactly once (see any text on
   # random-number generation for proof).
-  
-proc cellSetGet(t: TCellSet, key: ByteAddress): PPageDesc =
+
+proc cellSetGet(t: CellSet, key: ByteAddress): PPageDesc =
   var h = cast[int](key) and t.max
   while t.data[h] != nil:
     if t.data[h].key == key: return t.data[h]
     h = nextTry(h, t.max)
   return nil
 
-proc cellSetRawInsert(t: TCellSet, data: PPageDescArray, desc: PPageDesc) =
+proc cellSetRawInsert(t: CellSet, data: PPageDescArray, desc: PPageDesc) =
   var h = cast[int](desc.key) and t.max
   while data[h] != nil:
     sysAssert(data[h] != desc, "CellSetRawInsert 1")
@@ -113,7 +114,7 @@ proc cellSetRawInsert(t: TCellSet, data: PPageDescArray, desc: PPageDesc) =
   sysAssert(data[h] == nil, "CellSetRawInsert 2")
   data[h] = desc
 
-proc cellSetEnlarge(t: var TCellSet) =
+proc cellSetEnlarge(t: var CellSet) =
   var oldMax = t.max
   t.max = ((t.max+1)*2)-1
   var n = cast[PPageDescArray](alloc0((t.max + 1) * sizeof(PPageDesc)))
@@ -123,7 +124,7 @@ proc cellSetEnlarge(t: var TCellSet) =
   dealloc(t.data)
   t.data = n
 
-proc cellSetPut(t: var TCellSet, key: ByteAddress): PPageDesc =
+proc cellSetPut(t: var CellSet, key: ByteAddress): PPageDesc =
   var h = cast[int](key) and t.max
   while true:
     var x = t.data[h]
@@ -138,7 +139,7 @@ proc cellSetPut(t: var TCellSet, key: ByteAddress): PPageDesc =
   while t.data[h] != nil: h = nextTry(h, t.max)
   sysAssert(t.data[h] == nil, "CellSetPut")
   # the new page descriptor goes into result
-  result = cast[PPageDesc](alloc0(sizeof(TPageDesc)))
+  result = cast[PPageDesc](alloc0(sizeof(PageDesc)))
   result.next = t.head
   result.key = key
   t.head = result
@@ -146,7 +147,7 @@ proc cellSetPut(t: var TCellSet, key: ByteAddress): PPageDesc =
 
 # ---------- slightly higher level procs --------------------------------------
 
-proc contains(s: TCellSet, cell: PCell): bool =
+proc contains(s: CellSet, cell: PCell): bool =
   var u = cast[ByteAddress](cell)
   var t = cellSetGet(s, u shr PageShift)
   if t != nil:
@@ -155,13 +156,13 @@ proc contains(s: TCellSet, cell: PCell): bool =
   else:
     result = false
 
-proc incl(s: var TCellSet, cell: PCell) {.noinline.} =
+proc incl(s: var CellSet, cell: PCell) {.noinline.} =
   var u = cast[ByteAddress](cell)
   var t = cellSetPut(s, u shr PageShift)
   u = (u %% PageSize) /% MemAlign
   t.bits[u shr IntShift] = t.bits[u shr IntShift] or (1 shl (u and IntMask))
 
-proc excl(s: var TCellSet, cell: PCell) =
+proc excl(s: var CellSet, cell: PCell) =
   var u = cast[ByteAddress](cell)
   var t = cellSetGet(s, u shr PageShift)
   if t != nil:
@@ -169,20 +170,20 @@ proc excl(s: var TCellSet, cell: PCell) =
     t.bits[u shr IntShift] = (t.bits[u shr IntShift] and
                               not (1 shl (u and IntMask)))
 
-proc containsOrIncl(s: var TCellSet, cell: PCell): bool = 
+proc containsOrIncl(s: var CellSet, cell: PCell): bool =
   var u = cast[ByteAddress](cell)
   var t = cellSetGet(s, u shr PageShift)
   if t != nil:
     u = (u %% PageSize) /% MemAlign
     result = (t.bits[u shr IntShift] and (1 shl (u and IntMask))) != 0
-    if not result: 
+    if not result:
       t.bits[u shr IntShift] = t.bits[u shr IntShift] or
           (1 shl (u and IntMask))
-  else: 
+  else:
     incl(s, cell)
     result = false
 
-iterator elements(t: TCellSet): PCell {.inline.} =
+iterator elements(t: CellSet): PCell {.inline.} =
   # while traversing it is forbidden to add pointers to the tree!
   var r = t.head
   while r != nil:
@@ -200,7 +201,7 @@ iterator elements(t: TCellSet): PCell {.inline.} =
       inc(i)
     r = r.next
 
-iterator elementsExcept(t, s: TCellSet): PCell {.inline.} =
+iterator elementsExcept(t, s: CellSet): PCell {.inline.} =
   var r = t.head
   while r != nil:
     let ss = cellSetGet(s, r.key)
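
For orientation, incl/excl/contains above all split a cell address the same way; a rough
sketch of the arithmetic, relying on the module's internal constants and shown only for
illustration:

    let u = cast[ByteAddress](cell)
    let pageKey = u shr PageShift           # selects the PageDesc via cellSetGet/cellSetPut
    let slot = (u %% PageSize) /% MemAlign  # index of the cell within its page
    let word = slot shr IntShift            # which int of the `bits` vector
    let mask = 1 shl (slot and IntMask)     # which bit inside that int
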
diff --git a/lib/system/cgprocs.nim b/lib/system/cgprocs.nim
index f3acc81f2..660c68116 100644
--- a/lib/system/cgprocs.nim
+++ b/lib/system/cgprocs.nim
@@ -9,17 +9,13 @@
 
 # Headers for procs that the code generator depends on ("compilerprocs")
 
-proc addChar(s: NimString, c: char): NimString {.compilerProc, benign.}
-
 type
-  TLibHandle = pointer       # private type
-  TProcAddr = pointer        # library loading and loading of procs:
+  LibHandle = pointer       # private type
+  ProcAddr = pointer        # library loading and loading of procs:
+{.deprecated: [TLibHandle: LibHandle, TProcAddr: ProcAddr].}
 
-proc nimLoadLibrary(path: string): TLibHandle {.compilerproc.}
-proc nimUnloadLibrary(lib: TLibHandle) {.compilerproc.}
-proc nimGetProcAddr(lib: TLibHandle, name: cstring): TProcAddr {.compilerproc.}
+proc nimLoadLibrary(path: string): LibHandle {.compilerproc.}
+proc nimUnloadLibrary(lib: LibHandle) {.compilerproc.}
+proc nimGetProcAddr(lib: LibHandle, name: cstring): ProcAddr {.compilerproc.}
 
 proc nimLoadLibraryError(path: string) {.compilerproc, noinline.}
-
-proc setStackBottom(theStackBottom: pointer) {.compilerRtl, noinline, benign.}
-
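
These compilerprocs are not called directly by user code; a dynlib import such as the
following sketch makes the code generator emit nimLoadLibrary/nimGetProcAddr calls
behind the scenes (the library name is illustrative and platform dependent):

    proc c_sqrt(x: cdouble): cdouble {.importc: "sqrt", dynlib: "libm.so.6".}
    echo c_sqrt(2.0)
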
diff --git a/lib/system/channels.nim b/lib/system/channels.nim
index ebd30c353..68c0e32d2 100644
--- a/lib/system/channels.nim
+++ b/lib/system/channels.nim
@@ -1,265 +1,267 @@
-#
-#
-#            Nim's Runtime Library
+#
+#
+#            Nim's Runtime Library
 #        (c) Copyright 2015 Andreas Rumpf
-#
-#    See the file "copying.txt", included in this
-#    distribution, for details about the copyright.
-#
-
-## Channel support for threads. **Note**: This is part of the system module.
-## Do not import it directly. To activate thread support you need to compile
-## with the ``--threads:on`` command line switch.
-##
-## **Note:** The current implementation of message passing is slow and does
-## not work with cyclic data structures.
-  
-when not declared(NimString):
-  {.error: "You must not import this module explicitly".}
-
-type
-  pbytes = ptr array[0.. 0xffff, byte]
-  TRawChannel {.pure, final.} = object ## msg queue for a thread
-    rd, wr, count, mask: int
-    data: pbytes
-    lock: TSysLock
-    cond: TSysCond
-    elemType: PNimType
-    ready: bool
-    region: TMemRegion
-  PRawChannel = ptr TRawChannel
-  TLoadStoreMode = enum mStore, mLoad
-  TChannel* {.gcsafe.}[TMsg] = TRawChannel ## a channel for thread communication
-
-const ChannelDeadMask = -2
-
-proc initRawChannel(p: pointer) =
-  var c = cast[PRawChannel](p)
-  initSysLock(c.lock)
-  initSysCond(c.cond)
-  c.mask = -1
-
-proc deinitRawChannel(p: pointer) =
-  var c = cast[PRawChannel](p)
-  # we need to grab the lock to be safe against sending threads!
-  acquireSys(c.lock)
-  c.mask = ChannelDeadMask
-  deallocOsPages(c.region)
-  deinitSys(c.lock)
-  deinitSysCond(c.cond)
-
-proc storeAux(dest, src: pointer, mt: PNimType, t: PRawChannel, 
-              mode: TLoadStoreMode) {.benign.}
-proc storeAux(dest, src: pointer, n: ptr TNimNode, t: PRawChannel,
-              mode: TLoadStoreMode) {.benign.} =
-  var
-    d = cast[ByteAddress](dest)
-    s = cast[ByteAddress](src)
-  case n.kind
-  of nkSlot: storeAux(cast[pointer](d +% n.offset), 
-                      cast[pointer](s +% n.offset), n.typ, t, mode)
-  of nkList:
-    for i in 0..n.len-1: storeAux(dest, src, n.sons[i], t, mode)
-  of nkCase:
-    copyMem(cast[pointer](d +% n.offset), cast[pointer](s +% n.offset),
-            n.typ.size)
-    var m = selectBranch(src, n)
-    if m != nil: storeAux(dest, src, m, t, mode)
-  of nkNone: sysAssert(false, "storeAux")
-
-proc storeAux(dest, src: pointer, mt: PNimType, t: PRawChannel, 
-              mode: TLoadStoreMode) =
-  var
-    d = cast[ByteAddress](dest)
-    s = cast[ByteAddress](src)
-  sysAssert(mt != nil, "mt == nil")
-  case mt.kind
-  of tyString:
-    if mode == mStore:
-      var x = cast[PPointer](dest)
-      var s2 = cast[PPointer](s)[]
-      if s2 == nil: 
-        x[] = nil
-      else:
-        var ss = cast[NimString](s2)
-        var ns = cast[NimString](alloc(t.region, ss.len+1 + GenericSeqSize))
-        copyMem(ns, ss, ss.len+1 + GenericSeqSize)
-        x[] = ns
-    else:
-      var x = cast[PPointer](dest)
-      var s2 = cast[PPointer](s)[]
-      if s2 == nil:
-        unsureAsgnRef(x, s2)
-      else:
-        unsureAsgnRef(x, copyString(cast[NimString](s2)))
-        dealloc(t.region, s2)
-  of tySequence:
-    var s2 = cast[PPointer](src)[]
-    var seq = cast[PGenericSeq](s2)
-    var x = cast[PPointer](dest)
-    if s2 == nil:
-      if mode == mStore:
-        x[] = nil
-      else:
-        unsureAsgnRef(x, nil)
-    else:
-      sysAssert(dest != nil, "dest == nil")
-      if mode == mStore:
-        x[] = alloc(t.region, seq.len *% mt.base.size +% GenericSeqSize)
-      else:
-        unsureAsgnRef(x, newObj(mt, seq.len * mt.base.size + GenericSeqSize))
-      var dst = cast[ByteAddress](cast[PPointer](dest)[])
-      for i in 0..seq.len-1:
-        storeAux(
-          cast[pointer](dst +% i*% mt.base.size +% GenericSeqSize),
-          cast[pointer](cast[ByteAddress](s2) +% i *% mt.base.size +%
-                        GenericSeqSize),
-          mt.base, t, mode)
-      var dstseq = cast[PGenericSeq](dst)
-      dstseq.len = seq.len
-      dstseq.reserved = seq.len
-      if mode != mStore: dealloc(t.region, s2)
-  of tyObject:
-    # copy type field:
-    var pint = cast[ptr PNimType](dest)
-    # XXX use dynamic type here!
-    pint[] = mt
-    if mt.base != nil:
-      storeAux(dest, src, mt.base, t, mode)
-    storeAux(dest, src, mt.node, t, mode)
-  of tyTuple:
-    storeAux(dest, src, mt.node, t, mode)
-  of tyArray, tyArrayConstr:
-    for i in 0..(mt.size div mt.base.size)-1:
-      storeAux(cast[pointer](d +% i*% mt.base.size),
-               cast[pointer](s +% i*% mt.base.size), mt.base, t, mode)
-  of tyRef:
-    var s = cast[PPointer](src)[]
-    var x = cast[PPointer](dest)
-    if s == nil:
-      if mode == mStore:
-        x[] = nil
-      else:
-        unsureAsgnRef(x, nil)
-    else:
-      if mode == mStore:
-        x[] = alloc(t.region, mt.base.size)
-      else:
-        # XXX we should use the dynamic type here too, but that is not stored
-        # in the inbox at all --> use source[]'s object type? but how? we need
-        # a tyRef to the object!
-        var obj = newObj(mt, mt.base.size)
-        unsureAsgnRef(x, obj)
-      storeAux(x[], s, mt.base, t, mode)
-      if mode != mStore: dealloc(t.region, s)
-  else:
-    copyMem(dest, src, mt.size) # copy raw bits
-
-proc rawSend(q: PRawChannel, data: pointer, typ: PNimType) =
-  ## adds an `item` to the end of the queue `q`.
-  var cap = q.mask+1
-  if q.count >= cap:
-    # start with capacity for 2 entries in the queue:
-    if cap == 0: cap = 1
-    var n = cast[pbytes](alloc0(q.region, cap*2*typ.size))
-    var z = 0
-    var i = q.rd
-    var c = q.count
-    while c > 0:
-      dec c
-      copyMem(addr(n[z*typ.size]), addr(q.data[i*typ.size]), typ.size)
-      i = (i + 1) and q.mask
-      inc z
-    if q.data != nil: dealloc(q.region, q.data)
-    q.data = n
-    q.mask = cap*2 - 1
-    q.wr = q.count
-    q.rd = 0
-  storeAux(addr(q.data[q.wr * typ.size]), data, typ, q, mStore)
-  inc q.count
-  q.wr = (q.wr + 1) and q.mask
-
-proc rawRecv(q: PRawChannel, data: pointer, typ: PNimType) =
-  sysAssert q.count > 0, "rawRecv"
-  dec q.count
-  storeAux(data, addr(q.data[q.rd * typ.size]), typ, q, mLoad)
-  q.rd = (q.rd + 1) and q.mask
-
-template lockChannel(q: expr, action: stmt) {.immediate.} =
-  acquireSys(q.lock)
-  action
-  releaseSys(q.lock)
-
-template sendImpl(q: expr) {.immediate.} =  
-  if q.mask == ChannelDeadMask:
-    sysFatal(DeadThreadError, "cannot send message; thread died")
-  acquireSys(q.lock)
-  var m: TMsg
-  shallowCopy(m, msg)
-  var typ = cast[PNimType](getTypeInfo(msg))
-  rawSend(q, addr(m), typ)
-  q.elemType = typ
-  releaseSys(q.lock)
-  signalSysCond(q.cond)
-
-proc send*[TMsg](c: var TChannel[TMsg], msg: TMsg) =
-  ## sends a message to a thread. `msg` is deeply copied.
-  var q = cast[PRawChannel](addr(c))
-  sendImpl(q)
-
-proc llRecv(q: PRawChannel, res: pointer, typ: PNimType) =
-  # to save space, the generic is as small as possible
-  q.ready = true
-  while q.count <= 0:
-    waitSysCond(q.cond, q.lock)
-  q.ready = false
-  if typ != q.elemType:
-    releaseSys(q.lock)
-    sysFatal(ValueError, "cannot receive message of wrong type")
-  rawRecv(q, res, typ)
-
-proc recv*[TMsg](c: var TChannel[TMsg]): TMsg =
-  ## receives a message from the channel `c`. This blocks until
-  ## a message has arrived! You may use ``peek`` to avoid the blocking.
-  var q = cast[PRawChannel](addr(c))
-  acquireSys(q.lock)
-  llRecv(q, addr(result), cast[PNimType](getTypeInfo(result)))
-  releaseSys(q.lock)
-
-proc tryRecv*[TMsg](c: var TChannel[TMsg]): tuple[dataAvailable: bool,
-                                                  msg: TMsg] =
-  ## try to receives a message from the channel `c` if available. Otherwise
-  ## it returns ``(false, default(msg))``.
-  var q = cast[PRawChannel](addr(c))
-  if q.mask != ChannelDeadMask:
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+## Channel support for threads. **Note**: This is part of the system module.
+## Do not import it directly. To activate thread support you need to compile
+## with the ``--threads:on`` command line switch.
+##
+## **Note:** The current implementation of message passing is slow and does
+## not work with cyclic data structures.
+
+when not declared(NimString):
+  {.error: "You must not import this module explicitly".}
+
+type
+  pbytes = ptr array[0.. 0xffff, byte]
+  RawChannel {.pure, final.} = object ## msg queue for a thread
+    rd, wr, count, mask: int
+    data: pbytes
+    lock: SysLock
+    cond: SysCond
+    elemType: PNimType
+    ready: bool
+    region: MemRegion
+  PRawChannel = ptr RawChannel
+  LoadStoreMode = enum mStore, mLoad
+  Channel* {.gcsafe.}[TMsg] = RawChannel ## a channel for thread communication
+{.deprecated: [TRawChannel: RawChannel, TLoadStoreMode: LoadStoreMode,
+              TChannel: Channel].}
+
+const ChannelDeadMask = -2
+
+proc initRawChannel(p: pointer) =
+  var c = cast[PRawChannel](p)
+  initSysLock(c.lock)
+  initSysCond(c.cond)
+  c.mask = -1
+
+proc deinitRawChannel(p: pointer) =
+  var c = cast[PRawChannel](p)
+  # we need to grab the lock to be safe against sending threads!
+  acquireSys(c.lock)
+  c.mask = ChannelDeadMask
+  deallocOsPages(c.region)
+  deinitSys(c.lock)
+  deinitSysCond(c.cond)
+
+proc storeAux(dest, src: pointer, mt: PNimType, t: PRawChannel,
+              mode: LoadStoreMode) {.benign.}
+proc storeAux(dest, src: pointer, n: ptr TNimNode, t: PRawChannel,
+              mode: LoadStoreMode) {.benign.} =
+  var
+    d = cast[ByteAddress](dest)
+    s = cast[ByteAddress](src)
+  case n.kind
+  of nkSlot: storeAux(cast[pointer](d +% n.offset),
+                      cast[pointer](s +% n.offset), n.typ, t, mode)
+  of nkList:
+    for i in 0..n.len-1: storeAux(dest, src, n.sons[i], t, mode)
+  of nkCase:
+    copyMem(cast[pointer](d +% n.offset), cast[pointer](s +% n.offset),
+            n.typ.size)
+    var m = selectBranch(src, n)
+    if m != nil: storeAux(dest, src, m, t, mode)
+  of nkNone: sysAssert(false, "storeAux")
+
+proc storeAux(dest, src: pointer, mt: PNimType, t: PRawChannel,
+              mode: LoadStoreMode) =
+  var
+    d = cast[ByteAddress](dest)
+    s = cast[ByteAddress](src)
+  sysAssert(mt != nil, "mt == nil")
+  case mt.kind
+  of tyString:
+    if mode == mStore:
+      var x = cast[PPointer](dest)
+      var s2 = cast[PPointer](s)[]
+      if s2 == nil:
+        x[] = nil
+      else:
+        var ss = cast[NimString](s2)
+        var ns = cast[NimString](alloc(t.region, ss.len+1 + GenericSeqSize))
+        copyMem(ns, ss, ss.len+1 + GenericSeqSize)
+        x[] = ns
+    else:
+      var x = cast[PPointer](dest)
+      var s2 = cast[PPointer](s)[]
+      if s2 == nil:
+        unsureAsgnRef(x, s2)
+      else:
+        unsureAsgnRef(x, copyString(cast[NimString](s2)))
+        dealloc(t.region, s2)
+  of tySequence:
+    var s2 = cast[PPointer](src)[]
+    var seq = cast[PGenericSeq](s2)
+    var x = cast[PPointer](dest)
+    if s2 == nil:
+      if mode == mStore:
+        x[] = nil
+      else:
+        unsureAsgnRef(x, nil)
+    else:
+      sysAssert(dest != nil, "dest == nil")
+      if mode == mStore:
+        x[] = alloc(t.region, seq.len *% mt.base.size +% GenericSeqSize)
+      else:
+        unsureAsgnRef(x, newObj(mt, seq.len * mt.base.size + GenericSeqSize))
+      var dst = cast[ByteAddress](cast[PPointer](dest)[])
+      for i in 0..seq.len-1:
+        storeAux(
+          cast[pointer](dst +% i*% mt.base.size +% GenericSeqSize),
+          cast[pointer](cast[ByteAddress](s2) +% i *% mt.base.size +%
+                        GenericSeqSize),
+          mt.base, t, mode)
+      var dstseq = cast[PGenericSeq](dst)
+      dstseq.len = seq.len
+      dstseq.reserved = seq.len
+      if mode != mStore: dealloc(t.region, s2)
+  of tyObject:
+    # copy type field:
+    var pint = cast[ptr PNimType](dest)
+    # XXX use dynamic type here!
+    pint[] = mt
+    if mt.base != nil:
+      storeAux(dest, src, mt.base, t, mode)
+    storeAux(dest, src, mt.node, t, mode)
+  of tyTuple:
+    storeAux(dest, src, mt.node, t, mode)
+  of tyArray, tyArrayConstr:
+    for i in 0..(mt.size div mt.base.size)-1:
+      storeAux(cast[pointer](d +% i*% mt.base.size),
+               cast[pointer](s +% i*% mt.base.size), mt.base, t, mode)
+  of tyRef:
+    var s = cast[PPointer](src)[]
+    var x = cast[PPointer](dest)
+    if s == nil:
+      if mode == mStore:
+        x[] = nil
+      else:
+        unsureAsgnRef(x, nil)
+    else:
+      if mode == mStore:
+        x[] = alloc(t.region, mt.base.size)
+      else:
+        # XXX we should use the dynamic type here too, but that is not stored
+        # in the inbox at all --> use source[]'s object type? but how? we need
+        # a tyRef to the object!
+        var obj = newObj(mt, mt.base.size)
+        unsureAsgnRef(x, obj)
+      storeAux(x[], s, mt.base, t, mode)
+      if mode != mStore: dealloc(t.region, s)
+  else:
+    copyMem(dest, src, mt.size) # copy raw bits
+
+proc rawSend(q: PRawChannel, data: pointer, typ: PNimType) =
+  ## adds an `item` to the end of the queue `q`.
+  var cap = q.mask+1
+  if q.count >= cap:
+    # start with capacity for 2 entries in the queue:
+    if cap == 0: cap = 1
+    var n = cast[pbytes](alloc0(q.region, cap*2*typ.size))
+    var z = 0
+    var i = q.rd
+    var c = q.count
+    while c > 0:
+      dec c
+      copyMem(addr(n[z*typ.size]), addr(q.data[i*typ.size]), typ.size)
+      i = (i + 1) and q.mask
+      inc z
+    if q.data != nil: dealloc(q.region, q.data)
+    q.data = n
+    q.mask = cap*2 - 1
+    q.wr = q.count
+    q.rd = 0
+  storeAux(addr(q.data[q.wr * typ.size]), data, typ, q, mStore)
+  inc q.count
+  q.wr = (q.wr + 1) and q.mask
+
+proc rawRecv(q: PRawChannel, data: pointer, typ: PNimType) =
+  sysAssert q.count > 0, "rawRecv"
+  dec q.count
+  storeAux(data, addr(q.data[q.rd * typ.size]), typ, q, mLoad)
+  q.rd = (q.rd + 1) and q.mask
+
+template lockChannel(q: expr, action: stmt) {.immediate.} =
+  acquireSys(q.lock)
+  action
+  releaseSys(q.lock)
+
+template sendImpl(q: expr) {.immediate.} =
+  if q.mask == ChannelDeadMask:
+    sysFatal(DeadThreadError, "cannot send message; thread died")
+  acquireSys(q.lock)
+  var m: TMsg
+  shallowCopy(m, msg)
+  var typ = cast[PNimType](getTypeInfo(msg))
+  rawSend(q, addr(m), typ)
+  q.elemType = typ
+  releaseSys(q.lock)
+  signalSysCond(q.cond)
+
+proc send*[TMsg](c: var Channel[TMsg], msg: TMsg) =
+  ## sends a message to a thread. `msg` is deeply copied.
+  var q = cast[PRawChannel](addr(c))
+  sendImpl(q)
+
+proc llRecv(q: PRawChannel, res: pointer, typ: PNimType) =
+  # to save space, the generic is as small as possible
+  q.ready = true
+  while q.count <= 0:
+    waitSysCond(q.cond, q.lock)
+  q.ready = false
+  if typ != q.elemType:
+    releaseSys(q.lock)
+    sysFatal(ValueError, "cannot receive message of wrong type")
+  rawRecv(q, res, typ)
+
+proc recv*[TMsg](c: var Channel[TMsg]): TMsg =
+  ## receives a message from the channel `c`. This blocks until
+  ## a message has arrived! You may use ``peek`` to avoid the blocking.
+  var q = cast[PRawChannel](addr(c))
+  acquireSys(q.lock)
+  llRecv(q, addr(result), cast[PNimType](getTypeInfo(result)))
+  releaseSys(q.lock)
+
+proc tryRecv*[TMsg](c: var Channel[TMsg]): tuple[dataAvailable: bool,
+                                                  msg: TMsg] =
+  ## try to receives a message from the channel `c` if available. Otherwise
+  ## it returns ``(false, default(msg))``.
+  var q = cast[PRawChannel](addr(c))
+  if q.mask != ChannelDeadMask:
     if tryAcquireSys(q.lock):
-      if q.count > 0:
-        llRecv(q, addr(result.msg), cast[PNimType](getTypeInfo(result.msg)))
-        result.dataAvailable = true
-      releaseSys(q.lock)
-
-proc peek*[TMsg](c: var TChannel[TMsg]): int =
-  ## returns the current number of messages in the channel `c`. Returns -1
-  ## if the channel has been closed. **Note**: This is dangerous to use
-  ## as it encourages races. It's much better to use ``tryRecv`` instead.
-  var q = cast[PRawChannel](addr(c))
-  if q.mask != ChannelDeadMask:
-    lockChannel(q):
-      result = q.count
-  else:
-    result = -1
-
-proc open*[TMsg](c: var TChannel[TMsg]) =
-  ## opens a channel `c` for inter thread communication.
-  initRawChannel(addr(c))
-
-proc close*[TMsg](c: var TChannel[TMsg]) =
-  ## closes a channel `c` and frees its associated resources.
-  deinitRawChannel(addr(c))
-
-proc ready*[TMsg](c: var TChannel[TMsg]): bool =
-  ## returns true iff some thread is waiting on the channel `c` for
-  ## new messages.
-  var q = cast[PRawChannel](addr(c))
-  result = q.ready
-
+      if q.count > 0:
+        llRecv(q, addr(result.msg), cast[PNimType](getTypeInfo(result.msg)))
+        result.dataAvailable = true
+      releaseSys(q.lock)
+
+proc peek*[TMsg](c: var Channel[TMsg]): int =
+  ## returns the current number of messages in the channel `c`. Returns -1
+  ## if the channel has been closed. **Note**: This is dangerous to use
+  ## as it encourages races. It's much better to use ``tryRecv`` instead.
+  var q = cast[PRawChannel](addr(c))
+  if q.mask != ChannelDeadMask:
+    lockChannel(q):
+      result = q.count
+  else:
+    result = -1
+
+proc open*[TMsg](c: var Channel[TMsg]) =
+  ## opens a channel `c` for inter thread communication.
+  initRawChannel(addr(c))
+
+proc close*[TMsg](c: var Channel[TMsg]) =
+  ## closes a channel `c` and frees its associated resources.
+  deinitRawChannel(addr(c))
+
+proc ready*[TMsg](c: var Channel[TMsg]): bool =
+  ## returns true iff some thread is waiting on the channel `c` for
+  ## new messages.
+  var q = cast[PRawChannel](addr(c))
+  result = q.ready
+
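
A minimal usage sketch for the renamed Channel API above (compile with --threads:on;
the worker proc and the message are illustrative):

    var chan: Channel[string]

    proc worker() {.thread.} =
      chan.send("hello from the worker")

    open(chan)
    var t: Thread[void]
    createThread(t, worker)
    echo chan.recv()      # blocks until the worker's message arrives
    joinThread(t)
    close(chan)
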
diff --git a/lib/system/chcks.nim b/lib/system/chcks.nim
index 2f6d25a12..6caf99d27 100644
--- a/lib/system/chcks.nim
+++ b/lib/system/chcks.nim
@@ -9,16 +9,16 @@
 
 # Implementation of some runtime checks.
 
-proc raiseRangeError(val: BiggestInt) {.compilerproc, noreturn, noinline.} =
+proc raiseRangeError(val: BiggestInt) {.compilerproc, noinline.} =
   when hostOS == "standalone":
     sysFatal(RangeError, "value out of range")
   else:
     sysFatal(RangeError, "value out of range: ", $val)
 
-proc raiseIndexError() {.compilerproc, noreturn, noinline.} =
+proc raiseIndexError() {.compilerproc, noinline.} =
   sysFatal(IndexError, "index out of bounds")
 
-proc raiseFieldError(f: string) {.compilerproc, noreturn, noinline.} =
+proc raiseFieldError(f: string) {.compilerproc, noinline.} =
   sysFatal(FieldError, f, " is not accessible")
 
 proc chckIndx(i, a, b: int): int =
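
These raise* compilerprocs back the checks the compiler emits when run-time checks are
enabled; a sketch of code that ends up calling raiseIndexError (default debug build
assumed):

    let a = @[1, 2, 3]
    try:
      echo a[5]            # bounds check fails and calls raiseIndexError
    except IndexError:
      echo "caught: ", getCurrentExceptionMsg()
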
diff --git a/lib/system/debugger.nim b/lib/system/debugger.nim
index 7b5169344..b18c61755 100644
--- a/lib/system/debugger.nim
+++ b/lib/system/debugger.nim
@@ -10,22 +10,23 @@
 ## This file implements basic features for any debugger.
 
 type
-  TVarSlot* {.compilerproc, final.} = object ## a slot in a frame
+  VarSlot* {.compilerproc, final.} = object ## a slot in a frame
     address*: pointer ## the variable's address
     typ*: PNimType    ## the variable's type
     name*: cstring    ## the variable's name; for globals this is "module.name"
 
-  PExtendedFrame = ptr TExtendedFrame
-  TExtendedFrame = object  # If the debugger is enabled the compiler
+  PExtendedFrame = ptr ExtendedFrame
+  ExtendedFrame = object  # If the debugger is enabled the compiler
                            # provides an extended frame. Of course
                            # only slots that are
                            # needed are allocated and not 10_000,
                            # except for the global data description.
     f: TFrame
-    slots: array[0..10_000, TVarSlot]
+    slots: array[0..10_000, VarSlot]
+{.deprecated: [TVarSlot: VarSlot, TExtendedFrame: ExtendedFrame].}
 
 var
-  dbgGlobalData: TExtendedFrame # this reserves much space, but
+  dbgGlobalData: ExtendedFrame  # this reserves much space, but
                                 # for now it is the most practical way
 
 proc dbgRegisterGlobal(name: cstring, address: pointer,
@@ -39,7 +40,7 @@ proc dbgRegisterGlobal(name: cstring, address: pointer,
   dbgGlobalData.slots[i].address = address
   inc(dbgGlobalData.f.len)
 
-proc getLocal*(frame: PFrame; slot: int): TVarSlot {.inline.} =
+proc getLocal*(frame: PFrame; slot: int): VarSlot {.inline.} =
   ## retrieves the meta data for the local variable at `slot`. CAUTION: An
   ## invalid `slot` value causes a corruption!
   result = cast[PExtendedFrame](frame).slots[slot]
@@ -48,7 +49,7 @@ proc getGlobalLen*(): int {.inline.} =
   ## gets the number of registered globals.
   result = dbgGlobalData.f.len
 
-proc getGlobal*(slot: int): TVarSlot {.inline.} =
+proc getGlobal*(slot: int): VarSlot {.inline.} =
   ## retrieves the meta data for the global variable at `slot`. CAUTION: An
   ## invalid `slot` value causes a corruption!
   result = dbgGlobalData.slots[slot]
@@ -56,16 +57,16 @@ proc getGlobal*(slot: int): TVarSlot {.inline.} =
 # ------------------- breakpoint support ------------------------------------
 
 type
-  TBreakpoint* = object  ## represents a break point
+  Breakpoint* = object   ## represents a break point
     low*, high*: int     ## range from low to high; if disabled
                          ## both low and high are set to their negative values
     filename*: cstring   ## the filename of the breakpoint
 
 var
-  dbgBP: array[0..127, TBreakpoint] # breakpoints
+  dbgBP: array[0..127, Breakpoint] # breakpoints
   dbgBPlen: int
   dbgBPbloom: int64  # we use a bloom filter to speed up breakpoint checking
-  
+
   dbgFilenames*: array[0..300, cstring] ## registered filenames;
                                         ## 'nil' terminated
   dbgFilenameLen: int
@@ -131,16 +132,16 @@ proc canonFilename*(filename: cstring): cstring =
     if fileMatches(result, filename): return result
   result = nil
 
-iterator listBreakpoints*(): ptr TBreakpoint =
+iterator listBreakpoints*(): ptr Breakpoint =
   ## lists all breakpoints.
   for i in 0..dbgBPlen-1: yield addr(dbgBP[i])
 
-proc isActive*(b: ptr TBreakpoint): bool = b.low > 0
-proc flip*(b: ptr TBreakpoint) =
+proc isActive*(b: ptr Breakpoint): bool = b.low > 0
+proc flip*(b: ptr Breakpoint) =
   ## enables or disables 'b' depending on its current state.
   b.low = -b.low; b.high = -b.high
 
-proc checkBreakpoints*(filename: cstring, line: int): ptr TBreakpoint =
+proc checkBreakpoints*(filename: cstring, line: int): ptr Breakpoint =
   ## in which breakpoint (if any) we are.
   if (dbgBPbloom and line) != line: return nil
   for b in listBreakpoints():
@@ -149,29 +150,30 @@ proc checkBreakpoints*(filename: cstring, line: int): ptr TBreakpoint =
 # ------------------- watchpoint support ------------------------------------
 
 type
-  THash = int
-  TWatchpoint {.pure, final.} = object
+  Hash = int
+  Watchpoint {.pure, final.} = object
     name: cstring
     address: pointer
     typ: PNimType
-    oldValue: THash
+    oldValue: Hash
+{.deprecated: [THash: Hash, TWatchpoint: Watchpoint].}
 
 var
-  watchpoints: array [0..99, TWatchpoint]
+  watchpoints: array [0..99, Watchpoint]
   watchpointsLen: int
 
-proc `!&`(h: THash, val: int): THash {.inline.} =
+proc `!&`(h: Hash, val: int): Hash {.inline.} =
   result = h +% val
   result = result +% result shl 10
   result = result xor (result shr 6)
 
-proc `!$`(h: THash): THash {.inline.} =
+proc `!$`(h: Hash): Hash {.inline.} =
   result = h +% h shl 3
   result = result xor (result shr 11)
   result = result +% result shl 15
 
-proc hash(data: pointer, size: int): THash =
-  var h: THash = 0
+proc hash(data: pointer, size: int): Hash =
+  var h: Hash = 0
   var p = cast[cstring](data)
   var i = 0
   var s = size
@@ -181,21 +183,21 @@ proc hash(data: pointer, size: int): THash =
     dec(s)
   result = !$h
 
-proc hashGcHeader(data: pointer): THash =
+proc hashGcHeader(data: pointer): Hash =
   const headerSize = sizeof(int)*2
   result = hash(cast[pointer](cast[int](data) -% headerSize), headerSize)
 
 proc genericHashAux(dest: pointer, mt: PNimType, shallow: bool,
-                    h: THash): THash
+                    h: Hash): Hash
 proc genericHashAux(dest: pointer, n: ptr TNimNode, shallow: bool,
-                    h: THash): THash =
+                    h: Hash): Hash =
   var d = cast[ByteAddress](dest)
   case n.kind
   of nkSlot:
     result = genericHashAux(cast[pointer](d +% n.offset), n.typ, shallow, h)
   of nkList:
     result = h
-    for i in 0..n.len-1: 
+    for i in 0..n.len-1:
       result = result !& genericHashAux(dest, n.sons[i], shallow, result)
   of nkCase:
     result = h !& hash(cast[pointer](d +% n.offset), n.typ.size)
@@ -203,8 +205,8 @@ proc genericHashAux(dest: pointer, n: ptr TNimNode, shallow: bool,
     if m != nil: result = genericHashAux(dest, m, shallow, result)
   of nkNone: sysAssert(false, "genericHashAux")
 
-proc genericHashAux(dest: pointer, mt: PNimType, shallow: bool, 
-                    h: THash): THash =
+proc genericHashAux(dest: pointer, mt: PNimType, shallow: bool,
+                    h: Hash): Hash =
   sysAssert(mt != nil, "genericHashAux 2")
   case mt.kind
   of tyString:
@@ -255,7 +257,7 @@ proc genericHashAux(dest: pointer, mt: PNimType, shallow: bool,
 
 proc genericHash(dest: pointer, mt: PNimType): int =
   result = genericHashAux(dest, mt, false, 0)
-  
+
 proc dbgRegisterWatchpoint(address: pointer, name: cstring,
                            typ: PNimType) {.compilerproc.} =
   let L = watchPointsLen
@@ -283,7 +285,7 @@ var
     ## Only code compiled with the ``debugger:on`` switch calls this hook.
 
   dbgWatchpointHook*: proc (watchpointName: cstring) {.nimcall.}
-  
+
 proc checkWatchpoints =
   let L = watchPointsLen
   for i in 0.. <L:
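
The `!&`/`!$` combinators above implement the usual one-at-a-time hashing scheme; the
public hashes module exposes the same operators, so an equivalent user-level helper
could look like this sketch:

    import hashes

    proc hashBytes(data: openArray[byte]): Hash =
      var h: Hash = 0
      for b in data:
        h = h !& int(b)
      result = !$h
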
diff --git a/lib/system/deepcopy.nim b/lib/system/deepcopy.nim
index 093c0f3a7..03230e541 100644
--- a/lib/system/deepcopy.nim
+++ b/lib/system/deepcopy.nim
@@ -14,7 +14,7 @@ proc genericDeepCopyAux(dest, src: pointer, n: ptr TNimNode) {.benign.} =
     s = cast[ByteAddress](src)
   case n.kind
   of nkSlot:
-    genericDeepCopyAux(cast[pointer](d +% n.offset), 
+    genericDeepCopyAux(cast[pointer](d +% n.offset),
                        cast[pointer](s +% n.offset), n.typ)
   of nkList:
     for i in 0..n.len-1:
@@ -24,7 +24,7 @@ proc genericDeepCopyAux(dest, src: pointer, n: ptr TNimNode) {.benign.} =
     var m = selectBranch(src, n)
     # reset if different branches are in use; note different branches also
     # imply that's not self-assignment (``x = x``)!
-    if m != dd and dd != nil: 
+    if m != dd and dd != nil:
       genericResetAux(dest, dd)
     copyMem(cast[pointer](d +% n.offset), cast[pointer](s +% n.offset),
             n.typ.size)
@@ -103,16 +103,16 @@ proc genericDeepCopyAux(dest, src: pointer, mt: PNimType) =
         else:
           let realType = x.typ
           let z = newObj(realType, realType.base.size)
-          
+
           unsureAsgnRef(cast[PPointer](dest), z)
           x.typ = cast[PNimType](cast[int](z) or 1)
           genericDeepCopyAux(z, s2, realType.base)
           x.typ = realType
       else:
         let realType = mt
-        let z = newObj(realType, realType.base.size)        
+        let z = newObj(realType, realType.base.size)
         unsureAsgnRef(cast[PPointer](dest), z)
-        genericDeepCopyAux(z, s2, realType.base)        
+        genericDeepCopyAux(z, s2, realType.base)
   of tyPtr:
     # no cycle check here, but also not really required
     let s2 = cast[PPointer](src)[]
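
genericDeepCopyAux is the machinery behind system.deepCopy; a sketch of the user-facing
call (the Node type is illustrative):

    type Node = ref object
      data: int
      next: Node

    var a = Node(data: 1, next: Node(data: 2))
    var b = deepCopy(a)    # an independent copy of the whole graph
    b.next.data = 99
    echo a.next.data       # still 2
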
diff --git a/lib/system/dyncalls.nim b/lib/system/dyncalls.nim
index 44f7b67c3..fe98b1e6f 100644
--- a/lib/system/dyncalls.nim
+++ b/lib/system/dyncalls.nim
@@ -15,9 +15,9 @@
 {.push stack_trace: off.}
 
 const
-  NilLibHandle: TLibHandle = nil
+  NilLibHandle: LibHandle = nil
 
-proc rawWrite(f: File, s: string) = 
+proc rawWrite(f: File, s: string) =
   # we cannot throw an exception here!
   discard writeBuffer(f, cstring(s), s.len)
 
@@ -55,22 +55,22 @@ when defined(posix):
   var
     RTLD_NOW {.importc: "RTLD_NOW", header: "<dlfcn.h>".}: int
 
-  proc dlclose(lib: TLibHandle) {.importc, header: "<dlfcn.h>".}
-  proc dlopen(path: cstring, mode: int): TLibHandle {.
+  proc dlclose(lib: LibHandle) {.importc, header: "<dlfcn.h>".}
+  proc dlopen(path: cstring, mode: int): LibHandle {.
       importc, header: "<dlfcn.h>".}
-  proc dlsym(lib: TLibHandle, name: cstring): TProcAddr {.
+  proc dlsym(lib: LibHandle, name: cstring): ProcAddr {.
       importc, header: "<dlfcn.h>".}
 
   proc dlerror(): cstring {.importc, header: "<dlfcn.h>".}
 
-  proc nimUnloadLibrary(lib: TLibHandle) =
+  proc nimUnloadLibrary(lib: LibHandle) =
     dlclose(lib)
 
-  proc nimLoadLibrary(path: string): TLibHandle =
+  proc nimLoadLibrary(path: string): LibHandle =
     result = dlopen(path, RTLD_NOW)
     #c_fprintf(c_stdout, "%s\n", dlerror())
 
-  proc nimGetProcAddr(lib: TLibHandle, name: cstring): TProcAddr =
+  proc nimGetProcAddr(lib: LibHandle, name: cstring): ProcAddr =
     result = dlsym(lib, name)
     if result == nil: procAddrError(name)
 
@@ -84,12 +84,12 @@ elif defined(windows) or defined(dos):
     type
       THINSTANCE {.importc: "HINSTANCE".} = object
         x: pointer
-    proc getProcAddress(lib: THINSTANCE, name: cstring): TProcAddr {.
+    proc getProcAddress(lib: THINSTANCE, name: cstring): ProcAddr {.
         importcpp: "(void*)GetProcAddress(@)", header: "<windows.h>", stdcall.}
   else:
     type
       THINSTANCE {.importc: "HINSTANCE".} = pointer
-    proc getProcAddress(lib: THINSTANCE, name: cstring): TProcAddr {.
+    proc getProcAddress(lib: THINSTANCE, name: cstring): ProcAddr {.
         importc: "GetProcAddress", header: "<windows.h>", stdcall.}
 
   proc freeLibrary(lib: THINSTANCE) {.
@@ -97,13 +97,13 @@ elif defined(windows) or defined(dos):
   proc winLoadLibrary(path: cstring): THINSTANCE {.
       importc: "LoadLibraryA", header: "<windows.h>", stdcall.}
 
-  proc nimUnloadLibrary(lib: TLibHandle) =
+  proc nimUnloadLibrary(lib: LibHandle) =
     freeLibrary(cast[THINSTANCE](lib))
 
-  proc nimLoadLibrary(path: string): TLibHandle =
-    result = cast[TLibHandle](winLoadLibrary(path))
+  proc nimLoadLibrary(path: string): LibHandle =
+    result = cast[LibHandle](winLoadLibrary(path))
 
-  proc nimGetProcAddr(lib: TLibHandle, name: cstring): TProcAddr =
+  proc nimGetProcAddr(lib: LibHandle, name: cstring): ProcAddr =
     result = getProcAddress(cast[THINSTANCE](lib), name)
     if result == nil: procAddrError(name)
 
@@ -115,13 +115,13 @@ elif defined(mac):
   #
   {.error: "no implementation for dyncalls yet".}
 
-  proc nimUnloadLibrary(lib: TLibHandle) =
+  proc nimUnloadLibrary(lib: LibHandle) =
     NSUnLinkModule(NSModule(lib), NSUNLINKMODULE_OPTION_RESET_LAZY_REFERENCES)
 
   var
     dyld_present {.importc: "_dyld_present", header: "<dyld.h>".}: int
 
-  proc nimLoadLibrary(path: string): TLibHandle =
+  proc nimLoadLibrary(path: string): LibHandle =
     var
       img: NSObjectFileImage
       ret: NSObjectFileImageReturnCode
@@ -134,17 +134,16 @@ elif defined(mac):
         modul = NSLinkModule(img, path, NSLINKMODULE_OPTION_PRIVATE or
                                         NSLINKMODULE_OPTION_RETURN_ON_ERROR)
         NSDestroyObjectFileImage(img)
-        result = TLibHandle(modul)
+        result = LibHandle(modul)
 
-  proc nimGetProcAddr(lib: TLibHandle, name: cstring): TProcAddr =
+  proc nimGetProcAddr(lib: LibHandle, name: cstring): ProcAddr =
     var
       nss: NSSymbol
     nss = NSLookupSymbolInModule(NSModule(lib), name)
-    result = TProcAddr(NSAddressOfSymbol(nss))
+    result = ProcAddr(NSAddressOfSymbol(nss))
     if result == nil: ProcAddrError(name)
 
 else:
   {.error: "no implementation for dyncalls".}
-  
-{.pop.}
 
+{.pop.}
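
For comparison, explicit run-time loading in user code normally goes through the stdlib
dynlib module rather than these system compilerprocs; a sketch (library and symbol names
are illustrative):

    import dynlib

    let lib = loadLib("libm.so.6")
    if lib != nil:
      let sym = lib.symAddr("cos")
      if sym != nil:
        let cosine = cast[proc (x: cdouble): cdouble {.cdecl.}](sym)
        echo cosine(0.0)
      unloadLib(lib)
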
diff --git a/lib/system/endb.nim b/lib/system/endb.nim
index 003698421..b2cc5624b 100644
--- a/lib/system/endb.nim
+++ b/lib/system/endb.nim
@@ -20,63 +20,65 @@ const
   EndbEnd = "***\n"
 
 type
-  TStaticStr = object
+  StaticStr = object
     len: int
     data: array[0..100, char]
 
-  TBreakpointFilename = object
-    b: ptr TBreakpoint
-    filename: TStaticStr
+  BreakpointFilename = object
+    b: ptr Breakpoint
+    filename: StaticStr
 
-  TDbgState = enum
+  DbgState = enum
     dbOff,        # debugger is turned off
     dbStepInto,   # debugger is in tracing mode
     dbStepOver,
     dbSkipCurrent,
     dbQuiting,    # debugger wants to quit
     dbBreakpoints # debugger is only interested in breakpoints
+{.deprecated: [TStaticStr: StaticStr, TBreakpointFilename: BreakpointFilename,
+              TDbgState: DbgState].}
 
 var
-  dbgUser: TStaticStr   # buffer for user input; first command is ``step_into``
+  dbgUser: StaticStr    # buffer for user input; first command is ``step_into``
                         # needs to be global cause we store the last command
                         # in it
-  dbgState: TDbgState   # state of debugger
+  dbgState: DbgState    # state of debugger
   dbgSkipToFrame: PFrame # frame to be skipped to
 
   maxDisplayRecDepth: int = 5 # do not display too much data!
 
-  brkPoints: array[0..127, TBreakpointFilename]
+  brkPoints: array[0..127, BreakpointFilename]
 
-proc setLen(s: var TStaticStr, newLen=0) =
+proc setLen(s: var StaticStr, newLen=0) =
   s.len = newLen
   s.data[newLen] = '\0'
 
-proc add(s: var TStaticStr, c: char) =
+proc add(s: var StaticStr, c: char) =
   if s.len < high(s.data)-1:
     s.data[s.len] = c
     s.data[s.len+1] = '\0'
     inc s.len
 
-proc add(s: var TStaticStr, c: cstring) =
+proc add(s: var StaticStr, c: cstring) =
   var i = 0
   while c[i] != '\0':
     add s, c[i]
     inc i
 
-proc assign(s: var TStaticStr, c: cstring) =
+proc assign(s: var StaticStr, c: cstring) =
   setLen(s)
   add s, c
 
-proc `==`(a, b: TStaticStr): bool =
+proc `==`(a, b: StaticStr): bool =
   if a.len == b.len:
     for i in 0 .. a.len-1:
       if a.data[i] != b.data[i]: return false
     return true
 
-proc `==`(a: TStaticStr, b: cstring): bool =
+proc `==`(a: StaticStr, b: cstring): bool =
   result = c_strcmp(a.data, b) == 0
 
-proc write(f: TFile, s: TStaticStr) =
+proc write(f: File, s: StaticStr) =
   write(f, cstring(s.data))
 
 proc listBreakPoints() =
@@ -95,14 +97,14 @@ proc listBreakPoints() =
       write(stdout, "\n")
   write(stdout, EndbEnd)
 
-proc openAppend(filename: cstring): TFile =
+proc openAppend(filename: cstring): File =
   var p: pointer = fopen(filename, "ab")
   if p != nil:
-    result = cast[TFile](p)
+    result = cast[File](p)
     write(result, "----------------------------------------\n")
 
 proc dbgRepr(p: pointer, typ: PNimType): string =
-  var cl: TReprClosure
+  var cl: ReprClosure
   initReprClosure(cl)
   cl.recDepth = maxDisplayRecDepth
   # locks for the GC turned out to be a bad idea...
@@ -112,21 +114,21 @@ proc dbgRepr(p: pointer, typ: PNimType): string =
   # dec(recGcLock)
   deinitReprClosure(cl)
 
-proc writeVariable(stream: TFile, slot: TVarSlot) =
+proc writeVariable(stream: File, slot: VarSlot) =
   write(stream, slot.name)
   write(stream, " = ")
-  writeln(stream, dbgRepr(slot.address, slot.typ))
+  writeLine(stream, dbgRepr(slot.address, slot.typ))
 
-proc listFrame(stream: TFile, f: PFrame) =
+proc listFrame(stream: File, f: PFrame) =
   write(stream, EndbBeg)
   write(stream, "| Frame (")
   write(stream, f.len)
   write(stream, " slots):\n")
   for i in 0 .. f.len-1:
-    writeln(stream, getLocal(f, i).name)
+    writeLine(stream, getLocal(f, i).name)
   write(stream, EndbEnd)
 
-proc listLocals(stream: TFile, f: PFrame) =
+proc listLocals(stream: File, f: PFrame) =
   write(stream, EndbBeg)
   write(stream, "| Frame (")
   write(stream, f.len)
@@ -135,11 +137,11 @@ proc listLocals(stream: TFile, f: PFrame) =
     writeVariable(stream, getLocal(f, i))
   write(stream, EndbEnd)
 
-proc listGlobals(stream: TFile) =
+proc listGlobals(stream: File) =
   write(stream, EndbBeg)
   write(stream, "| Globals:\n")
   for i in 0 .. getGlobalLen()-1:
-    writeln(stream, getGlobal(i).name)
+    writeLine(stream, getGlobal(i).name)
   write(stream, EndbEnd)
 
 proc debugOut(msg: cstring) =
@@ -172,7 +174,7 @@ proc dbgShowExecutionPoint() =
   write(stdout, framePtr.procname)
   write(stdout, " ***\n")
 
-proc scanAndAppendWord(src: cstring, a: var TStaticStr, start: int): int =
+proc scanAndAppendWord(src: cstring, a: var StaticStr, start: int): int =
   result = start
   # skip whitespace:
   while src[result] in {'\t', ' '}: inc(result)
@@ -184,11 +186,11 @@ proc scanAndAppendWord(src: cstring, a: var TStaticStr, start: int): int =
     else: break
     inc(result)
 
-proc scanWord(src: cstring, a: var TStaticStr, start: int): int =
+proc scanWord(src: cstring, a: var StaticStr, start: int): int =
   setlen(a)
   result = scanAndAppendWord(src, a, start)
 
-proc scanFilename(src: cstring, a: var TStaticStr, start: int): int =
+proc scanFilename(src: cstring, a: var StaticStr, start: int): int =
   result = start
   setLen a
   while src[result] in {'\t', ' '}: inc(result)
@@ -250,8 +252,8 @@ proc hasExt(s: cstring): bool =
     if s[i] == '.': return true
     inc i
 
-proc parseBreakpoint(s: cstring, start: int): TBreakpoint =
-  var dbgTemp: TStaticStr
+proc parseBreakpoint(s: cstring, start: int): Breakpoint =
+  var dbgTemp: StaticStr
   var i = scanNumber(s, result.low, start)
   if result.low == 0: result.low = framePtr.line
   i = scanNumber(s, result.high, i)
@@ -279,11 +281,11 @@ proc breakpointToggle(s: cstring, start: int) =
     if not b.isNil: b.flip
     else: debugOut("[Warning] unknown breakpoint ")
 
-proc dbgEvaluate(stream: TFile, s: cstring, start: int, f: PFrame) =
-  var dbgTemp: TStaticStr
+proc dbgEvaluate(stream: File, s: cstring, start: int, f: PFrame) =
+  var dbgTemp: StaticStr
   var i = scanWord(s, dbgTemp, start)
   while s[i] in {' ', '\t'}: inc(i)
-  var v: TVarSlot
+  var v: VarSlot
   if s[i] == '.':
     inc(i)
     add(dbgTemp, '.')
@@ -296,10 +298,10 @@ proc dbgEvaluate(stream: TFile, s: cstring, start: int, f: PFrame) =
     for i in 0 .. f.len-1:
       let v = getLocal(f, i)
       if c_strcmp(v.name, dbgTemp.data) == 0:
-        writeVariable(stream, v)  
+        writeVariable(stream, v)
 
 proc dbgOut(s: cstring, start: int, currFrame: PFrame) =
-  var dbgTemp: TStaticStr
+  var dbgTemp: StaticStr
   var i = scanFilename(s, dbgTemp, start)
   if dbgTemp.len == 0:
     invalidCommand()
@@ -312,7 +314,7 @@ proc dbgOut(s: cstring, start: int, currFrame: PFrame) =
   close(stream)
 
 proc dbgStackFrame(s: cstring, start: int, currFrame: PFrame) =
-  var dbgTemp: TStaticStr
+  var dbgTemp: StaticStr
   var i = scanFilename(s, dbgTemp, start)
   if dbgTemp.len == 0:
     # just write it to stdout:
@@ -325,7 +327,7 @@ proc dbgStackFrame(s: cstring, start: int, currFrame: PFrame) =
     listFrame(stream, currFrame)
     close(stream)
 
-proc readLine(f: TFile, line: var TStaticStr): bool =
+proc readLine(f: File, line: var StaticStr): bool =
   while true:
     var c = fgetc(f)
     if c < 0'i32:
@@ -358,7 +360,7 @@ proc commandPrompt() =
     again = true
     dbgFramePtr = framePtr # for going down and up the stack
     dbgDown = 0 # how often we did go down
-    dbgTemp: TStaticStr
+    dbgTemp: StaticStr
 
   while again:
     write(stdout, "*** endb| >>")
@@ -394,7 +396,13 @@ proc commandPrompt() =
       again = false
       quit(1) # BUGFIX: quit with error code > 0
     elif ?"e" or ?"eval":
+      var
+        prevState = dbgState
+        prevSkipFrame = dbgSkipToFrame
+      dbgState = dbSkipCurrent
       dbgEvaluate(stdout, dbgUser.data, i, dbgFramePtr)
+      dbgState = prevState
+      dbgSkipToFrame = prevSkipFrame
     elif ?"o" or ?"out":
       dbgOut(dbgUser.data, i, dbgFramePtr)
     elif ?"stackframe":
@@ -402,9 +410,21 @@ proc commandPrompt() =
     elif ?"w" or ?"where":
       dbgShowExecutionPoint()
     elif ?"l" or ?"locals":
+      var
+        prevState = dbgState
+        prevSkipFrame = dbgSkipToFrame
+      dbgState = dbSkipCurrent
       listLocals(stdout, dbgFramePtr)
+      dbgState = prevState
+      dbgSkipToFrame = prevSkipFrame
     elif ?"g" or ?"globals":
+      var
+        prevState = dbgState
+        prevSkipFrame = dbgSkipToFrame
+      dbgState = dbSkipCurrent
       listGlobals(stdout)
+      dbgState = prevState
+      dbgSkipToFrame = prevSkipFrame
     elif ?"u" or ?"up":
       if dbgDown <= 0:
         debugOut("[Warning] cannot go up any further ")
@@ -482,7 +502,7 @@ proc dbgWriteStackTrace(f: PFrame) =
     inc(i)
     b = b.prev
   for j in countdown(i-1, 0):
-    if tempFrames[j] == nil: 
+    if tempFrames[j] == nil:
       write(stdout, "(")
       write(stdout, skipped)
       write(stdout, " calls omitted) ...")
diff --git a/lib/system/excpt.nim b/lib/system/excpt.nim
index 1b3471978..df28c1493 100644
--- a/lib/system/excpt.nim
+++ b/lib/system/excpt.nim
@@ -44,10 +44,12 @@ var
     # a global variable for the root of all try blocks
   currException {.threadvar.}: ref Exception
 
+proc getFrame*(): PFrame {.compilerRtl, inl.} = framePtr
+
 proc popFrame {.compilerRtl, inl.} =
   framePtr = framePtr.prev
 
-proc setFrame(s: PFrame) {.compilerRtl, inl.} =
+proc setFrame*(s: PFrame) {.compilerRtl, inl.} =
   framePtr = s
 
 proc pushSafePoint(s: PSafePoint) {.compilerRtl, inl.} =
@@ -58,23 +60,23 @@ proc pushSafePoint(s: PSafePoint) {.compilerRtl, inl.} =
 proc popSafePoint {.compilerRtl, inl.} =
   excHandler = excHandler.prev
 
-proc pushCurrentException(e: ref Exception) {.compilerRtl, inl.} = 
+proc pushCurrentException(e: ref Exception) {.compilerRtl, inl.} =
   e.parent = currException
   currException = e
 
 proc popCurrentException {.compilerRtl, inl.} =
-  currException = currException.parent
+  currException = nil # currException.parent
 
 # some platforms have native support for stack traces:
 const
   nativeStackTraceSupported* = (defined(macosx) or defined(linux)) and
                               not NimStackTrace
-  hasSomeStackTrace = NimStackTrace or 
+  hasSomeStackTrace = NimStackTrace or
     defined(nativeStackTrace) and nativeStackTraceSupported
 
 when defined(nativeStacktrace) and nativeStackTraceSupported:
   type
-    TDl_info {.importc: "Dl_info", header: "<dlfcn.h>", 
+    TDl_info {.importc: "Dl_info", header: "<dlfcn.h>",
                final, pure.} = object
       dli_fname: cstring
       dli_fbase: pointer
@@ -98,7 +100,7 @@ when defined(nativeStacktrace) and nativeStackTraceSupported:
         tempDlInfo: TDl_info
     # This is allowed to be expensive since it only happens during crashes
     # (but this way you don't need manual stack tracing)
-    var size = backtrace(cast[ptr pointer](addr(tempAddresses)), 
+    var size = backtrace(cast[ptr pointer](addr(tempAddresses)),
                          len(tempAddresses))
     var enabled = false
     for i in 0..size-1:
@@ -123,7 +125,7 @@ when defined(nativeStacktrace) and nativeStackTraceSupported:
 when not hasThreadSupport:
   var
     tempFrames: array [0..127, PFrame] # should not be alloc'd on stack
-  
+
 proc auxWriteStackTrace(f: PFrame, s: var string) =
   when hasThreadSupport:
     var
@@ -160,7 +162,7 @@ proc auxWriteStackTrace(f: PFrame, s: var string) =
     inc(i)
     b = b.prev
   for j in countdown(i-1, 0):
-    if tempFrames[j] == nil: 
+    if tempFrames[j] == nil:
       add(s, "(")
       add(s, $skipped)
       add(s, " calls omitted) ...")
@@ -214,41 +216,49 @@ proc raiseExceptionAux(e: ref Exception) =
     if not localRaiseHook(e): return
   if globalRaiseHook != nil:
     if not globalRaiseHook(e): return
-  if excHandler != nil:
-    if not excHandler.hasRaiseAction or excHandler.raiseAction(e):
+  when defined(cpp):
+    if e[] of OutOfMemError:
+      showErrorMessage(e.name)
+      quitOrDebug()
+    else:
       pushCurrentException(e)
-      c_longjmp(excHandler.context, 1)
-  elif e[] of OutOfMemError:
-    showErrorMessage(e.name)
-    quitOrDebug()
+      {.emit: "throw NimException(`e`, `e`->name);".}
   else:
-    when hasSomeStackTrace:
-      var buf = newStringOfCap(2000)
-      if isNil(e.trace): rawWriteStackTrace(buf)
-      else: add(buf, e.trace)
-      add(buf, "Error: unhandled exception: ")
-      if not isNil(e.msg): add(buf, e.msg)
-      add(buf, " [")
-      add(buf, $e.name)
-      add(buf, "]\n")
-      showErrorMessage(buf)
+    if excHandler != nil:
+      if not excHandler.hasRaiseAction or excHandler.raiseAction(e):
+        pushCurrentException(e)
+        c_longjmp(excHandler.context, 1)
+    elif e[] of OutOfMemError:
+      showErrorMessage(e.name)
+      quitOrDebug()
     else:
-      # ugly, but avoids heap allocations :-)
-      template xadd(buf, s, slen: expr) =
-        if L + slen < high(buf):
-          copyMem(addr(buf[L]), cstring(s), slen)
-          inc L, slen
-      template add(buf, s: expr) =
-        xadd(buf, s, s.len)
-      var buf: array [0..2000, char]
-      var L = 0
-      add(buf, "Error: unhandled exception: ")
-      if not isNil(e.msg): add(buf, e.msg)
-      add(buf, " [")
-      xadd(buf, e.name, c_strlen(e.name))
-      add(buf, "]\n")
-      showErrorMessage(buf)
-    quitOrDebug()
+      when hasSomeStackTrace:
+        var buf = newStringOfCap(2000)
+        if isNil(e.trace): rawWriteStackTrace(buf)
+        else: add(buf, e.trace)
+        add(buf, "Error: unhandled exception: ")
+        if not isNil(e.msg): add(buf, e.msg)
+        add(buf, " [")
+        add(buf, $e.name)
+        add(buf, "]\n")
+        showErrorMessage(buf)
+      else:
+        # ugly, but avoids heap allocations :-)
+        template xadd(buf, s, slen: expr) =
+          if L + slen < high(buf):
+            copyMem(addr(buf[L]), cstring(s), slen)
+            inc L, slen
+        template add(buf, s: expr) =
+          xadd(buf, s, s.len)
+        var buf: array [0..2000, char]
+        var L = 0
+        add(buf, "Error: unhandled exception: ")
+        if not isNil(e.msg): add(buf, e.msg)
+        add(buf, " [")
+        xadd(buf, e.name, c_strlen(e.name))
+        add(buf, "]\n")
+        showErrorMessage(buf)
+      quitOrDebug()
 
 proc raiseException(e: ref Exception, ename: cstring) {.compilerRtl.} =
   e.name = ename
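
raiseExceptionAux still consults localRaiseHook and globalRaiseHook before anything
else, on both the C and the new C++ code path, so user hooks keep working. A small
illustrative hook that merely counts raises and then lets the normal machinery continue
(returning false would swallow the exception); raisedCount is ours, not the runtime's:

  var raisedCount: int

  proc countRaises(e: ref Exception): bool {.nimcall.} =
    inc raisedCount      # cheap instrumentation
    result = true        # true = proceed with the regular raise handling

  globalRaiseHook = countRaises
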
@@ -309,7 +319,7 @@ when not defined(noSignalHandler):
   proc signalHandler(sig: cint) {.exportc: "signalHandler", noconv.} =
     template processSignal(s, action: expr) {.immediate,  dirty.} =
       if s == SIGINT: action("SIGINT: Interrupted by Ctrl-C.\n")
-      elif s == SIGSEGV: 
+      elif s == SIGSEGV:
         action("SIGSEGV: Illegal storage access. (Attempt to read from nil?)\n")
       elif s == SIGABRT:
         when defined(endb):
@@ -357,5 +367,6 @@ when not defined(noSignalHandler):
 
 proc setControlCHook(hook: proc () {.noconv.} not nil) =
   # ugly cast, but should work on all architectures:
-  type TSignalHandler = proc (sig: cint) {.noconv, benign.}
-  c_signal(SIGINT, cast[TSignalHandler](hook))
+  type SignalHandler = proc (sig: cint) {.noconv, benign.}
+  {.deprecated: [TSignalHandler: SignalHandler].}
+  c_signal(SIGINT, cast[SignalHandler](hook))
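
The TSignalHandler -> SignalHandler rename above follows the recipe used throughout this
commit: introduce the new name and keep the old one compiling via a deprecated alias. A
standalone sketch of the mechanism with a made-up type:

  type
    SampleHandler = proc (sig: cint) {.noconv.}   # hypothetical new-style name
  {.deprecated: [TSampleHandler: SampleHandler].}

  # old code that still says TSampleHandler keeps compiling, but now emits a
  # deprecation warning:
  var h: TSampleHandler
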
diff --git a/lib/system/gc.nim b/lib/system/gc.nim
index c4374d00c..0c632aeb1 100644
--- a/lib/system/gc.nim
+++ b/lib/system/gc.nim
@@ -16,6 +16,10 @@
 # Special care has been taken to avoid recursion as far as possible to avoid
 # stack overflows when traversing deep datastructures. It is well-suited
 # for soft real time applications (like games).
+
+when defined(nimCoroutines):
+  import arch
+
 {.push profiler:off.}
 
 const
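
All of the coroutine-aware code that follows is gated on the nimCoroutines symbol, so
ordinary builds are unchanged; the extra bookkeeping only exists when the program is
compiled with -d:nimCoroutines. A trivial sketch of how such a compile-time gate behaves:

  # with `nim c -d:nimCoroutines app.nim` the first branch is compiled in,
  # otherwise it is discarded entirely at compile time
  when defined(nimCoroutines):
    const gcStackMode = "one GcStack record per coroutine stack"
  else:
    const gcStackMode = "a single stack"
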
@@ -45,17 +49,17 @@ const
   rcShift = 3      # shift by rcShift to get the reference counter
   colorMask = 0b011
 type
-  TWalkOp = enum
+  WalkOp = enum
     waMarkGlobal,    # part of the backup/debug mark&sweep
     waMarkPrecise,   # part of the backup/debug mark&sweep
     waZctDecRef, waPush, waCycleDecRef, waMarkGray, waScan, waScanBlack,
     waCollectWhite #, waDebug
 
-  TFinalizer {.compilerproc.} = proc (self: pointer) {.nimcall, benign.}
+  Finalizer {.compilerproc.} = proc (self: pointer) {.nimcall, benign.}
     # A ref type can have a finalizer that is called before the object's
     # storage is freed.
 
-  TGcStat {.final, pure.} = object
+  GcStat {.final, pure.} = object
     stackScans: int          # number of performed stack scans (for statistics)
     cycleCollections: int    # number of performed full collections
     maxThreshold: int        # max threshold that has been set
@@ -64,35 +68,44 @@ type
     cycleTableSize: int      # max entries in cycle table
     maxPause: int64          # max measured GC pause in nanoseconds
 
-  TGcHeap {.final, pure.} = object # this contains the zero count and
+  GcStack {.final.} = object
+    prev: ptr GcStack
+    next: ptr GcStack
+    starts: pointer
+    pos: pointer
+    maxStackSize: int
+
+  GcHeap {.final, pure.} = object # this contains the zero count and
                                    # non-zero count table
+    stack: ptr GcStack
     stackBottom: pointer
     cycleThreshold: int
     when useCellIds:
       idGenerator: int
-    zct: TCellSeq            # the zero count table
-    decStack: TCellSeq       # cells in the stack that are to decref again
-    cycleRoots: TCellSet
-    tempStack: TCellSeq      # temporary stack for recursion elimination
+    zct: CellSeq             # the zero count table
+    decStack: CellSeq        # cells in the stack that are to decref again
+    cycleRoots: CellSet
+    tempStack: CellSeq       # temporary stack for recursion elimination
     recGcLock: int           # prevent recursion via finalizers; no thread lock
     when withRealTime:
-      maxPause: TNanos       # max allowed pause in nanoseconds; active if > 0
-    region: TMemRegion       # garbage collected region
-    stat: TGcStat
+      maxPause: Nanos        # max allowed pause in nanoseconds; active if > 0
+    region: MemRegion        # garbage collected region
+    stat: GcStat
     when useMarkForDebug or useBackupGc:
-      marked: TCellSet
-
+      marked: CellSet
+{.deprecated: [TWalkOp: WalkOp, TFinalizer: Finalizer, TGcHeap: GcHeap,
+              TGcStat: GcStat].}
 var
-  gch {.rtlThreadVar.}: TGcHeap
+  gch {.rtlThreadVar.}: GcHeap
 
 when not defined(useNimRtl):
   instantiateForRegion(gch.region)
 
-template acquire(gch: TGcHeap) =
+template acquire(gch: GcHeap) =
   when hasThreadSupport and hasSharedHeap:
     acquireSys(HeapLock)
 
-template release(gch: TGcHeap) =
+template release(gch: GcHeap) =
   when hasThreadSupport and hasSharedHeap:
     releaseSys(HeapLock)
 
@@ -104,18 +117,18 @@ template gcAssert(cond: bool, msg: string) =
       writeStackTrace()
       quit 1
 
-proc addZCT(s: var TCellSeq, c: PCell) {.noinline.} =
+proc addZCT(s: var CellSeq, c: PCell) {.noinline.} =
   if (c.refcount and ZctFlag) == 0:
     c.refcount = c.refcount or ZctFlag
     add(s, c)
 
 proc cellToUsr(cell: PCell): pointer {.inline.} =
   # convert object (=pointer to refcount) to pointer to userdata
-  result = cast[pointer](cast[ByteAddress](cell)+%ByteAddress(sizeof(TCell)))
+  result = cast[pointer](cast[ByteAddress](cell)+%ByteAddress(sizeof(Cell)))
 
 proc usrToCell(usr: pointer): PCell {.inline.} =
   # convert pointer to userdata to object (=pointer to refcount)
-  result = cast[PCell](cast[ByteAddress](usr)-%ByteAddress(sizeof(TCell)))
+  result = cast[PCell](cast[ByteAddress](usr)-%ByteAddress(sizeof(Cell)))
 
 proc canBeCycleRoot(c: PCell): bool {.inline.} =
   result = ntfAcyclic notin c.typ.flags
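
The GcStack records added to GcHeap above form a doubly linked list with one node per
registered stack (just the main stack normally, one per coroutine with -d:nimCoroutines).
A hedged sketch of walking that list directly through the declared prev/next fields; the
real code presumably goes through an items iterator provided by gc_common.nim:

  proc countStacks(gch: var GcHeap): int =
    # follow the `next` links from the list head
    var it = gch.stack
    while it != nil:
      inc result
      it = it.next
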
@@ -152,11 +165,11 @@ template gcTrace(cell, state: expr): stmt {.immediate.} =
   when traceGC: traceCell(cell, state)
 
 # forward declarations:
-proc collectCT(gch: var TGcHeap) {.benign.}
-proc isOnStack*(p: pointer): bool {.noinline, benign.}
-proc forAllChildren(cell: PCell, op: TWalkOp) {.benign.}
-proc doOperation(p: pointer, op: TWalkOp) {.benign.}
-proc forAllChildrenAux(dest: pointer, mt: PNimType, op: TWalkOp) {.benign.}
+proc collectCT(gch: var GcHeap) {.benign.}
+proc isOnStack(p: pointer): bool {.noinline, benign.}
+proc forAllChildren(cell: PCell, op: WalkOp) {.benign.}
+proc doOperation(p: pointer, op: WalkOp) {.benign.}
+proc forAllChildrenAux(dest: pointer, mt: PNimType, op: WalkOp) {.benign.}
 # we need the prototype here for debugging purposes
 
 when hasThreadSupport and hasSharedHeap:
@@ -178,7 +191,7 @@ proc prepareDealloc(cell: PCell) =
     # prevent recursive entering here by a lock.
     # XXX: we should set the cell's children to nil!
     inc(gch.recGcLock)
-    (cast[TFinalizer](cell.typ.finalizer))(cellToUsr(cell))
+    (cast[Finalizer](cell.typ.finalizer))(cellToUsr(cell))
     dec(gch.recGcLock)
 
 proc rtlAddCycleRoot(c: PCell) {.rtl, inl.} =
@@ -276,7 +289,7 @@ proc unsureAsgnRef(dest: PPointer, src: pointer) {.compilerProc.} =
 proc initGC() =
   when not defined(useNimRtl):
     when traceGC:
-      for i in low(TCellState)..high(TCellState): init(states[i])
+      for i in low(CellState)..high(CellState): init(states[i])
     gch.cycleThreshold = InitialCycleThreshold
     gch.stat.stackScans = 0
     gch.stat.cycleCollections = 0
@@ -292,28 +305,15 @@ proc initGC() =
     when useMarkForDebug or useBackupGc:
       init(gch.marked)
 
-var
-  localGcInitialized {.rtlThreadVar.}: bool
-
-proc setupForeignThreadGc*() =
-  ## call this if you registered a callback that will be run from a thread not
-  ## under your control. This has a cheap thread-local guard, so the GC for
-  ## this thread will only be initialized once per thread, no matter how often
-  ## it is called.
-  if not localGcInitialized:
-    localGcInitialized = true
-    var stackTop {.volatile.}: pointer
-    setStackBottom(addr(stackTop))
-    initGC()
-
 when useMarkForDebug or useBackupGc:
   type
-    TGlobalMarkerProc = proc () {.nimcall, benign.}
+    GlobalMarkerProc = proc () {.nimcall, benign.}
+  {.deprecated: [TGlobalMarkerProc: GlobalMarkerProc].}
   var
     globalMarkersLen: int
-    globalMarkers: array[0.. 7_000, TGlobalMarkerProc]
+    globalMarkers: array[0.. 7_000, GlobalMarkerProc]
 
-  proc nimRegisterGlobalMarker(markerProc: TGlobalMarkerProc) {.compilerProc.} =
+  proc nimRegisterGlobalMarker(markerProc: GlobalMarkerProc) {.compilerProc.} =
     if globalMarkersLen <= high(globalMarkers):
       globalMarkers[globalMarkersLen] = markerProc
       inc globalMarkersLen
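
setupForeignThreadGc and its localGcInitialized guard disappear from gc.nim here;
presumably they now live in gc_common.nim, which is included further down. The proc keeps
its documented role: call it from threads the Nim runtime did not create. A hedged usage
sketch, assuming a threaded build (--threads:on):

  proc myCallback() {.cdecl.} =
    # hypothetical callback that some C library runs on a thread it created itself:
    # initialize this thread's GC once, after which Nim allocations are safe here
    setupForeignThreadGc()
    var buffer = newSeq[int](128)
    buffer[0] = 42
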
@@ -321,11 +321,11 @@ when useMarkForDebug or useBackupGc:
       echo "[GC] cannot register global variable; too many global variables"
       quit 1
 
-proc cellsetReset(s: var TCellSet) =
+proc cellsetReset(s: var CellSet) =
   deinit(s)
   init(s)
 
-proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: TWalkOp) {.benign.} =
+proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: WalkOp) {.benign.} =
   var d = cast[ByteAddress](dest)
   case n.kind
   of nkSlot: forAllChildrenAux(cast[pointer](d +% n.offset), n.typ, op)
@@ -345,7 +345,7 @@ proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: TWalkOp) {.benign.} =
     if m != nil: forAllSlotsAux(dest, m, op)
   of nkNone: sysAssert(false, "forAllSlotsAux")
 
-proc forAllChildrenAux(dest: pointer, mt: PNimType, op: TWalkOp) =
+proc forAllChildrenAux(dest: pointer, mt: PNimType, op: WalkOp) =
   var d = cast[ByteAddress](dest)
   if dest == nil: return # nothing to do
   if ntfNoRefs notin mt.flags:
@@ -359,7 +359,7 @@ proc forAllChildrenAux(dest: pointer, mt: PNimType, op: TWalkOp) =
         forAllChildrenAux(cast[pointer](d +% i *% mt.base.size), mt.base, op)
     else: discard
 
-proc forAllChildren(cell: PCell, op: TWalkOp) =
+proc forAllChildren(cell: PCell, op: WalkOp) =
   gcAssert(cell != nil, "forAllChildren: 1")
   gcAssert(isAllocatedPtr(gch.region, cell), "forAllChildren: 2")
   gcAssert(cell.typ != nil, "forAllChildren: 3")
@@ -380,7 +380,7 @@ proc forAllChildren(cell: PCell, op: TWalkOp) =
             GenericSeqSize), cell.typ.base, op)
     else: discard
 
-proc addNewObjToZCT(res: PCell, gch: var TGcHeap) {.inline.} =
+proc addNewObjToZCT(res: PCell, gch: var GcHeap) {.inline.} =
   # we check the last 8 entries (cache line) for a slot that could be reused.
   # In 63% of all cases we succeed here! But we have to optimize the heck
   # out of this small linear search so that ``newObj`` is not slowed down.
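
The comment above describes addNewObjToZCT's fast path: before growing the ZCT, look at
the last (at most) eight entries — roughly one cache line — and overwrite one that no
longer needs to be tracked instead of appending. A rough sketch of that idea; the reuse
test shown is an assumption, the real implementation applies its own criterion:

  proc addNewObjToZctSketch(res: PCell, zct: var CellSeq) =
    let L = zct.len
    for i in countdown(L-1, max(0, L-8)):
      if zct.d[i].refcount >=% rcIncrement:  # assumed test: entry gained a real reference
        zct.d[i] = res                       # reuse the slot, no append needed
        return
    add(zct, res)
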
@@ -431,13 +431,13 @@ proc gcInvariant*() =
     markForDebug(gch)
 {.pop.}
 
-proc rawNewObj(typ: PNimType, size: int, gch: var TGcHeap): pointer =
+proc rawNewObj(typ: PNimType, size: int, gch: var GcHeap): pointer =
   # generates a new object and sets its reference counter to 0
   sysAssert(allocInv(gch.region), "rawNewObj begin")
   acquire(gch)
   gcAssert(typ.kind in {tyRef, tyString, tySequence}, "newObj: 1")
   collectCT(gch)
-  var res = cast[PCell](rawAlloc(gch.region, size + sizeof(TCell)))
+  var res = cast[PCell](rawAlloc(gch.region, size + sizeof(Cell)))
   gcAssert((cast[ByteAddress](res) and (MemAlign-1)) == 0, "newObj: 2")
   # now it is buffered in the ZCT
   res.typ = typ
@@ -486,7 +486,7 @@ proc newObjRC1(typ: PNimType, size: int): pointer {.compilerRtl.} =
   collectCT(gch)
   sysAssert(allocInv(gch.region), "newObjRC1 after collectCT")
 
-  var res = cast[PCell](rawAlloc(gch.region, size + sizeof(TCell)))
+  var res = cast[PCell](rawAlloc(gch.region, size + sizeof(Cell)))
   sysAssert(allocInv(gch.region), "newObjRC1 after rawAlloc")
   sysAssert((cast[ByteAddress](res) and (MemAlign-1)) == 0, "newObj: 2")
   # now it is buffered in the ZCT
@@ -515,7 +515,7 @@ proc newSeqRC1(typ: PNimType, len: int): pointer {.compilerRtl.} =
   cast[PGenericSeq](result).reserved = len
   when defined(memProfiler): nimProfile(size)
 
-proc growObj(old: pointer, newsize: int, gch: var TGcHeap): pointer =
+proc growObj(old: pointer, newsize: int, gch: var GcHeap): pointer =
   acquire(gch)
   collectCT(gch)
   var ol = usrToCell(old)
@@ -523,13 +523,13 @@ proc growObj(old: pointer, newsize: int, gch: var TGcHeap): pointer =
   gcAssert(ol.typ.kind in {tyString, tySequence}, "growObj: 2")
   sysAssert(allocInv(gch.region), "growObj begin")
 
-  var res = cast[PCell](rawAlloc(gch.region, newsize + sizeof(TCell)))
+  var res = cast[PCell](rawAlloc(gch.region, newsize + sizeof(Cell)))
   var elemSize = 1
   if ol.typ.kind != tyString: elemSize = ol.typ.base.size
 
   var oldsize = cast[PGenericSeq](old).len*elemSize + GenericSeqSize
-  copyMem(res, ol, oldsize + sizeof(TCell))
-  zeroMem(cast[pointer](cast[ByteAddress](res)+% oldsize +% sizeof(TCell)),
+  copyMem(res, ol, oldsize + sizeof(Cell))
+  zeroMem(cast[pointer](cast[ByteAddress](res)+% oldsize +% sizeof(Cell)),
           newsize-oldsize)
   sysAssert((cast[ByteAddress](res) and (MemAlign-1)) == 0, "growObj: 3")
   # This can be wrong for intermediate temps that are nevertheless on the
@@ -564,7 +564,7 @@ proc growObj(old: pointer, newsize: int, gch: var TGcHeap): pointer =
       decRef(ol)
   else:
     sysAssert(ol.typ != nil, "growObj: 5")
-    zeroMem(ol, sizeof(TCell))
+    zeroMem(ol, sizeof(Cell))
   release(gch)
   when useCellIds:
     inc gch.idGenerator
@@ -580,7 +580,7 @@ proc growObj(old: pointer, newsize: int): pointer {.rtl.} =
 
 # ---------------- cycle collector -------------------------------------------
 
-proc freeCyclicCell(gch: var TGcHeap, c: PCell) =
+proc freeCyclicCell(gch: var GcHeap, c: PCell) =
   prepareDealloc(c)
   gcTrace(c, csCycFreed)
   when logGC: writeCell("cycle collector dealloc cell", c)
@@ -589,7 +589,7 @@ proc freeCyclicCell(gch: var TGcHeap, c: PCell) =
     rawDealloc(gch.region, c)
   else:
     gcAssert(c.typ != nil, "freeCyclicCell")
-    zeroMem(c, sizeof(TCell))
+    zeroMem(c, sizeof(Cell))
 
 proc markGray(s: PCell) =
   if s.color != rcGray:
@@ -620,7 +620,7 @@ proc collectWhite(s: PCell) =
     forAllChildren(s, waCollectWhite)
     freeCyclicCell(gch, s)
 
-proc markRoots(gch: var TGcHeap) =
+proc markRoots(gch: var GcHeap) =
   var tabSize = 0
   for s in elements(gch.cycleRoots):
     #writeCell("markRoot", s)
@@ -635,7 +635,7 @@ proc markRoots(gch: var TGcHeap) =
   gch.stat.cycleTableSize = max(gch.stat.cycleTableSize, tabSize)
 
 when useBackupGc:
-  proc sweep(gch: var TGcHeap) =
+  proc sweep(gch: var GcHeap) =
     for x in allObjects(gch.region):
       if isCell(x):
         # cast to PCell is correct here:
@@ -643,7 +643,7 @@ when useBackupGc:
         if c notin gch.marked: freeCyclicCell(gch, c)
 
 when useMarkForDebug or useBackupGc:
-  proc markS(gch: var TGcHeap, c: PCell) =
+  proc markS(gch: var GcHeap, c: PCell) =
     incl(gch.marked, c)
     gcAssert gch.tempStack.len == 0, "stack not empty!"
     forAllChildren(c, waMarkPrecise)
@@ -653,10 +653,10 @@ when useMarkForDebug or useBackupGc:
       if not containsOrIncl(gch.marked, d):
         forAllChildren(d, waMarkPrecise)
 
-  proc markGlobals(gch: var TGcHeap) =
+  proc markGlobals(gch: var GcHeap) =
     for i in 0 .. < globalMarkersLen: globalMarkers[i]()
 
-  proc stackMarkS(gch: var TGcHeap, p: pointer) {.inline.} =
+  proc stackMarkS(gch: var GcHeap, p: pointer) {.inline.} =
     # the addresses are not as cells on the stack, so turn them to cells:
     var cell = usrToCell(p)
     var c = cast[TAddress](cell)
@@ -688,7 +688,7 @@ when logGC:
       forAllChildren(s, waDebug)
       c_fprintf(c_stdout, "}\n")
 
-proc doOperation(p: pointer, op: TWalkOp) =
+proc doOperation(p: pointer, op: WalkOp) =
   if p == nil: return
   var c: PCell = usrToCell(p)
   gcAssert(c != nil, "doOperation: 1")
@@ -733,19 +733,19 @@ proc doOperation(p: pointer, op: TWalkOp) =
   #of waDebug: debugGraph(c)
 
 proc nimGCvisit(d: pointer, op: int) {.compilerRtl.} =
-  doOperation(d, TWalkOp(op))
+  doOperation(d, WalkOp(op))
 
-proc collectZCT(gch: var TGcHeap): bool {.benign.}
+proc collectZCT(gch: var GcHeap): bool {.benign.}
 
 when useMarkForDebug or useBackupGc:
-  proc markStackAndRegistersForSweep(gch: var TGcHeap) {.noinline, cdecl,
+  proc markStackAndRegistersForSweep(gch: var GcHeap) {.noinline, cdecl,
                                                          benign.}
 
-proc collectRoots(gch: var TGcHeap) =
+proc collectRoots(gch: var GcHeap) =
   for s in elements(gch.cycleRoots):
     collectWhite(s)
 
-proc collectCycles(gch: var TGcHeap) =
+proc collectCycles(gch: var GcHeap) =
   # ensure the ZCT 'color' is not used:
   while gch.zct.len > 0: discard collectZCT(gch)
   when useBackupGc:
@@ -778,7 +778,7 @@ proc collectCycles(gch: var TGcHeap) =
     if cycleRootsLen != 0:
       cfprintf(cstdout, "cycle roots: %ld\n", cycleRootsLen)
 
-proc gcMark(gch: var TGcHeap, p: pointer) {.inline.} =
+proc gcMark(gch: var GcHeap, p: pointer) {.inline.} =
   # the addresses are not as cells on the stack, so turn them to cells:
   sysAssert(allocInv(gch.region), "gcMark begin")
   var cell = usrToCell(p)
@@ -798,7 +798,7 @@ proc gcMark(gch: var TGcHeap, p: pointer) {.inline.} =
         add(gch.decStack, cell)
   sysAssert(allocInv(gch.region), "gcMark end")
 
-proc markThreadStacks(gch: var TGcHeap) =
+proc markThreadStacks(gch: var GcHeap) =
   when hasThreadSupport and hasSharedHeap:
     {.error: "not fully implemented".}
     var it = threadList
@@ -814,142 +814,16 @@ proc markThreadStacks(gch: var TGcHeap) =
         sp = sp +% sizeof(pointer)
       it = it.next
 
-# ----------------- stack management --------------------------------------
-#  inspired from Smart Eiffel
-
-when defined(sparc):
-  const stackIncreases = false
-elif defined(hppa) or defined(hp9000) or defined(hp9000s300) or
-     defined(hp9000s700) or defined(hp9000s800) or defined(hp9000s820):
-  const stackIncreases = true
-else:
-  const stackIncreases = false
-
-when not defined(useNimRtl):
-  {.push stack_trace: off.}
-  proc setStackBottom(theStackBottom: pointer) =
-    #c_fprintf(c_stdout, "stack bottom: %p;\n", theStackBottom)
-    # the first init must be the one that defines the stack bottom:
-    if gch.stackBottom == nil: gch.stackBottom = theStackBottom
-    else:
-      var a = cast[ByteAddress](theStackBottom) # and not PageMask - PageSize*2
-      var b = cast[ByteAddress](gch.stackBottom)
-      #c_fprintf(c_stdout, "old: %p new: %p;\n",gch.stackBottom,theStackBottom)
-      when stackIncreases:
-        gch.stackBottom = cast[pointer](min(a, b))
-      else:
-        gch.stackBottom = cast[pointer](max(a, b))
-  {.pop.}
-
-proc stackSize(): int {.noinline.} =
-  var stackTop {.volatile.}: pointer
-  result = abs(cast[int](addr(stackTop)) - cast[int](gch.stackBottom))
-
-when defined(sparc): # For SPARC architecture.
-  proc isOnStack(p: pointer): bool =
-    var stackTop {.volatile.}: pointer
-    stackTop = addr(stackTop)
-    var b = cast[TAddress](gch.stackBottom)
-    var a = cast[TAddress](stackTop)
-    var x = cast[TAddress](p)
-    result = a <=% x and x <=% b
-
-  template forEachStackSlot(gch, gcMark: expr) {.immediate, dirty.} =
-    when defined(sparcv9):
-      asm  """"flushw \n" """
-    else:
-      asm  """"ta      0x3   ! ST_FLUSH_WINDOWS\n" """
-
-    var
-      max = gch.stackBottom
-      sp: PPointer
-      stackTop: array[0..1, pointer]
-    sp = addr(stackTop[0])
-    # Addresses decrease as the stack grows.
-    while sp <= max:
-      gcMark(gch, sp[])
-      sp = cast[PPointer](cast[TAddress](sp) +% sizeof(pointer))
-
-elif defined(ELATE):
-  {.error: "stack marking code is to be written for this architecture".}
-
-elif stackIncreases:
-  # ---------------------------------------------------------------------------
-  # Generic code for architectures where addresses increase as the stack grows.
-  # ---------------------------------------------------------------------------
-  proc isOnStack(p: pointer): bool =
-    var stackTop {.volatile.}: pointer
-    stackTop = addr(stackTop)
-    var a = cast[TAddress](gch.stackBottom)
-    var b = cast[TAddress](stackTop)
-    var x = cast[TAddress](p)
-    result = a <=% x and x <=% b
-
-  var
-    jmpbufSize {.importc: "sizeof(jmp_buf)", nodecl.}: int
-      # a little hack to get the size of a TJmpBuf in the generated C code
-      # in a platform independent way
-
-  template forEachStackSlot(gch, gcMark: expr) {.immediate, dirty.} =
-    var registers: C_JmpBuf
-    if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
-      var max = cast[TAddress](gch.stackBottom)
-      var sp = cast[TAddress](addr(registers)) +% jmpbufSize -% sizeof(pointer)
-      # sp will traverse the JMP_BUF as well (jmp_buf size is added,
-      # otherwise sp would be below the registers structure).
-      while sp >=% max:
-        gcMark(gch, cast[ppointer](sp)[])
-        sp = sp -% sizeof(pointer)
-
-else:
-  # ---------------------------------------------------------------------------
-  # Generic code for architectures where addresses decrease as the stack grows.
-  # ---------------------------------------------------------------------------
-  proc isOnStack(p: pointer): bool =
-    var stackTop {.volatile.}: pointer
-    stackTop = addr(stackTop)
-    var b = cast[ByteAddress](gch.stackBottom)
-    var a = cast[ByteAddress](stackTop)
-    var x = cast[ByteAddress](p)
-    result = a <=% x and x <=% b
-
-  template forEachStackSlot(gch, gcMark: expr) {.immediate, dirty.} =
-    # We use a jmp_buf buffer that is in the C stack.
-    # Used to traverse the stack and registers assuming
-    # that 'setjmp' will save registers in the C stack.
-    type PStackSlice = ptr array [0..7, pointer]
-    var registers {.noinit.}: C_JmpBuf
-    if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
-      var max = cast[ByteAddress](gch.stackBottom)
-      var sp = cast[ByteAddress](addr(registers))
-      # loop unrolled:
-      while sp <% max - 8*sizeof(pointer):
-        gcMark(gch, cast[PStackSlice](sp)[0])
-        gcMark(gch, cast[PStackSlice](sp)[1])
-        gcMark(gch, cast[PStackSlice](sp)[2])
-        gcMark(gch, cast[PStackSlice](sp)[3])
-        gcMark(gch, cast[PStackSlice](sp)[4])
-        gcMark(gch, cast[PStackSlice](sp)[5])
-        gcMark(gch, cast[PStackSlice](sp)[6])
-        gcMark(gch, cast[PStackSlice](sp)[7])
-        sp = sp +% sizeof(pointer)*8
-      # last few entries:
-      while sp <=% max:
-        gcMark(gch, cast[PPointer](sp)[])
-        sp = sp +% sizeof(pointer)
+include gc_common
 
-proc markStackAndRegisters(gch: var TGcHeap) {.noinline, cdecl.} =
+proc markStackAndRegisters(gch: var GcHeap) {.noinline, cdecl.} =
   forEachStackSlot(gch, gcMark)
 
 when useMarkForDebug or useBackupGc:
-  proc markStackAndRegistersForSweep(gch: var TGcHeap) =
+  proc markStackAndRegistersForSweep(gch: var GcHeap) =
     forEachStackSlot(gch, stackMarkS)
 
-# ----------------------------------------------------------------------------
-# end of non-portable code
-# ----------------------------------------------------------------------------
-
-proc collectZCT(gch: var TGcHeap): bool =
+proc collectZCT(gch: var GcHeap): bool =
   # Note: Freeing may add child objects to the ZCT! So essentially we do
   # deep freeing, which is bad for incremental operation. In order to
   # avoid a deep stack, we move objects to keep the ZCT small.
@@ -959,7 +833,7 @@ proc collectZCT(gch: var TGcHeap): bool =
 
   when withRealTime:
     var steps = workPackage
-    var t0: TTicks
+    var t0: Ticks
     if gch.maxPause > 0: t0 = getticks()
   while L[] > 0:
     var c = gch.zct.d[0]
@@ -992,7 +866,7 @@ proc collectZCT(gch: var TGcHeap): bool =
         rawDealloc(gch.region, c)
       else:
         sysAssert(c.typ != nil, "collectZCT 2")
-        zeroMem(c, sizeof(TCell))
+        zeroMem(c, sizeof(Cell))
     when withRealTime:
       if steps == 0:
         steps = workPackage
@@ -1005,7 +879,7 @@ proc collectZCT(gch: var TGcHeap): bool =
             return false
   result = true
 
-proc unmarkStackAndRegisters(gch: var TGcHeap) =
+proc unmarkStackAndRegisters(gch: var GcHeap) =
   var d = gch.decStack.d
   for i in 0..gch.decStack.len-1:
     sysAssert isAllocatedPtr(gch.region, d[i]), "unmarkStackAndRegisters"
@@ -1017,12 +891,13 @@ proc unmarkStackAndRegisters(gch: var TGcHeap) =
     #sysAssert c.typ != nil, "unmarkStackAndRegisters 2"
   gch.decStack.len = 0
 
-proc collectCTBody(gch: var TGcHeap) =
+proc collectCTBody(gch: var GcHeap) =
   when withRealTime:
     let t0 = getticks()
   sysAssert(allocInv(gch.region), "collectCT: begin")
 
-  gch.stat.maxStackSize = max(gch.stat.maxStackSize, stackSize())
+  when not defined(nimCoroutines):
+    gch.stat.maxStackSize = max(gch.stat.maxStackSize, stackSize())
   sysAssert(gch.decStack.len == 0, "collectCT")
   prepareForInteriorPointerChecking(gch.region)
   markStackAndRegisters(gch)
@@ -1049,15 +924,23 @@ proc collectCTBody(gch: var TGcHeap) =
         c_fprintf(c_stdout, "[GC] missed deadline: %ld\n", duration)
 
 when useMarkForDebug or useBackupGc:
-  proc markForDebug(gch: var TGcHeap) =
+  proc markForDebug(gch: var GcHeap) =
     markStackAndRegistersForSweep(gch)
     markGlobals(gch)
 
-proc collectCT(gch: var TGcHeap) =
+when defined(nimCoroutines):
+  proc currentStackSizes(): int =
+    for stack in items(gch.stack):
+      result = result + stackSize(stack.starts, stack.pos)
+
+proc collectCT(gch: var GcHeap) =
   # stackMarkCosts prevents some pathological behaviour: Stack marking
   # becomes more expensive with large stacks and large stacks mean that
   # cells with RC=0 are more likely to be kept alive by the stack.
-  let stackMarkCosts = max(stackSize() div (16*sizeof(int)), ZctThreshold)
+  when defined(nimCoroutines):
+    let stackMarkCosts = max(currentStackSizes() div (16*sizeof(int)), ZctThreshold)
+  else:
+    let stackMarkCosts = max(stackSize() div (16*sizeof(int)), ZctThreshold)
   if (gch.zct.len >= stackMarkCosts or (cycleGC and
       getOccupiedMem(gch.region)>=gch.cycleThreshold) or alwaysGC) and
       gch.recGcLock == 0:
@@ -1068,13 +951,13 @@ proc collectCT(gch: var TGcHeap) =
     collectCTBody(gch)
 
 when withRealTime:
-  proc toNano(x: int): TNanos {.inline.} =
+  proc toNano(x: int): Nanos {.inline.} =
     result = x * 1000
 
   proc GC_setMaxPause*(MaxPauseInUs: int) =
     gch.maxPause = MaxPauseInUs.toNano
 
-  proc GC_step(gch: var TGcHeap, us: int, strongAdvice: bool) =
+  proc GC_step(gch: var GcHeap, us: int, strongAdvice: bool) =
     acquire(gch)
     gch.maxPause = us.toNano
     if (gch.zct.len >= ZctThreshold or (cycleGC and
@@ -1126,8 +1009,13 @@ when not defined(useNimRtl):
              "[GC] max threshold: " & $gch.stat.maxThreshold & "\n" &
              "[GC] zct capacity: " & $gch.zct.cap & "\n" &
              "[GC] max cycle table size: " & $gch.stat.cycleTableSize & "\n" &
-             "[GC] max stack size: " & $gch.stat.maxStackSize & "\n" &
              "[GC] max pause time [ms]: " & $(gch.stat.maxPause div 1000_000)
+    when defined(nimCoroutines):
+      result = result & "[GC] number of stacks: " & $gch.stack.len & "\n"
+      for stack in items(gch.stack):
+        result = result & "[GC]   stack " & stack.starts.repr & "[GC]     max stack size " & $stack.maxStackSize & "\n"
+    else:
+      result = result & "[GC] max stack size: " & $gch.stat.maxStackSize & "\n"
     GC_enable()
 
 {.pop.}
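
With the real-time pieces above, GC_setMaxPause takes a budget in microseconds and toNano
converts it to nanoseconds (x * 1000); collectZCT and collectCTBody then watch the elapsed
time and stop before the deadline. A hedged usage sketch for a build with real-time GC
support enabled (withRealTime):

  GC_setMaxPause(2_000)            # aim for individual pauses under ~2 ms
  for frame in 0 .. 100:
    # allocate some garbage per "frame"; collections now try to respect the budget
    discard newSeq[int](1_000)
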
diff --git a/lib/system/gc2.nim b/lib/system/gc2.nim
index 4e3dee51c..4ca0d144f 100644
--- a/lib/system/gc2.nim
+++ b/lib/system/gc2.nim
@@ -37,24 +37,24 @@ const
 
   rcAlive = 0b00000           # object is reachable.
                               # color *black* in the original paper
-                              
+
   rcCycleCandidate = 0b00001  # possible root of a cycle. *purple*
 
   rcDecRefApplied = 0b00010   # the first dec-ref phase of the
                               # collector was already applied to this
                               # object. *gray*
-                              
+
   rcMaybeDead = 0b00011       # this object is a candidate for deletion
                               # during the collect cycles algorithm.
                               # *white*.
-                              
+
   rcReallyDead = 0b00100      # this is proved to be garbage
-  
+
   rcRetiredBuffer = 0b00101   # this is a seq or string buffer that
                               # was replaced by a resize operation.
                               # see growObj for details
 
-  rcColorMask = TRefCount(0b00111)
+  rcColorMask = RefCount(0b00111)
 
   rcZct = 0b01000             # already added to ZCT
   rcInCycleRoots = 0b10000    # already buffered as cycle candidate
@@ -80,14 +80,14 @@ const
     # The bit must also be set for new objects that are not rc1 and it must be
     # examined in the decref loop in collectCycles.
     # XXX: not implemented yet as tests didn't show any improvement from this
-   
+
   MarkingSkipsAcyclicObjects = true
-    # Acyclic objects can be safely ignored in the mark and scan phases, 
+    # Acyclic objects can be safely ignored in the mark and scan phases,
     # because they cannot contribute to the internal count.
     # XXX: if we generate specialized `markCyclic` and `markAcyclic`
     # procs we can further optimize this as there won't be need for any
     # checks in the code
-  
+
   MinimumStackMarking = false
     # Try to scan only the user stack and ignore the part of the stack
     # belonging to the GC itself. see setStackTop for further info.
@@ -97,34 +97,34 @@ const
   CollectCyclesStats = false
 
 type
-  TWalkOp = enum
+  WalkOp = enum
     waPush
 
-  TFinalizer {.compilerproc.} = proc (self: pointer) {.nimcall.}
+  Finalizer {.compilerproc.} = proc (self: pointer) {.nimcall.}
     # A ref type can have a finalizer that is called before the object's
     # storage is freed.
 
-  TGcStat {.final, pure.} = object
+  GcStat {.final, pure.} = object
     stackScans: int          # number of performed stack scans (for statistics)
     cycleCollections: int    # number of performed full collections
     maxThreshold: int        # max threshold that has been set
     maxStackSize: int        # max stack size
     maxStackCells: int       # max stack cells in ``decStack``
-    cycleTableSize: int      # max entries in cycle table  
+    cycleTableSize: int      # max entries in cycle table
     maxPause: int64          # max measured GC pause in nanoseconds
-  
-  TGcHeap {.final, pure.} = object # this contains the zero count and
+
+  GcHeap {.final, pure.} = object # this contains the zero count and
                                    # non-zero count table
     stackBottom: pointer
     stackTop: pointer
     cycleThreshold: int
-    zct: TCellSeq            # the zero count table
-    decStack: TCellSeq       # cells in the stack that are to decref again
-    cycleRoots: TCellSeq
-    tempStack: TCellSeq      # temporary stack for recursion elimination
-    freeStack: TCellSeq      # objects ready to be freed
+    zct: CellSeq             # the zero count table
+    decStack: CellSeq        # cells in the stack that are to decref again
+    cycleRoots: CellSeq
+    tempStack: CellSeq       # temporary stack for recursion elimination
+    freeStack: CellSeq       # objects ready to be freed
     recGcLock: int           # prevent recursion via finalizers; no thread lock
-    cycleRootsTrimIdx: int   # Trimming is a light-weight collection of the 
+    cycleRootsTrimIdx: int   # Trimming is a light-weight collection of the
                              # cycle roots table that uses a cheap linear scan
                              # to find only positively dead objects.
                              # One strategy is to perform it only for new objects
@@ -132,21 +132,22 @@ type
                              # This index indicates the start of the range of
                              # such new objects within the table.
     when withRealTime:
-      maxPause: TNanos       # max allowed pause in nanoseconds; active if > 0
-    region: TMemRegion       # garbage collected region
-    stat: TGcStat
-
+      maxPause: Nanos        # max allowed pause in nanoseconds; active if > 0
+    region: MemRegion        # garbage collected region
+    stat: GcStat
+{.deprecated: [TWalkOp: WalkOp, TFinalizer: Finalizer, TGcStat: GcStat,
+              TGcHeap: GcHeap].}
 var
-  gch* {.rtlThreadVar.}: TGcHeap
+  gch* {.rtlThreadVar.}: GcHeap
 
 when not defined(useNimRtl):
   instantiateForRegion(gch.region)
 
-template acquire(gch: TGcHeap) = 
+template acquire(gch: GcHeap) =
   when hasThreadSupport and hasSharedHeap:
     AcquireSys(HeapLock)
 
-template release(gch: TGcHeap) = 
+template release(gch: GcHeap) =
   when hasThreadSupport and hasSharedHeap:
     releaseSys(HeapLock)
 
@@ -169,7 +170,7 @@ template isDead(c: Pcell): expr =
   c.isBitUp(rcReallyDead) # also covers rcRetiredBuffer
 
 template clearBit(c: PCell, bit): expr =
-  c.refcount = c.refcount and (not TRefCount(bit))
+  c.refcount = c.refcount and (not RefCount(bit))
 
 when debugGC:
   var gcCollectionIdx = 0
@@ -184,7 +185,7 @@ when debugGC:
     of rcRetiredBuffer: return "retired"
     of rcReallyDead: return "dead"
     else: return "unknown?"
-  
+
   proc inCycleRootsStr(c: PCell): cstring =
     if c.isBitUp(rcInCycleRoots): result = "cycleroot"
     else: result = ""
@@ -206,7 +207,7 @@ when debugGC:
       c_fprintf(c_stdout, "[GC] %s: %p %d rc=%ld\n",
                 msg, c, kind, c.refcount shr rcShift)
 
-proc addZCT(zct: var TCellSeq, c: PCell) {.noinline.} =
+proc addZCT(zct: var CellSeq, c: PCell) {.noinline.} =
   if c.isBitDown(rcZct):
     c.setBit rcZct
     zct.add c
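
In this collector the low bits of a cell's refcount word carry the color (masked by
rcColorMask) plus the rcZct and rcInCycleRoots flags, while the actual reference count
sits above rcShift — that is what the debug output a few lines up prints with
`c.refcount shr rcShift`. Two illustrative helpers built only from names visible in this
diff:

  proc cellFlags(c: PCell): tuple[inZct, inRoots: bool] =
    # which bookkeeping lists the cell currently sits in
    result.inZct   = c.isBitUp(rcZct)
    result.inRoots = c.isBitUp(rcInCycleRoots)

  proc cellRefCount(c: PCell): int =
    # the plain reference count with color and flag bits stripped off
    int(c.refcount shr rcShift)
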
@@ -221,10 +222,10 @@ template setStackTop(gch) =
     var stackTop {.volatile.}: pointer
     gch.stackTop = addr(stackTop)
 
-template addCycleRoot(cycleRoots: var TCellSeq, c: PCell) =
+template addCycleRoot(cycleRoots: var CellSeq, c: PCell) =
   if c.color != rcCycleCandidate:
     c.setColor rcCycleCandidate
-    
+
     # the object may be buffered already. for example, consider:
     # decref; incref; decref
     if c.isBitDown(rcInCycleRoots):
@@ -233,11 +234,11 @@ template addCycleRoot(cycleRoots: var TCellSeq, c: PCell) =
 
 proc cellToUsr(cell: PCell): pointer {.inline.} =
   # convert object (=pointer to refcount) to pointer to userdata
-  result = cast[pointer](cast[ByteAddress](cell)+%ByteAddress(sizeof(TCell)))
+  result = cast[pointer](cast[ByteAddress](cell)+%ByteAddress(sizeof(Cell)))
 
 proc usrToCell*(usr: pointer): PCell {.inline.} =
   # convert pointer to userdata to object (=pointer to refcount)
-  result = cast[PCell](cast[ByteAddress](usr)-%ByteAddress(sizeof(TCell)))
+  result = cast[PCell](cast[ByteAddress](usr)-%ByteAddress(sizeof(Cell)))
 
 proc canbeCycleRoot(c: PCell): bool {.inline.} =
   result = ntfAcyclic notin c.typ.flags
@@ -254,11 +255,11 @@ when BitsPerPage mod (sizeof(int)*8) != 0:
   {.error: "(BitsPerPage mod BitsPerUnit) should be zero!".}
 
 # forward declarations:
-proc collectCT(gch: var TGcHeap)
+proc collectCT(gch: var GcHeap)
 proc isOnStack*(p: pointer): bool {.noinline.}
-proc forAllChildren(cell: PCell, op: TWalkOp)
-proc doOperation(p: pointer, op: TWalkOp)
-proc forAllChildrenAux(dest: pointer, mt: PNimType, op: TWalkOp)
+proc forAllChildren(cell: PCell, op: WalkOp)
+proc doOperation(p: pointer, op: WalkOp)
+proc forAllChildrenAux(dest: pointer, mt: PNimType, op: WalkOp)
 # we need the prototype here for debugging purposes
 
 proc prepareDealloc(cell: PCell) =
@@ -269,18 +270,19 @@ proc prepareDealloc(cell: PCell) =
     # prevend recursive entering here by a lock.
     # XXX: we should set the cell's children to nil!
     inc(gch.recGcLock)
-    (cast[TFinalizer](cell.typ.finalizer))(cellToUsr(cell))
+    (cast[Finalizer](cell.typ.finalizer))(cellToUsr(cell))
     dec(gch.recGcLock)
 
 when traceGC:
   # traceGC is a special switch to enable extensive debugging
   type
-    TCellState = enum
+    CellState = enum
       csAllocated, csFreed
+  {.deprecated: [TCellState: CellState].}
   var
-    states: array[TCellState, TCellSet]
+    states: array[CellState, CellSet]
 
-  proc traceCell(c: PCell, state: TCellState) =
+  proc traceCell(c: PCell, state: CellState) =
     case state
     of csAllocated:
       if c in states[csAllocated]:
@@ -300,12 +302,12 @@ when traceGC:
     incl(states[state], c)
 
   proc computeCellWeight(c: PCell): int =
-    var x: TCellSet
+    var x: CellSet
     x.init
 
     let startLen = gch.tempStack.len
     c.forAllChildren waPush
-    
+
     while startLen != gch.tempStack.len:
       dec gch.tempStack.len
       var c = gch.tempStack.d[gch.tempStack.len]
@@ -329,7 +331,7 @@ when traceGC:
     if c.isBitUp(rcMarkBit) and not isMarked:
       writecell("cyclic cell", cell)
       cprintf "Weight %d\n", cell.computeCellWeight
-      
+
   proc writeLeakage(onlyRoots: bool) =
     if onlyRoots:
       for c in elements(states[csAllocated]):
@@ -354,7 +356,7 @@ template WithHeapLock(blk: stmt): stmt =
   blk
   when hasThreadSupport and hasSharedHeap: ReleaseSys(HeapLock)
 
-proc rtlAddCycleRoot(c: PCell) {.rtl, inl.} = 
+proc rtlAddCycleRoot(c: PCell) {.rtl, inl.} =
   # we MUST access gch as a global here, because this crosses DLL boundaries!
   WithHeapLock: addCycleRoot(gch.cycleRoots, c)
 
@@ -363,30 +365,32 @@ proc rtlAddZCT(c: PCell) {.rtl, inl.} =
   WithHeapLock: addZCT(gch.zct, c)
 
 type
-  TCyclicMode = enum
+  CyclicMode = enum
     Cyclic,
     Acyclic,
     MaybeCyclic
 
-  TReleaseType = enum
+  ReleaseType = enum
     AddToZTC
     FreeImmediately
 
-  THeapType = enum
+  HeapType = enum
     LocalHeap
     SharedHeap
+{.deprecated: [TCyclicMode: CyclicMode, TReleaseType: ReleaseType,
+              THeapType: HeapType].}
 
-template `++` (rc: TRefCount, heapType: THeapType): stmt =
+template `++` (rc: RefCount, heapType: HeapType): stmt =
   when heapType == SharedHeap:
     discard atomicInc(rc, rcIncrement)
   else:
     inc rc, rcIncrement
 
-template `--`(rc: TRefCount): expr =
+template `--`(rc: RefCount): expr =
   dec rc, rcIncrement
   rc <% rcIncrement
 
-template `--` (rc: TRefCount, heapType: THeapType): expr =
+template `--` (rc: RefCount, heapType: HeapType): expr =
   (when heapType == SharedHeap: atomicDec(rc, rcIncrement) <% rcIncrement else: --rc)
 
 template doDecRef(cc: PCell,
@@ -419,7 +423,7 @@ template doIncRef(cc: PCell,
     elif IncRefRemovesCandidates:
       c.setColor rcAlive
   # XXX: this is not really atomic enough!
-  
+
 proc nimGCref(p: pointer) {.compilerProc, inline.} = doIncRef(usrToCell(p))
 proc nimGCunref(p: pointer) {.compilerProc, inline.} = doDecRef(usrToCell(p))
 
@@ -445,7 +449,7 @@ proc asgnRef(dest: PPointer, src: pointer) {.compilerProc, inline.} =
   doAsgnRef(dest, src, LocalHeap, MaybeCyclic)
 
 proc asgnRefNoCycle(dest: PPointer, src: pointer) {.compilerProc, inline.} =
-  # the code generator calls this proc if it is known at compile time that no 
+  # the code generator calls this proc if it is known at compile time that no
   # cycle is possible.
   doAsgnRef(dest, src, LocalHeap, Acyclic)
 
@@ -479,7 +483,7 @@ when hasThreadSupport and hasSharedHeap:
 proc initGC() =
   when not defined(useNimRtl):
     when traceGC:
-      for i in low(TCellState)..high(TCellState): init(states[i])
+      for i in low(CellState)..high(CellState): init(states[i])
     gch.cycleThreshold = InitialCycleThreshold
     gch.stat.stackScans = 0
     gch.stat.cycleCollections = 0
@@ -494,7 +498,7 @@ proc initGC() =
     init(gch.cycleRoots)
     init(gch.decStack)
 
-proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: TWalkOp) =
+proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: WalkOp) =
   var d = cast[ByteAddress](dest)
   case n.kind
   of nkSlot: forAllChildrenAux(cast[pointer](d +% n.offset), n.typ, op)
@@ -505,7 +509,7 @@ proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: TWalkOp) =
         if n.sons[i].typ.kind in {tyRef, tyString, tySequence}:
           doOperation(cast[PPointer](d +% n.sons[i].offset)[], op)
         else:
-          forAllChildrenAux(cast[pointer](d +% n.sons[i].offset), 
+          forAllChildrenAux(cast[pointer](d +% n.sons[i].offset),
                             n.sons[i].typ, op)
       else:
         forAllSlotsAux(dest, n.sons[i], op)
@@ -514,7 +518,7 @@ proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: TWalkOp) =
     if m != nil: forAllSlotsAux(dest, m, op)
   of nkNone: sysAssert(false, "forAllSlotsAux")
 
-proc forAllChildrenAux(dest: pointer, mt: PNimType, op: TWalkOp) =
+proc forAllChildrenAux(dest: pointer, mt: PNimType, op: WalkOp) =
   var d = cast[ByteAddress](dest)
   if dest == nil: return # nothing to do
   if ntfNoRefs notin mt.flags:
@@ -528,7 +532,7 @@ proc forAllChildrenAux(dest: pointer, mt: PNimType, op: TWalkOp) =
         forAllChildrenAux(cast[pointer](d +% i *% mt.base.size), mt.base, op)
     else: discard
 
-proc forAllChildren(cell: PCell, op: TWalkOp) =
+proc forAllChildren(cell: PCell, op: WalkOp) =
   sysAssert(cell != nil, "forAllChildren: 1")
   sysAssert(cell.typ != nil, "forAllChildren: 2")
   sysAssert cell.typ.kind in {tyRef, tySequence, tyString}, "forAllChildren: 3"
@@ -549,11 +553,11 @@ proc forAllChildren(cell: PCell, op: TWalkOp) =
                             cell.typ.base, op)
     else: discard
 
-proc addNewObjToZCT(res: PCell, gch: var TGcHeap) {.inline.} =
+proc addNewObjToZCT(res: PCell, gch: var GcHeap) {.inline.} =
   # we check the last 8 entries (cache line) for a slot that could be reused.
   # In 63% of all cases we succeed here! But we have to optimize the heck
   # out of this small linear search so that ``newObj`` is not slowed down.
-  # 
+  #
   # Slots to try          cache hit
   # 1                     32%
   # 4                     59%
@@ -593,29 +597,29 @@ proc addNewObjToZCT(res: PCell, gch: var TGcHeap) {.inline.} =
         return
     add(gch.zct, res)
 
-proc rawNewObj(typ: PNimType, size: int, gch: var TGcHeap, rc1 = false): pointer =
+proc rawNewObj(typ: PNimType, size: int, gch: var GcHeap, rc1 = false): pointer =
   # generates a new object and sets its reference counter to 0
   acquire(gch)
   sysAssert(allocInv(gch.region), "rawNewObj begin")
   sysAssert(typ.kind in {tyRef, tyString, tySequence}, "newObj: 1")
-  
+
   collectCT(gch)
   sysAssert(allocInv(gch.region), "rawNewObj after collect")
 
-  var res = cast[PCell](rawAlloc(gch.region, size + sizeof(TCell)))
+  var res = cast[PCell](rawAlloc(gch.region, size + sizeof(Cell)))
   sysAssert(allocInv(gch.region), "rawNewObj after rawAlloc")
 
   sysAssert((cast[ByteAddress](res) and (MemAlign-1)) == 0, "newObj: 2")
-  
+
   res.typ = typ
-  
+
   when trackAllocationSource and not hasThreadSupport:
     if framePtr != nil and framePtr.prev != nil and framePtr.prev.prev != nil:
       res.filename = framePtr.prev.prev.filename
       res.line = framePtr.prev.prev.line
     else:
       res.filename = "nofile"
-  
+
   if rc1:
     res.refcount = rcIncrement # refcount is 1
   else:
@@ -627,9 +631,9 @@ proc rawNewObj(typ: PNimType, size: int, gch: var TGcHeap, rc1 = false): pointer
       res.setBit(rcInCycleRoots)
       res.setColor rcCycleCandidate
       gch.cycleRoots.add res
-    
+
   sysAssert(isAllocatedPtr(gch.region, res), "newObj: 3")
-  
+
   when logGC: writeCell("new cell", res)
   gcTrace(res, csAllocated)
   release(gch)
@@ -638,20 +642,20 @@ proc rawNewObj(typ: PNimType, size: int, gch: var TGcHeap, rc1 = false): pointer
 
 {.pop.}
 
-proc freeCell(gch: var TGcHeap, c: PCell) =
+proc freeCell(gch: var GcHeap, c: PCell) =
   # prepareDealloc(c)
   gcTrace(c, csFreed)
 
   when reallyDealloc: rawDealloc(gch.region, c)
   else:
     sysAssert(c.typ != nil, "collectCycles")
-    zeroMem(c, sizeof(TCell))
+    zeroMem(c, sizeof(Cell))
 
-template eraseAt(cells: var TCellSeq, at: int): stmt =
+template eraseAt(cells: var CellSeq, at: int): stmt =
   cells.d[at] = cells.d[cells.len - 1]
   dec cells.len
 
-template trimAt(roots: var TCellSeq, at: int): stmt =
+template trimAt(roots: var CellSeq, at: int): stmt =
   # This will remove a cycle root candidate during trimming.
   # a candidate is removed either because it received a refup and
   # it's no longer a candidate or because it received further refdowns
@@ -696,7 +700,7 @@ proc newSeqRC1(typ: PNimType, len: int): pointer {.compilerRtl.} =
   cast[PGenericSeq](result).len = len
   cast[PGenericSeq](result).reserved = len
 
-proc growObj(old: pointer, newsize: int, gch: var TGcHeap): pointer =
+proc growObj(old: pointer, newsize: int, gch: var GcHeap): pointer =
   acquire(gch)
   collectCT(gch)
   var ol = usrToCell(old)
@@ -704,39 +708,39 @@ proc growObj(old: pointer, newsize: int, gch: var TGcHeap): pointer =
   sysAssert(ol.typ.kind in {tyString, tySequence}, "growObj: 2")
   sysAssert(allocInv(gch.region), "growObj begin")
 
-  var res = cast[PCell](rawAlloc(gch.region, newsize + sizeof(TCell)))
+  var res = cast[PCell](rawAlloc(gch.region, newsize + sizeof(Cell)))
   var elemSize = if ol.typ.kind != tyString: ol.typ.base.size
                  else: 1
-  
+
   var oldsize = cast[PGenericSeq](old).len*elemSize + GenericSeqSize
-  
+
   # XXX: This should happen outside
   # call user-defined move code
   # call user-defined default constructor
-  copyMem(res, ol, oldsize + sizeof(TCell))
-  zeroMem(cast[pointer](cast[ByteAddress](res)+% oldsize +% sizeof(TCell)),
+  copyMem(res, ol, oldsize + sizeof(Cell))
+  zeroMem(cast[pointer](cast[ByteAddress](res)+% oldsize +% sizeof(Cell)),
           newsize-oldsize)
 
   sysAssert((cast[ByteAddress](res) and (MemAlign-1)) == 0, "growObj: 3")
   sysAssert(res.refcount shr rcShift <=% 1, "growObj: 4")
-  
+
   when false:
     if ol.isBitUp(rcZct):
       var j = gch.zct.len-1
       var d = gch.zct.d
-      while j >= 0: 
+      while j >= 0:
         if d[j] == ol:
           d[j] = res
           break
         dec(j)
-    
+
     if ol.isBitUp(rcInCycleRoots):
       for i in 0 .. <gch.cycleRoots.len:
         if gch.cycleRoots.d[i] == ol:
           eraseAt(gch.cycleRoots, i)
 
     freeCell(gch, ol)
-  
+
   else:
     # the new buffer inherits the GC state of the old one
     if res.isBitUp(rcZct): gch.zct.add res
@@ -778,26 +782,27 @@ proc growObj(old: pointer, newsize: int): pointer {.rtl.} =
 
 # ---------------- cycle collector -------------------------------------------
 
-proc doOperation(p: pointer, op: TWalkOp) =
+proc doOperation(p: pointer, op: WalkOp) =
   if p == nil: return
   var c: PCell = usrToCell(p)
   sysAssert(c != nil, "doOperation: 1")
   gch.tempStack.add c
-  
+
 proc nimGCvisit(d: pointer, op: int) {.compilerRtl.} =
-  doOperation(d, TWalkOp(op))
+  doOperation(d, WalkOp(op))
 
 type
-  TRecursionType = enum 
+  RecursionType = enum
     FromChildren,
     FromRoot
+{.deprecated: [TRecursionType: RecursionType].}
 
-proc collectZCT(gch: var TGcHeap): bool
+proc collectZCT(gch: var GcHeap): bool
 
-template pseudoRecursion(typ: TRecursionType, body: stmt): stmt =
+template pseudoRecursion(typ: RecursionType, body: stmt): stmt =
   discard
 
-proc trimCycleRoots(gch: var TGcHeap, startIdx = gch.cycleRootsTrimIdx) =
+proc trimCycleRoots(gch: var GcHeap, startIdx = gch.cycleRootsTrimIdx) =
   var i = startIdx
   while i < gch.cycleRoots.len:
     if gch.cycleRoots.d[i].color != rcCycleCandidate:
@@ -808,7 +813,7 @@ proc trimCycleRoots(gch: var TGcHeap, startIdx = gch.cycleRootsTrimIdx) =
   gch.cycleRootsTrimIdx = gch.cycleRoots.len
 
 # we now use a much simpler and non-recursive algorithm for cycle removal
-proc collectCycles(gch: var TGcHeap) =
+proc collectCycles(gch: var GcHeap) =
   if gch.cycleRoots.len == 0: return
   gch.stat.cycleTableSize = max(gch.stat.cycleTableSize, gch.cycleRoots.len)
 
@@ -833,14 +838,14 @@ proc collectCycles(gch: var TGcHeap) =
     let startLen = gch.tempStack.len
     cell.setColor rcAlive
     cell.forAllChildren waPush
-    
+
     while startLen != gch.tempStack.len:
       dec gch.tempStack.len
       var c = gch.tempStack.d[gch.tempStack.len]
       if c.color != rcAlive:
         c.setColor rcAlive
         c.forAllChildren waPush
-  
+
   template earlyMarkAlive(stackRoots) =
     # This marks all objects reachable from the stack as alive before any
     # of the other stages is executed. Such objects cannot be garbage and
@@ -851,7 +856,7 @@ proc collectCycles(gch: var TGcHeap) =
       earlyMarkAliveRec(c)
 
   earlyMarkAlive(gch.decStack)
-  
+
   when CollectCyclesStats:
     let tAfterEarlyMarkAlive = getTicks()
 
@@ -859,7 +864,7 @@ proc collectCycles(gch: var TGcHeap) =
     let startLen = gch.tempStack.len
     cell.setColor rcDecRefApplied
     cell.forAllChildren waPush
-    
+
     while startLen != gch.tempStack.len:
       dec gch.tempStack.len
       var c = gch.tempStack.d[gch.tempStack.len]
@@ -871,7 +876,7 @@ proc collectCycles(gch: var TGcHeap) =
       if c.color != rcDecRefApplied:
         c.setColor rcDecRefApplied
         c.forAllChildren waPush
- 
+
   template markRoots(roots) =
     var i = 0
     while i < roots.len:
@@ -880,34 +885,34 @@ proc collectCycles(gch: var TGcHeap) =
         inc i
       else:
         roots.trimAt i
-  
+
   markRoots(gch.cycleRoots)
-  
+
   when CollectCyclesStats:
     let tAfterMark = getTicks()
     c_printf "COLLECT CYCLES %d: %d/%d\n", gcCollectionIdx, gch.cycleRoots.len, l0
-  
+
   template recursiveMarkAlive(cell) =
     let startLen = gch.tempStack.len
     cell.setColor rcAlive
     cell.forAllChildren waPush
-    
+
     while startLen != gch.tempStack.len:
       dec gch.tempStack.len
       var c = gch.tempStack.d[gch.tempStack.len]
       if ignoreObject(c): continue
       inc c.refcount, rcIncrement
       inc increfs
-      
+
       if c.color != rcAlive:
         c.setColor rcAlive
         c.forAllChildren waPush
- 
+
   template scanRoots(roots) =
     for i in 0 .. <roots.len:
       let startLen = gch.tempStack.len
       gch.tempStack.add roots.d[i]
-      
+
       while startLen != gch.tempStack.len:
         dec gch.tempStack.len
         var c = gch.tempStack.d[gch.tempStack.len]
@@ -923,9 +928,9 @@ proc collectCycles(gch: var TGcHeap) =
             c.setColor rcMaybeDead
             inc maybedeads
             c.forAllChildren waPush
-  
+
   scanRoots(gch.cycleRoots)
-  
+
   when CollectCyclesStats:
     let tAfterScan = getTicks()
 
@@ -936,7 +941,7 @@ proc collectCycles(gch: var TGcHeap) =
 
       let startLen = gch.tempStack.len
       gch.tempStack.add c
-      
+
       while startLen != gch.tempStack.len:
         dec gch.tempStack.len
         var c = gch.tempStack.d[gch.tempStack.len]
@@ -960,7 +965,7 @@ proc collectCycles(gch: var TGcHeap) =
       freeCell(gch, gch.freeStack.d[i])
 
   collectDead(gch.cycleRoots)
-  
+
   when CollectCyclesStats:
     let tFinal = getTicks()
     cprintf "times:\n  early mark alive: %d ms\n  mark: %d ms\n  scan: %d ms\n  collect: %d ms\n  decrefs: %d\n  increfs: %d\n  marked dead: %d\n  collected: %d\n",
@@ -981,7 +986,7 @@ proc collectCycles(gch: var TGcHeap) =
 
   when MarkingSkipsAcyclicObjects:
     # Collect the acyclic objects that became unreachable due to collected
-    # cyclic objects. 
+    # cyclic objects.
     discard collectZCT(gch)
     # collectZCT may add new cycle candidates and we may decide to loop here
     # if gch.cycleRoots.len > 0: repeat
@@ -990,7 +995,7 @@ var gcDebugging* = false
 
 var seqdbg* : proc (s: PGenericSeq) {.cdecl.}
 
-proc gcMark(gch: var TGcHeap, p: pointer) {.inline.} =
+proc gcMark(gch: var GcHeap, p: pointer) {.inline.} =
   # the addresses are not as cells on the stack, so turn them to cells:
   sysAssert(allocInv(gch.region), "gcMark begin")
   var cell = usrToCell(p)
@@ -1025,12 +1030,12 @@ proc gcMark(gch: var TGcHeap, p: pointer) {.inline.} =
         add(gch.decStack, cell)
   sysAssert(allocInv(gch.region), "gcMark end")
 
-proc markThreadStacks(gch: var TGcHeap) = 
+proc markThreadStacks(gch: var GcHeap) =
   when hasThreadSupport and hasSharedHeap:
     {.error: "not fully implemented".}
     var it = threadList
     while it != nil:
-      # mark registers: 
+      # mark registers:
       for i in 0 .. high(it.registers): gcMark(gch, it.registers[i])
       var sp = cast[ByteAddress](it.stackBottom)
       var max = cast[ByteAddress](it.stackTop)
@@ -1074,7 +1079,7 @@ proc stackSize(): int {.noinline.} =
 
 var
   jmpbufSize {.importc: "sizeof(jmp_buf)", nodecl.}: int
-    # a little hack to get the size of a TJmpBuf in the generated C code
+    # a little hack to get the size of a JmpBuf in the generated C code
     # in a platform independent way
 
 when defined(sparc): # For SPARC architecture.
@@ -1086,7 +1091,7 @@ when defined(sparc): # For SPARC architecture.
     var x = cast[ByteAddress](p)
     result = a <=% x and x <=% b
 
-  proc markStackAndRegisters(gch: var TGcHeap) {.noinline, cdecl.} =
+  proc markStackAndRegisters(gch: var GcHeap) {.noinline, cdecl.} =
     when defined(sparcv9):
       asm  """"flushw \n" """
     else:
@@ -1116,8 +1121,8 @@ elif stackIncreases:
     var b = cast[ByteAddress](stackTop)
     var x = cast[ByteAddress](p)
     result = a <=% x and x <=% b
-  
-  proc markStackAndRegisters(gch: var TGcHeap) {.noinline, cdecl.} =
+
+  proc markStackAndRegisters(gch: var GcHeap) {.noinline, cdecl.} =
     var registers: C_JmpBuf
     if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
       var max = cast[ByteAddress](gch.stackBottom)
@@ -1140,7 +1145,7 @@ else:
     var x = cast[ByteAddress](p)
     result = a <=% x and x <=% b
 
-  proc markStackAndRegisters(gch: var TGcHeap) {.noinline, cdecl.} =
+  proc markStackAndRegisters(gch: var GcHeap) {.noinline, cdecl.} =
     # We use a jmp_buf buffer that is in the C stack.
     # Used to traverse the stack and registers assuming
     # that 'setjmp' will save registers in the C stack.
@@ -1151,7 +1156,7 @@ else:
         # mark the registers
         var jmpbufPtr = cast[ByteAddress](addr(registers))
         var jmpbufEnd = jmpbufPtr +% jmpbufSize
-      
+
         while jmpbufPtr <=% jmpbufEnd:
           gcMark(gch, cast[PPointer](jmpbufPtr)[])
           jmpbufPtr = jmpbufPtr +% sizeof(pointer)
@@ -1181,7 +1186,7 @@ else:
 # end of non-portable code
 # ----------------------------------------------------------------------------
 
-proc releaseCell(gch: var TGcHeap, cell: PCell) =
+proc releaseCell(gch: var GcHeap, cell: PCell) =
   if cell.color != rcReallyDead:
     prepareDealloc(cell)
     cell.setColor rcReallyDead
@@ -1210,21 +1215,21 @@ proc releaseCell(gch: var TGcHeap, cell: PCell) =
   #  recursion).
   # We can ignore it now as the ZCT cleaner will reach it soon.
 
-proc collectZCT(gch: var TGcHeap): bool =
+proc collectZCT(gch: var GcHeap): bool =
   const workPackage = 100
   var L = addr(gch.zct.len)
-  
+
   when withRealtime:
     var steps = workPackage
-    var t0: TTicks
+    var t0: Ticks
     if gch.maxPause > 0: t0 = getticks()
-  
+
   while L[] > 0:
     var c = gch.zct.d[0]
     sysAssert c.isBitUp(rcZct), "collectZCT: rcZct missing!"
     sysAssert(isAllocatedPtr(gch.region, c), "collectZCT: isAllocatedPtr")
-    
-    # remove from ZCT:    
+
+    # remove from ZCT:
     c.clearBit(rcZct)
     gch.zct.d[0] = gch.zct.d[L[] - 1]
     dec(L[])
@@ -1232,7 +1237,7 @@ proc collectZCT(gch: var TGcHeap): bool =
     if c.refcount <% rcIncrement:
       # It may have an RC > 0, if it is on the hardware stack or
       # it has not been removed yet from the ZCT. This is because
-      # ``incref`` does not bother to remove the cell from the ZCT 
+      # ``incref`` does not bother to remove the cell from the ZCT
       # as this might be too slow.
       # In any case, it should be removed from the ZCT. But not
       # freed. **KEEP THIS IN MIND WHEN MAKING THIS INCREMENTAL!**
@@ -1247,7 +1252,7 @@ proc collectZCT(gch: var TGcHeap): bool =
         steps = workPackage
         if gch.maxPause > 0:
           let duration = getticks() - t0
-          # the GC's measuring is not accurate and needs some cleanup actions 
+          # the GC's measuring is not accurate and needs some cleanup actions
           # (stack unmarking), so subtract a short amount of time in order
           # to miss deadlines less often:
           if duration >= gch.maxPause - 50_000:
@@ -1257,14 +1262,14 @@ proc collectZCT(gch: var TGcHeap): bool =
   #deInit(gch.zct)
   #init(gch.zct)
 
-proc unmarkStackAndRegisters(gch: var TGcHeap) =
+proc unmarkStackAndRegisters(gch: var GcHeap) =
   var d = gch.decStack.d
   for i in 0 .. <gch.decStack.len:
     sysAssert isAllocatedPtr(gch.region, d[i]), "unmarkStackAndRegisters"
     # XXX: just call doDecRef?
     var c = d[i]
     sysAssert c.typ != nil, "unmarkStackAndRegisters 2"
-    
+
     if c.color == rcRetiredBuffer:
       continue
 
@@ -1273,7 +1278,7 @@ proc unmarkStackAndRegisters(gch: var TGcHeap) =
       # the object survived only because of a stack reference
       # it still doesn't have heap references
       addZCT(gch.zct, c)
-    
+
     if canbeCycleRoot(c):
       # any cyclic object reachable from the stack can be turned into
       # a leak if it's orphaned through the stack reference
@@ -1283,12 +1288,12 @@ proc unmarkStackAndRegisters(gch: var TGcHeap) =
 
   gch.decStack.len = 0
 
-proc collectCTBody(gch: var TGcHeap) =
+proc collectCTBody(gch: var GcHeap) =
   when withRealtime:
     let t0 = getticks()
   when debugGC: inc gcCollectionIdx
   sysAssert(allocInv(gch.region), "collectCT: begin")
-  
+
   gch.stat.maxStackSize = max(gch.stat.maxStackSize, stackSize())
   sysAssert(gch.decStack.len == 0, "collectCT")
   prepareForInteriorPointerChecking(gch.region)
@@ -1307,7 +1312,7 @@ proc collectCTBody(gch: var TGcHeap) =
         gch.stat.maxThreshold = max(gch.stat.maxThreshold, gch.cycleThreshold)
   unmarkStackAndRegisters(gch)
   sysAssert(allocInv(gch.region), "collectCT: end")
-  
+
   when withRealtime:
     let duration = getticks() - t0
     gch.stat.maxPause = max(gch.stat.maxPause, duration)
@@ -1315,24 +1320,24 @@ proc collectCTBody(gch: var TGcHeap) =
       if gch.maxPause > 0 and duration > gch.maxPause:
         c_fprintf(c_stdout, "[GC] missed deadline: %ld\n", duration)
 
-proc collectCT(gch: var TGcHeap) =
+proc collectCT(gch: var GcHeap) =
   if (gch.zct.len >= ZctThreshold or (cycleGC and
-      getOccupiedMem(gch.region)>=gch.cycleThreshold) or alwaysGC) and 
+      getOccupiedMem(gch.region)>=gch.cycleThreshold) or alwaysGC) and
       gch.recGcLock == 0:
     collectCTBody(gch)
 
 when withRealtime:
-  proc toNano(x: int): TNanos {.inline.} =
+  proc toNano(x: int): Nanos {.inline.} =
     result = x * 1000
 
   proc GC_setMaxPause*(MaxPauseInUs: int) =
     gch.maxPause = MaxPauseInUs.toNano
 
-  proc GC_step(gch: var TGcHeap, us: int, strongAdvice: bool) =
+  proc GC_step(gch: var GcHeap, us: int, strongAdvice: bool) =
     acquire(gch)
     gch.maxPause = us.toNano
     if (gch.zct.len >= ZctThreshold or (cycleGC and
-        getOccupiedMem(gch.region)>=gch.cycleThreshold) or alwaysGC) or 
+        getOccupiedMem(gch.region)>=gch.cycleThreshold) or alwaysGC) or
         strongAdvice:
       collectCTBody(gch)
     release(gch)
@@ -1340,13 +1345,13 @@ when withRealtime:
   proc GC_step*(us: int, strongAdvice = false) = GC_step(gch, us, strongAdvice)
 
 when not defined(useNimRtl):
-  proc GC_disable() = 
+  proc GC_disable() =
     when hasThreadSupport and hasSharedHeap:
       discard atomicInc(gch.recGcLock, 1)
     else:
       inc(gch.recGcLock)
   proc GC_enable() =
-    if gch.recGcLock > 0: 
+    if gch.recGcLock > 0:
       when hasThreadSupport and hasSharedHeap:
         discard atomicDec(gch.recGcLock, 1)
       else:
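
The realtime hooks touched above (GC_setMaxPause, GC_step) let an application bound collection pauses from its main loop. A minimal usage sketch, assuming the realtime GC is compiled in (commonly -d:useRealtimeGC); the frame helpers below are placeholders, not part of the runtime:

  GC_setMaxPause(100)                      # pause budget, in microseconds
  var running = true
  proc doFrameWork() = discard             # placeholder for real per-frame work
  while running:
    doFrameWork()
    GC_step(2_000, strongAdvice = false)   # offer the GC up to ~2 ms of idle time
    running = false                        # placeholder exit so the sketch terminates
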
diff --git a/lib/system/gc_common.nim b/lib/system/gc_common.nim
new file mode 100644
index 000000000..47e8b4b1f
--- /dev/null
+++ b/lib/system/gc_common.nim
@@ -0,0 +1,277 @@
+#
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2015 Rokas Kupstys
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+proc len(stack: ptr GcStack): int =
+  if stack == nil:
+    return 0
+
+  var s = stack
+  result = 1
+  while s.next != nil:
+    inc(result)
+    s = s.next
+
+when defined(nimCoroutines):
+  proc stackSize(stackBottom: pointer, pos: pointer=nil): int {.noinline.} =
+    var sp: pointer
+    if pos == nil:
+      var stackTop {.volatile.}: pointer
+      sp = addr(stackTop)
+    else:
+      sp = pos
+    result = abs(cast[int](sp) - cast[int](stackBottom))
+
+  proc GC_addStack*(starts: pointer) {.cdecl, exportc.} =
+    var sp {.volatile.}: pointer
+    var stack = cast[ptr GcStack](alloc0(sizeof(GcStack)))
+    stack.starts = starts
+    stack.pos = addr sp
+    if gch.stack == nil:
+      gch.stack = stack
+    else:
+      stack.next = gch.stack
+      gch.stack.prev = stack
+      gch.stack = stack
+    # c_fprintf(c_stdout, "[GC] added stack 0x%016X\n", starts)
+
+  proc GC_removeStack*(starts: pointer) {.cdecl, exportc.} =
+    var stack = gch.stack
+    while stack != nil:
+      if stack.starts == starts:
+        if stack.prev == nil:
+          if stack.next != nil:
+            stack.next.prev = nil
+          gch.stack = stack.next
+        else:
+          stack.prev.next = stack.next
+          if stack.next != nil:
+              stack.next.prev = stack.prev
+        dealloc(stack)
+        # echo "[GC] removed stack ", starts.repr
+        break
+      else:
+        stack = stack.next
+
+  proc GC_setCurrentStack*(starts, pos: pointer) {.cdecl, exportc.} =
+    var stack = gch.stack
+    while stack != nil:
+      if stack.starts == starts:
+        stack.pos = pos
+        stack.maxStackSize = max(stack.maxStackSize, stackSize(stack.starts, pos))
+        return
+      stack = stack.next
+    gcAssert(false, "Current stack position does not belong to registered stack")
+else:
+  proc stackSize(): int {.noinline.} =
+    var stackTop {.volatile.}: pointer
+    result = abs(cast[int](addr(stackTop)) - cast[int](gch.stackBottom))
+
+iterator items(stack: ptr GcStack): ptr GcStack =
+  var s = stack
+  while not isNil(s):
+    yield s
+    s = s.next
+
+var
+  localGcInitialized {.rtlThreadVar.}: bool
+
+proc setupForeignThreadGc*() =
+  ## call this if you registered a callback that will be run from a thread not
+  ## under your control. This has a cheap thread-local guard, so the GC for
+  ## this thread will only be initialized once per thread, no matter how often
+  ## it is called.
+  if not localGcInitialized:
+    localGcInitialized = true
+    var stackTop {.volatile.}: pointer
+    setStackBottom(addr(stackTop))
+    initGC()
+
+# ----------------- stack management --------------------------------------
+#  inspired from Smart Eiffel
+
+when defined(emscripten):
+  const stackIncreases = true
+elif defined(sparc):
+  const stackIncreases = false
+elif defined(hppa) or defined(hp9000) or defined(hp9000s300) or
+     defined(hp9000s700) or defined(hp9000s800) or defined(hp9000s820):
+  const stackIncreases = true
+else:
+  const stackIncreases = false
+
+when not defined(useNimRtl):
+  {.push stack_trace: off.}
+  proc setStackBottom(theStackBottom: pointer) =
+    #c_fprintf(c_stdout, "stack bottom: %p;\n", theStackBottom)
+    # the first init must be the one that defines the stack bottom:
+    when defined(nimCoroutines):
+      GC_addStack(theStackBottom)
+    else:
+      if gch.stackBottom == nil: gch.stackBottom = theStackBottom
+      else:
+        var a = cast[ByteAddress](theStackBottom) # and not PageMask - PageSize*2
+        var b = cast[ByteAddress](gch.stackBottom)
+        #c_fprintf(c_stdout, "old: %p new: %p;\n",gch.stackBottom,theStackBottom)
+        when stackIncreases:
+          gch.stackBottom = cast[pointer](min(a, b))
+        else:
+          gch.stackBottom = cast[pointer](max(a, b))
+  {.pop.}
+
+when defined(sparc): # For SPARC architecture.
+  when defined(nimCoroutines):
+    {.error: "Nim coroutines are not supported on this platform."}
+
+  proc isOnStack(p: pointer): bool =
+    var stackTop {.volatile.}: pointer
+    stackTop = addr(stackTop)
+    var b = cast[TAddress](gch.stackBottom)
+    var a = cast[TAddress](stackTop)
+    var x = cast[TAddress](p)
+    result = a <=% x and x <=% b
+
+  template forEachStackSlot(gch, gcMark: expr) {.immediate, dirty.} =
+    when defined(sparcv9):
+      asm  """"flushw \n" """
+    else:
+      asm  """"ta      0x3   ! ST_FLUSH_WINDOWS\n" """
+
+    var
+      max = gch.stackBottom
+      sp: PPointer
+      stackTop: array[0..1, pointer]
+    sp = addr(stackTop[0])
+    # Addresses decrease as the stack grows.
+    while sp <= max:
+      gcMark(gch, sp[])
+      sp = cast[PPointer](cast[TAddress](sp) +% sizeof(pointer))
+
+elif defined(ELATE):
+  {.error: "stack marking code is to be written for this architecture".}
+
+elif stackIncreases:
+  # ---------------------------------------------------------------------------
+  # Generic code for architectures where addresses increase as the stack grows.
+  # ---------------------------------------------------------------------------
+  when defined(nimCoroutines):
+    {.error: "Nim coroutines are not supported on this platform."}
+  proc isOnStack(p: pointer): bool =
+    var stackTop {.volatile.}: pointer
+    stackTop = addr(stackTop)
+    var a = cast[ByteAddress](gch.stackBottom)
+    var b = cast[ByteAddress](stackTop)
+    var x = cast[ByteAddress](p)
+    result = a <=% x and x <=% b
+
+  var
+    jmpbufSize {.importc: "sizeof(jmp_buf)", nodecl.}: int
+      # a little hack to get the size of a JmpBuf in the generated C code
+      # in a platform independent way
+
+  template forEachStackSlot(gch, gcMark: expr) {.immediate, dirty.} =
+    var registers {.noinit.}: C_JmpBuf
+    if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
+      var max = cast[ByteAddress](gch.stackBottom)
+      var sp = cast[ByteAddress](addr(registers)) +% jmpbufSize -% sizeof(pointer)
+      # sp will traverse the JMP_BUF as well (jmp_buf size is added,
+      # otherwise sp would be below the registers structure).
+      while sp >=% max:
+        gcMark(gch, cast[PPointer](sp)[])
+        sp = sp -% sizeof(pointer)
+
+else:
+  # ---------------------------------------------------------------------------
+  # Generic code for architectures where addresses decrease as the stack grows.
+  # ---------------------------------------------------------------------------
+  when defined(nimCoroutines):
+    proc isOnStack(p: pointer): bool =
+      var stackTop {.volatile.}: pointer
+      stackTop = addr(stackTop)
+      for stack in items(gch.stack):
+        var b = cast[ByteAddress](stack.starts)
+        var a = cast[ByteAddress](stack.starts) - stack.maxStackSize
+        var x = cast[ByteAddress](p)
+        if a <=% x and x <=% b:
+          return true
+
+    template forEachStackSlot(gch, gcMark: expr) {.immediate, dirty.} =
+      # We use a jmp_buf buffer that is in the C stack.
+      # Used to traverse the stack and registers assuming
+      # that 'setjmp' will save registers in the C stack.
+      type PStackSlice = ptr array [0..7, pointer]
+      var registers {.noinit.}: Registers
+      getRegisters(registers)
+      for i in registers.low .. registers.high:
+        gcMark(gch, cast[PPointer](registers[i]))
+
+      for stack in items(gch.stack):
+        stack.maxStackSize = max(stack.maxStackSize, stackSize(stack.starts))
+        var max = cast[ByteAddress](stack.starts)
+        var sp = cast[ByteAddress](stack.pos)
+        # loop unrolled:
+        while sp <% max - 8*sizeof(pointer):
+          gcMark(gch, cast[PStackSlice](sp)[0])
+          gcMark(gch, cast[PStackSlice](sp)[1])
+          gcMark(gch, cast[PStackSlice](sp)[2])
+          gcMark(gch, cast[PStackSlice](sp)[3])
+          gcMark(gch, cast[PStackSlice](sp)[4])
+          gcMark(gch, cast[PStackSlice](sp)[5])
+          gcMark(gch, cast[PStackSlice](sp)[6])
+          gcMark(gch, cast[PStackSlice](sp)[7])
+          sp = sp +% sizeof(pointer)*8
+        # last few entries:
+        while sp <=% max:
+          gcMark(gch, cast[PPointer](sp)[])
+          sp = sp +% sizeof(pointer)
+  else:
+    proc isOnStack(p: pointer): bool =
+      var stackTop {.volatile.}: pointer
+      stackTop = addr(stackTop)
+      var b = cast[ByteAddress](gch.stackBottom)
+      var a = cast[ByteAddress](stackTop)
+      var x = cast[ByteAddress](p)
+      result = a <=% x and x <=% b
+
+    template forEachStackSlot(gch, gcMark: expr) {.immediate, dirty.} =
+      # We use a jmp_buf buffer that is in the C stack.
+      # Used to traverse the stack and registers assuming
+      # that 'setjmp' will save registers in the C stack.
+      type PStackSlice = ptr array [0..7, pointer]
+      var registers {.noinit.}: C_JmpBuf
+      if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
+        var max = cast[ByteAddress](gch.stackBottom)
+        var sp = cast[ByteAddress](addr(registers))
+        when defined(amd64):
+          # words within the jmp_buf structure may not be properly aligned.
+          let regEnd = sp +% sizeof(registers)
+          while sp <% regEnd:
+            gcMark(gch, cast[PPointer](sp)[])
+            gcMark(gch, cast[PPointer](sp +% sizeof(pointer) div 2)[])
+            sp = sp +% sizeof(pointer)
+        # Make sure sp is word-aligned
+        sp = sp and not (sizeof(pointer) - 1)
+        # loop unrolled:
+        while sp <% max - 8*sizeof(pointer):
+          gcMark(gch, cast[PStackSlice](sp)[0])
+          gcMark(gch, cast[PStackSlice](sp)[1])
+          gcMark(gch, cast[PStackSlice](sp)[2])
+          gcMark(gch, cast[PStackSlice](sp)[3])
+          gcMark(gch, cast[PStackSlice](sp)[4])
+          gcMark(gch, cast[PStackSlice](sp)[5])
+          gcMark(gch, cast[PStackSlice](sp)[6])
+          gcMark(gch, cast[PStackSlice](sp)[7])
+          sp = sp +% sizeof(pointer)*8
+        # last few entries:
+        while sp <=% max:
+          gcMark(gch, cast[PPointer](sp)[])
+          sp = sp +% sizeof(pointer)
+
+# ----------------------------------------------------------------------------
+# end of non-portable code
+# ----------------------------------------------------------------------------
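
setupForeignThreadGc above is the piece that makes GC allocations legal on threads the Nim runtime did not create (for example, callbacks fired by a C library from its own worker thread). A minimal sketch, assuming threads are enabled; the registration call is left hypothetical:

  proc onEvent(data: pointer) {.cdecl.} =
    setupForeignThreadGc()        # cheap per-thread guard; safe to call every time
    var buf = newString(64)       # GC-managed allocation is now safe on this thread
    buf[0] = 'x'

  # registerCallback(onEvent)     # hypothetical foreign API that invokes onEvent
  #                               # from a thread it owns
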
diff --git a/lib/system/gc_ms.nim b/lib/system/gc_ms.nim
index e287bf5d9..d1aecb7a2 100644
--- a/lib/system/gc_ms.nim
+++ b/lib/system/gc_ms.nim
@@ -9,6 +9,10 @@
 
 # A simple mark&sweep garbage collector for Nim. Define the
 # symbol ``gcUseBitvectors`` to generate a variant of this GC.
+
+when defined(nimCoroutines):
+  import arch
+
 {.push profiler:off.}
 
 const
@@ -26,49 +30,58 @@ when defined(memProfiler):
   proc nimProfile(requestedSize: int)
 
 type
-  TWalkOp = enum
+  WalkOp = enum
     waMarkGlobal,  # we need to mark conservatively for global marker procs
                    # as these may refer to a global var and not to a thread
                    # local
     waMarkPrecise  # fast precise marking
 
-  TFinalizer {.compilerproc.} = proc (self: pointer) {.nimcall, benign.}
+  Finalizer {.compilerproc.} = proc (self: pointer) {.nimcall, benign.}
     # A ref type can have a finalizer that is called before the object's
     # storage is freed.
 
-  TGlobalMarkerProc = proc () {.nimcall, benign.}
+  GlobalMarkerProc = proc () {.nimcall, benign.}
 
-  TGcStat = object
+  GcStat = object
     collections: int         # number of performed full collections
     maxThreshold: int        # max threshold that has been set
     maxStackSize: int        # max stack size
     freedObjects: int        # max entries in cycle table
 
-  TGcHeap = object           # this contains the zero count and
+  GcStack {.final.} = object
+    prev: ptr GcStack
+    next: ptr GcStack
+    starts: pointer
+    pos: pointer
+    maxStackSize: int
+
+  GcHeap = object            # this contains the zero count and
                              # non-zero count table
+    stack: ptr GcStack
     stackBottom: pointer
     cycleThreshold: int
     when useCellIds:
       idGenerator: int
     when withBitvectors:
-      allocated, marked: TCellSet
-    tempStack: TCellSeq      # temporary stack for recursion elimination
+      allocated, marked: CellSet
+    tempStack: CellSeq       # temporary stack for recursion elimination
     recGcLock: int           # prevent recursion via finalizers; no thread lock
-    region: TMemRegion       # garbage collected region
-    stat: TGcStat
-    additionalRoots: TCellSeq # dummy roots for GC_ref/unref
-
+    region: MemRegion        # garbage collected region
+    stat: GcStat
+    additionalRoots: CellSeq # dummy roots for GC_ref/unref
+{.deprecated: [TWalkOp: WalkOp, TFinalizer: Finalizer, TGcStat: GcStat,
+              TGlobalMarkerProc: GlobalMarkerProc, TGcHeap: GcHeap].}
 var
-  gch {.rtlThreadVar.}: TGcHeap
+  gch {.rtlThreadVar.}: GcHeap
 
 when not defined(useNimRtl):
   instantiateForRegion(gch.region)
 
-template acquire(gch: TGcHeap) =
+template acquire(gch: GcHeap) =
   when hasThreadSupport and hasSharedHeap:
     acquireSys(HeapLock)
 
-template release(gch: TGcHeap) =
+template release(gch: GcHeap) =
   when hasThreadSupport and hasSharedHeap:
     releaseSys(HeapLock)
 
@@ -80,11 +93,11 @@ template gcAssert(cond: bool, msg: string) =
 
 proc cellToUsr(cell: PCell): pointer {.inline.} =
   # convert object (=pointer to refcount) to pointer to userdata
-  result = cast[pointer](cast[ByteAddress](cell)+%ByteAddress(sizeof(TCell)))
+  result = cast[pointer](cast[ByteAddress](cell)+%ByteAddress(sizeof(Cell)))
 
 proc usrToCell(usr: pointer): PCell {.inline.} =
   # convert pointer to userdata to object (=pointer to refcount)
-  result = cast[PCell](cast[ByteAddress](usr)-%ByteAddress(sizeof(TCell)))
+  result = cast[PCell](cast[ByteAddress](usr)-%ByteAddress(sizeof(Cell)))
 
 proc canbeCycleRoot(c: PCell): bool {.inline.} =
   result = ntfAcyclic notin c.typ.flags
@@ -101,9 +114,9 @@ proc internRefcount(p: pointer): int {.exportc: "getRefcount".} =
 
 var
   globalMarkersLen: int
-  globalMarkers: array[0.. 7_000, TGlobalMarkerProc]
+  globalMarkers: array[0.. 7_000, GlobalMarkerProc]
 
-proc nimRegisterGlobalMarker(markerProc: TGlobalMarkerProc) {.compilerProc.} =
+proc nimRegisterGlobalMarker(markerProc: GlobalMarkerProc) {.compilerProc.} =
   if globalMarkersLen <= high(globalMarkers):
     globalMarkers[globalMarkersLen] = markerProc
     inc globalMarkersLen
@@ -116,11 +129,10 @@ when BitsPerPage mod (sizeof(int)*8) != 0:
   {.error: "(BitsPerPage mod BitsPerUnit) should be zero!".}
 
 # forward declarations:
-proc collectCT(gch: var TGcHeap) {.benign.}
-proc isOnStack*(p: pointer): bool {.noinline, benign.}
-proc forAllChildren(cell: PCell, op: TWalkOp) {.benign.}
-proc doOperation(p: pointer, op: TWalkOp) {.benign.}
-proc forAllChildrenAux(dest: pointer, mt: PNimType, op: TWalkOp) {.benign.}
+proc collectCT(gch: var GcHeap) {.benign.}
+proc forAllChildren(cell: PCell, op: WalkOp) {.benign.}
+proc doOperation(p: pointer, op: WalkOp) {.benign.}
+proc forAllChildrenAux(dest: pointer, mt: PNimType, op: WalkOp) {.benign.}
 # we need the prototype here for debugging purposes
 
 proc prepareDealloc(cell: PCell) =
@@ -131,7 +143,7 @@ proc prepareDealloc(cell: PCell) =
     # prevent recursive entering here by a lock.
     # XXX: we should set the cell's children to nil!
     inc(gch.recGcLock)
-    (cast[TFinalizer](cell.typ.finalizer))(cellToUsr(cell))
+    (cast[Finalizer](cell.typ.finalizer))(cellToUsr(cell))
     dec(gch.recGcLock)
 
 proc nimGCref(p: pointer) {.compilerProc.} =
@@ -168,21 +180,7 @@ proc initGC() =
       init(gch.allocated)
       init(gch.marked)
 
-var
-  localGcInitialized {.rtlThreadVar.}: bool
-
-proc setupForeignThreadGc*() =
-  ## call this if you registered a callback that will be run from a thread not
-  ## under your control. This has a cheap thread-local guard, so the GC for
-  ## this thread will only be initialized once per thread, no matter how often
-  ## it is called.
-  if not localGcInitialized:
-    localGcInitialized = true
-    var stackTop {.volatile.}: pointer
-    setStackBottom(addr(stackTop))
-    initGC()
-
-proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: TWalkOp) {.benign.} =
+proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: WalkOp) {.benign.} =
   var d = cast[ByteAddress](dest)
   case n.kind
   of nkSlot: forAllChildrenAux(cast[pointer](d +% n.offset), n.typ, op)
@@ -194,7 +192,7 @@ proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: TWalkOp) {.benign.} =
     if m != nil: forAllSlotsAux(dest, m, op)
   of nkNone: sysAssert(false, "forAllSlotsAux")
 
-proc forAllChildrenAux(dest: pointer, mt: PNimType, op: TWalkOp) =
+proc forAllChildrenAux(dest: pointer, mt: PNimType, op: WalkOp) =
   var d = cast[ByteAddress](dest)
   if dest == nil: return # nothing to do
   if ntfNoRefs notin mt.flags:
@@ -208,7 +206,7 @@ proc forAllChildrenAux(dest: pointer, mt: PNimType, op: TWalkOp) =
         forAllChildrenAux(cast[pointer](d +% i *% mt.base.size), mt.base, op)
     else: discard
 
-proc forAllChildren(cell: PCell, op: TWalkOp) =
+proc forAllChildren(cell: PCell, op: WalkOp) =
   gcAssert(cell != nil, "forAllChildren: 1")
   gcAssert(cell.typ != nil, "forAllChildren: 2")
   gcAssert cell.typ.kind in {tyRef, tySequence, tyString}, "forAllChildren: 3"
@@ -228,12 +226,12 @@ proc forAllChildren(cell: PCell, op: TWalkOp) =
             GenericSeqSize), cell.typ.base, op)
     else: discard
 
-proc rawNewObj(typ: PNimType, size: int, gch: var TGcHeap): pointer =
+proc rawNewObj(typ: PNimType, size: int, gch: var GcHeap): pointer =
   # generates a new object and sets its reference counter to 0
   acquire(gch)
   gcAssert(typ.kind in {tyRef, tyString, tySequence}, "newObj: 1")
   collectCT(gch)
-  var res = cast[PCell](rawAlloc(gch.region, size + sizeof(TCell)))
+  var res = cast[PCell](rawAlloc(gch.region, size + sizeof(Cell)))
   gcAssert((cast[ByteAddress](res) and (MemAlign-1)) == 0, "newObj: 2")
   # now it is buffered in the ZCT
   res.typ = typ
@@ -285,20 +283,20 @@ proc newSeqRC1(typ: PNimType, len: int): pointer {.compilerRtl.} =
   cast[PGenericSeq](result).reserved = len
   when defined(memProfiler): nimProfile(size)
 
-proc growObj(old: pointer, newsize: int, gch: var TGcHeap): pointer =
+proc growObj(old: pointer, newsize: int, gch: var GcHeap): pointer =
   acquire(gch)
   collectCT(gch)
   var ol = usrToCell(old)
   sysAssert(ol.typ != nil, "growObj: 1")
   gcAssert(ol.typ.kind in {tyString, tySequence}, "growObj: 2")
 
-  var res = cast[PCell](rawAlloc(gch.region, newsize + sizeof(TCell)))
+  var res = cast[PCell](rawAlloc(gch.region, newsize + sizeof(Cell)))
   var elemSize = 1
   if ol.typ.kind != tyString: elemSize = ol.typ.base.size
 
   var oldsize = cast[PGenericSeq](old).len*elemSize + GenericSeqSize
-  copyMem(res, ol, oldsize + sizeof(TCell))
-  zeroMem(cast[pointer](cast[ByteAddress](res)+% oldsize +% sizeof(TCell)),
+  copyMem(res, ol, oldsize + sizeof(Cell))
+  zeroMem(cast[pointer](cast[ByteAddress](res)+% oldsize +% sizeof(Cell)),
           newsize-oldsize)
   sysAssert((cast[ByteAddress](res) and (MemAlign-1)) == 0, "growObj: 3")
   when false:
@@ -306,7 +304,7 @@ proc growObj(old: pointer, newsize: int, gch: var TGcHeap): pointer =
     when withBitvectors: excl(gch.allocated, ol)
     when reallyDealloc: rawDealloc(gch.region, ol)
     else:
-      zeroMem(ol, sizeof(TCell))
+      zeroMem(ol, sizeof(Cell))
   when withBitvectors: incl(gch.allocated, res)
   when useCellIds:
     inc gch.idGenerator
@@ -322,7 +320,7 @@ proc growObj(old: pointer, newsize: int): pointer {.rtl.} =
 
 # ----------------- collector -----------------------------------------------
 
-proc mark(gch: var TGcHeap, c: PCell) =
+proc mark(gch: var GcHeap, c: PCell) =
   when withBitvectors:
     incl(gch.marked, c)
     gcAssert gch.tempStack.len == 0, "stack not empty!"
@@ -344,7 +342,7 @@ proc mark(gch: var TGcHeap, c: PCell) =
         d.refCount = rcBlack
         forAllChildren(d, waMarkPrecise)
 
-proc doOperation(p: pointer, op: TWalkOp) =
+proc doOperation(p: pointer, op: WalkOp) =
   if p == nil: return
   var c: PCell = usrToCell(p)
   gcAssert(c != nil, "doOperation: 1")
@@ -359,17 +357,17 @@ proc doOperation(p: pointer, op: TWalkOp) =
   of waMarkPrecise: add(gch.tempStack, c)
 
 proc nimGCvisit(d: pointer, op: int) {.compilerRtl.} =
-  doOperation(d, TWalkOp(op))
+  doOperation(d, WalkOp(op))
 
-proc freeCyclicCell(gch: var TGcHeap, c: PCell) =
+proc freeCyclicCell(gch: var GcHeap, c: PCell) =
   inc gch.stat.freedObjects
   prepareDealloc(c)
   when reallyDealloc: rawDealloc(gch.region, c)
   else:
     gcAssert(c.typ != nil, "freeCyclicCell")
-    zeroMem(c, sizeof(TCell))
+    zeroMem(c, sizeof(Cell))
 
-proc sweep(gch: var TGcHeap) =
+proc sweep(gch: var GcHeap) =
   when withBitvectors:
     for c in gch.allocated.elementsExcept(gch.marked):
       gch.allocated.excl(c)
@@ -391,12 +389,12 @@ when false:
           writeStackTrace()
           quit 1
 
-proc markGlobals(gch: var TGcHeap) =
+proc markGlobals(gch: var GcHeap) =
   for i in 0 .. < globalMarkersLen: globalMarkers[i]()
   let d = gch.additionalRoots.d
   for i in 0 .. < gch.additionalRoots.len: mark(gch, d[i])
 
-proc gcMark(gch: var TGcHeap, p: pointer) {.inline.} =
+proc gcMark(gch: var GcHeap, p: pointer) {.inline.} =
   # the addresses on the stack are not cells, so convert them to cells:
   var cell = usrToCell(p)
   var c = cast[ByteAddress](cell)
@@ -406,136 +404,14 @@ proc gcMark(gch: var TGcHeap, p: pointer) {.inline.} =
     if objStart != nil:
       mark(gch, objStart)
 
-# ----------------- stack management --------------------------------------
-#  inspired from Smart Eiffel
+include gc_common
 
-when defined(sparc):
-  const stackIncreases = false
-elif defined(hppa) or defined(hp9000) or defined(hp9000s300) or
-     defined(hp9000s700) or defined(hp9000s800) or defined(hp9000s820):
-  const stackIncreases = true
-else:
-  const stackIncreases = false
+proc markStackAndRegisters(gch: var GcHeap) {.noinline, cdecl.} =
+  forEachStackSlot(gch, gcMark)
 
-when not defined(useNimRtl):
-  {.push stack_trace: off.}
-  proc setStackBottom(theStackBottom: pointer) =
-    #c_fprintf(c_stdout, "stack bottom: %p;\n", theStackBottom)
-    # the first init must be the one that defines the stack bottom:
-    if gch.stackBottom == nil: gch.stackBottom = theStackBottom
-    else:
-      var a = cast[ByteAddress](theStackBottom) # and not PageMask - PageSize*2
-      var b = cast[ByteAddress](gch.stackBottom)
-      #c_fprintf(c_stdout, "old: %p new: %p;\n",gch.stackBottom,theStackBottom)
-      when stackIncreases:
-        gch.stackBottom = cast[pointer](min(a, b))
-      else:
-        gch.stackBottom = cast[pointer](max(a, b))
-  {.pop.}
-
-proc stackSize(): int {.noinline.} =
-  var stackTop {.volatile.}: pointer
-  result = abs(cast[int](addr(stackTop)) - cast[int](gch.stackBottom))
-
-when defined(sparc): # For SPARC architecture.
-  proc isOnStack(p: pointer): bool =
-    var stackTop {.volatile.}: pointer
-    stackTop = addr(stackTop)
-    var b = cast[ByteAddress](gch.stackBottom)
-    var a = cast[ByteAddress](stackTop)
-    var x = cast[ByteAddress](p)
-    result = a <=% x and x <=% b
-
-  proc markStackAndRegisters(gch: var TGcHeap) {.noinline, cdecl.} =
-    when defined(sparcv9):
-      asm  """"flushw \n" """
-    else:
-      asm  """"ta      0x3   ! ST_FLUSH_WINDOWS\n" """
-
-    var
-      max = gch.stackBottom
-      sp: PPointer
-      stackTop: array[0..1, pointer]
-    sp = addr(stackTop[0])
-    # Addresses decrease as the stack grows.
-    while sp <= max:
-      gcMark(gch, sp[])
-      sp = cast[ppointer](cast[ByteAddress](sp) +% sizeof(pointer))
-
-elif defined(ELATE):
-  {.error: "stack marking code is to be written for this architecture".}
-
-elif stackIncreases:
-  # ---------------------------------------------------------------------------
-  # Generic code for architectures where addresses increase as the stack grows.
-  # ---------------------------------------------------------------------------
-  proc isOnStack(p: pointer): bool =
-    var stackTop {.volatile.}: pointer
-    stackTop = addr(stackTop)
-    var a = cast[ByteAddress](gch.stackBottom)
-    var b = cast[ByteAddress](stackTop)
-    var x = cast[ByteAddress](p)
-    result = a <=% x and x <=% b
-
-  var
-    jmpbufSize {.importc: "sizeof(jmp_buf)", nodecl.}: int
-      # a little hack to get the size of a TJmpBuf in the generated C code
-      # in a platform independent way
-
-  proc markStackAndRegisters(gch: var TGcHeap) {.noinline, cdecl.} =
-    var registers: C_JmpBuf
-    if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
-      var max = cast[ByteAddress](gch.stackBottom)
-      var sp = cast[ByteAddress](addr(registers)) +% jmpbufSize -% sizeof(pointer)
-      # sp will traverse the JMP_BUF as well (jmp_buf size is added,
-      # otherwise sp would be below the registers structure).
-      while sp >=% max:
-        gcMark(gch, cast[ppointer](sp)[])
-        sp = sp -% sizeof(pointer)
-
-else:
-  # ---------------------------------------------------------------------------
-  # Generic code for architectures where addresses decrease as the stack grows.
-  # ---------------------------------------------------------------------------
-  proc isOnStack(p: pointer): bool =
-    var stackTop {.volatile.}: pointer
-    stackTop = addr(stackTop)
-    var b = cast[ByteAddress](gch.stackBottom)
-    var a = cast[ByteAddress](stackTop)
-    var x = cast[ByteAddress](p)
-    result = a <=% x and x <=% b
-
-  proc markStackAndRegisters(gch: var TGcHeap) {.noinline, cdecl.} =
-    # We use a jmp_buf buffer that is in the C stack.
-    # Used to traverse the stack and registers assuming
-    # that 'setjmp' will save registers in the C stack.
-    type PStackSlice = ptr array [0..7, pointer]
-    var registers {.noinit.}: C_JmpBuf
-    if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
-      var max = cast[ByteAddress](gch.stackBottom)
-      var sp = cast[ByteAddress](addr(registers))
-      # loop unrolled:
-      while sp <% max - 8*sizeof(pointer):
-        gcMark(gch, cast[PStackSlice](sp)[0])
-        gcMark(gch, cast[PStackSlice](sp)[1])
-        gcMark(gch, cast[PStackSlice](sp)[2])
-        gcMark(gch, cast[PStackSlice](sp)[3])
-        gcMark(gch, cast[PStackSlice](sp)[4])
-        gcMark(gch, cast[PStackSlice](sp)[5])
-        gcMark(gch, cast[PStackSlice](sp)[6])
-        gcMark(gch, cast[PStackSlice](sp)[7])
-        sp = sp +% sizeof(pointer)*8
-      # last few entries:
-      while sp <=% max:
-        gcMark(gch, cast[PPointer](sp)[])
-        sp = sp +% sizeof(pointer)
-
-# ----------------------------------------------------------------------------
-# end of non-portable code
-# ----------------------------------------------------------------------------
-
-proc collectCTBody(gch: var TGcHeap) =
-  gch.stat.maxStackSize = max(gch.stat.maxStackSize, stackSize())
+proc collectCTBody(gch: var GcHeap) =
+  when not defined(nimCoroutines):
+    gch.stat.maxStackSize = max(gch.stat.maxStackSize, stackSize())
   prepareForInteriorPointerChecking(gch.region)
   markStackAndRegisters(gch)
   markGlobals(gch)
@@ -549,7 +425,7 @@ proc collectCTBody(gch: var TGcHeap) =
   gch.stat.maxThreshold = max(gch.stat.maxThreshold, gch.cycleThreshold)
   sysAssert(allocInv(gch.region), "collectCT: end")
 
-proc collectCT(gch: var TGcHeap) =
+proc collectCT(gch: var GcHeap) =
   if getOccupiedMem(gch.region) >= gch.cycleThreshold and gch.recGcLock == 0:
     collectCTBody(gch)
 
@@ -589,8 +465,13 @@ when not defined(useNimRtl):
              "[GC] occupied memory: " & $getOccupiedMem() & "\n" &
              "[GC] collections: " & $gch.stat.collections & "\n" &
              "[GC] max threshold: " & $gch.stat.maxThreshold & "\n" &
-             "[GC] freed objects: " & $gch.stat.freedObjects & "\n" &
-             "[GC] max stack size: " & $gch.stat.maxStackSize & "\n"
+             "[GC] freed objects: " & $gch.stat.freedObjects & "\n"
+    when defined(nimCoroutines):
+      result = result & "[GC] number of stacks: " & $gch.stack.len & "\n"
+      for stack in items(gch.stack):
+        result = result & "[GC]   stack " & stack.starts.repr & "\n" & "[GC]     max stack size " & $stack.maxStackSize & "\n"
+    else:
+      result = result & "[GC] max stack size: " & $gch.stat.maxStackSize & "\n"
     GC_enable()
 
 {.pop.}
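
With the stack-scanning code factored out into gc_common.nim, each collector now only instantiates forEachStackSlot with its own marking proc, as markStackAndRegisters does above. A toy sketch of that template-instantiation pattern, written in the same old-style template syntax the file itself uses; the names here are illustrative only:

  template forEachItem(items, visit: expr) {.immediate, dirty.} =
    for it in items:
      visit(it)

  proc markOne(x: int) = echo "mark ", x

  proc markAll(xs: seq[int]) =
    forEachItem(xs, markOne)      # mirrors forEachStackSlot(gch, gcMark)

  markAll(@[1, 2, 3])
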
diff --git a/lib/system/hti.nim b/lib/system/hti.nim
index aff0c0e6f..bfb13059e 100644
--- a/lib/system/hti.nim
+++ b/lib/system/hti.nim
@@ -7,14 +7,14 @@
 #    distribution, for details about the copyright.
 #
 
-when declared(NimString): 
+when declared(NimString):
   # we are in system module:
   {.pragma: codegenType, compilerproc.}
 else:
   {.pragma: codegenType, importc.}
 
-type 
-  # This should be he same as ast.TTypeKind
+type
+  # This should be the same as ast.TTypeKind
   # many enum fields are not used at runtime
   TNimKind = enum
     tyNone,
@@ -73,7 +73,7 @@ type
     len: int
     sons: ptr array [0..0x7fff, ptr TNimNode]
 
-  TNimTypeFlag = enum 
+  TNimTypeFlag = enum
     ntfNoRefs = 0,     # type contains no tyRef, tySequence, tyString
     ntfAcyclic = 1,    # type cannot form a cycle
     ntfEnumHole = 2    # enum has holes and thus `$` for them needs the slow
@@ -88,5 +88,5 @@ type
     marker: proc (p: pointer, op: int) {.nimcall, benign.} # marker proc for GC
     deepcopy: proc (p: pointer): pointer {.nimcall, benign.}
   PNimType = ptr TNimType
-  
+
 # node.len may be the ``first`` element of a set
diff --git a/lib/system/inclrtl.nim b/lib/system/inclrtl.nim
index dbc961402..201a99ca7 100644
--- a/lib/system/inclrtl.nim
+++ b/lib/system/inclrtl.nim
@@ -20,21 +20,21 @@ when not defined(nimNewShared):
   {.pragma: gcsafe.}
 
 when defined(createNimRtl):
-  when defined(useNimRtl): 
+  when defined(useNimRtl):
     {.error: "Cannot create and use nimrtl at the same time!".}
   elif appType != "lib":
     {.error: "nimrtl must be built as a library!".}
 
-when defined(createNimRtl): 
+when defined(createNimRtl):
   {.pragma: rtl, exportc: "nimrtl_$1", dynlib, gcsafe.}
   {.pragma: inl.}
   {.pragma: compilerRtl, compilerproc, exportc: "nimrtl_$1", dynlib.}
 elif defined(useNimRtl):
-  when defined(windows): 
+  when defined(windows):
     const nimrtl* = "nimrtl.dll"
   elif defined(macosx):
-    const nimrtl* = "nimrtl.dylib"
-  else: 
+    const nimrtl* = "libnimrtl.dylib"
+  else:
     const nimrtl* = "libnimrtl.so"
   {.pragma: rtl, importc: "nimrtl_$1", dynlib: nimrtl, gcsafe.}
   {.pragma: inl.}
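
The rtl pragma defined above is what makes a proc live in the shared runtime: exported as nimrtl_$1 when building with -d:createNimRtl, and imported from nimrtl.dll / libnimrtl.dylib / libnimrtl.so when building with -d:useNimRtl. A hedged sketch of binding an extra symbol against the same library name constant; the symbol "nimrtl_someHelper" is hypothetical:

  when defined(useNimRtl):
    proc someHelper(x: int): int {.importc: "nimrtl_someHelper", dynlib: nimrtl.}
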
diff --git a/lib/system/jssys.nim b/lib/system/jssys.nim
index 3b55f62ca..eb6080d38 100644
--- a/lib/system/jssys.nim
+++ b/lib/system/jssys.nim
@@ -15,13 +15,13 @@ else:
 proc log*(s: cstring) {.importc: "console.log", varargs, nodecl.}
 
 type
-  PSafePoint = ptr TSafePoint
-  TSafePoint {.compilerproc, final.} = object
+  PSafePoint = ptr SafePoint
+  SafePoint {.compilerproc, final.} = object
     prev: PSafePoint # points to next safe point
     exc: ref Exception
 
-  PCallFrame = ptr TCallFrame
-  TCallFrame {.importc, nodecl, final.} = object
+  PCallFrame = ptr CallFrame
+  CallFrame {.importc, nodecl, final.} = object
     prev: PCallFrame
     procname: cstring
     line: int # current line number
@@ -33,12 +33,11 @@ type
     lineNumber {.importc.}: int
     message {.importc.}: cstring
     stack {.importc.}: cstring
+{.deprecated: [TSafePoint: SafePoint, TCallFrame: CallFrame].}
 
 var
   framePtr {.importc, nodecl, volatile.}: PCallFrame
-  excHandler {.importc, nodecl, volatile.}: PSafePoint = nil
-    # list of exception handlers
-    # a global variable for the root of all try blocks
+  excHandler {.importc, nodecl, volatile.}: int = 0
   lastJSError {.importc, nodecl, volatile.}: PJSError = nil
 
 {.push stacktrace: off, profiler:off.}
@@ -51,21 +50,20 @@ proc nimCharToStr(x: char): string {.compilerproc.} =
   result[0] = x
 
 proc getCurrentExceptionMsg*(): string =
-  if excHandler != nil and excHandler.exc != nil:
-    return $excHandler.exc.msg
-  elif lastJSError != nil:
+  if lastJSError != nil:
     return $lastJSError.message
   else:
     return ""
 
 proc auxWriteStackTrace(f: PCallFrame): string =
   type
-    TTempFrame = tuple[procname: cstring, line: int]
+    TempFrame = tuple[procname: cstring, line: int]
+  {.deprecated: [TTempFrame: TempFrame].}
   var
     it = f
     i = 0
     total = 0
-    tempFrames: array [0..63, TTempFrame]
+    tempFrames: array [0..63, TempFrame]
   while it != nil and i <= high(tempFrames):
     tempFrames[i].procname = it.procname
     tempFrames[i].line = it.line
@@ -97,32 +95,41 @@ proc rawWriteStackTrace(): string =
   else:
     result = "No stack traceback available\n"
 
-proc raiseException(e: ref Exception, ename: cstring) {.
+proc unhandledException(e: ref Exception) {.
     compilerproc, asmNoStackFrame.} =
-  e.name = ename
-  if excHandler != nil:
-    excHandler.exc = e
+  when NimStackTrace:
+    var buf = rawWriteStackTrace()
   else:
-    when NimStackTrace:
-      var buf = rawWriteStackTrace()
-    else:
-      var buf = ""
+    var buf = ""
     if e.msg != nil and e.msg[0] != '\0':
       add(buf, "Error: unhandled exception: ")
       add(buf, e.msg)
     else:
       add(buf, "Error: unhandled exception")
     add(buf, " [")
-    add(buf, ename)
+    add(buf, e.name)
     add(buf, "]\n")
     alert(buf)
-  asm """throw `e`;"""
+
+proc raiseException(e: ref Exception, ename: cstring) {.
+    compilerproc, asmNoStackFrame.} =
+  e.name = ename
+  when not defined(noUnhandledHandler):
+    if excHandler == 0:
+      unhandledException(e)
+  asm "throw `e`;"
 
 proc reraiseException() {.compilerproc, asmNoStackFrame.} =
-  if excHandler == nil:
+  if lastJSError == nil:
     raise newException(ReraiseError, "no exception to reraise")
   else:
-    asm """throw excHandler.exc;"""
+    when not defined(noUnhandledHandler):
+      if excHandler == 0:
+        var isNimException : bool
+        asm "`isNimException` = lastJSError.m_type;"
+        if isNimException:
+          unhandledException(cast[ref Exception](lastJSError))
+    asm "throw lastJSError;"
 
 proc raiseOverflow {.exportc: "raiseOverflow", noreturn.} =
   raise newException(OverflowError, "over- or underflow")
@@ -168,12 +175,28 @@ proc cstrToNimstr(c: cstring): string {.asmNoStackFrame, compilerproc.} =
 proc toJSStr(s: string): cstring {.asmNoStackFrame, compilerproc.} =
   asm """
     var len = `s`.length-1;
-    var result = new Array(len);
+    var asciiPart = new Array(len);
     var fcc = String.fromCharCode;
+    var nonAsciiPart = null;
+    var nonAsciiOffset = 0;
     for (var i = 0; i < len; ++i) {
-      result[i] = fcc(`s`[i]);
+      if (nonAsciiPart !== null) {
+        var offset = (i - nonAsciiOffset) * 2;
+        nonAsciiPart[offset] = "%";
+        nonAsciiPart[offset + 1] = `s`[i].toString(16);
+      }
+      else if (`s`[i] < 128)
+        asciiPart[i] = fcc(`s`[i]);
+      else {
+        asciiPart.length = i;
+        nonAsciiOffset = i;
+        nonAsciiPart = new Array((len - i) * 2);
+        --i;
+      }
     }
-    return result.join("");
+    asciiPart = asciiPart.join("");
+    return (nonAsciiPart === null) ?
+        asciiPart : asciiPart + decodeURIComponent(nonAsciiPart.join(""));
   """
 
 proc mnewString(len: int): string {.asmNoStackFrame, compilerproc.} =
@@ -260,17 +283,17 @@ proc eqStrings(a, b: string): bool {.asmNoStackFrame, compilerProc.} =
   """
 
 type
-  TDocument {.importc.} = object of RootObj
+  Document {.importc.} = object of RootObj
     write: proc (text: cstring) {.nimcall.}
     writeln: proc (text: cstring) {.nimcall.}
-    createAttribute: proc (identifier: cstring): ref TNode {.nimcall.}
-    createElement: proc (identifier: cstring): ref TNode {.nimcall.}
-    createTextNode: proc (identifier: cstring): ref TNode {.nimcall.}
-    getElementById: proc (id: cstring): ref TNode {.nimcall.}
-    getElementsByName: proc (name: cstring): seq[ref TNode] {.nimcall.}
-    getElementsByTagName: proc (name: cstring): seq[ref TNode] {.nimcall.}
-
-  TNodeType* = enum
+    createAttribute: proc (identifier: cstring): ref Node {.nimcall.}
+    createElement: proc (identifier: cstring): ref Node {.nimcall.}
+    createTextNode: proc (identifier: cstring): ref Node {.nimcall.}
+    getElementById: proc (id: cstring): ref Node {.nimcall.}
+    getElementsByName: proc (name: cstring): seq[ref Node] {.nimcall.}
+    getElementsByTagName: proc (name: cstring): seq[ref Node] {.nimcall.}
+
+  NodeType* = enum
     ElementNode = 1,
     AttributeNode,
     TextNode,
@@ -283,35 +306,36 @@ type
     DocumentTypeNode,
     DocumentFragmentNode,
     NotationNode
-  TNode* {.importc.} = object of RootObj
-    attributes*: seq[ref TNode]
-    childNodes*: seq[ref TNode]
+  Node* {.importc.} = object of RootObj
+    attributes*: seq[ref Node]
+    childNodes*: seq[ref Node]
     data*: cstring
-    firstChild*: ref TNode
-    lastChild*: ref TNode
-    nextSibling*: ref TNode
+    firstChild*: ref Node
+    lastChild*: ref Node
+    nextSibling*: ref Node
     nodeName*: cstring
-    nodeType*: TNodeType
+    nodeType*: NodeType
     nodeValue*: cstring
-    parentNode*: ref TNode
-    previousSibling*: ref TNode
-    appendChild*: proc (child: ref TNode) {.nimcall.}
+    parentNode*: ref Node
+    previousSibling*: ref Node
+    appendChild*: proc (child: ref Node) {.nimcall.}
     appendData*: proc (data: cstring) {.nimcall.}
     cloneNode*: proc (copyContent: bool) {.nimcall.}
     deleteData*: proc (start, len: int) {.nimcall.}
     getAttribute*: proc (attr: cstring): cstring {.nimcall.}
-    getAttributeNode*: proc (attr: cstring): ref TNode {.nimcall.}
-    getElementsByTagName*: proc (): seq[ref TNode] {.nimcall.}
+    getAttributeNode*: proc (attr: cstring): ref Node {.nimcall.}
+    getElementsByTagName*: proc (): seq[ref Node] {.nimcall.}
     hasChildNodes*: proc (): bool {.nimcall.}
-    insertBefore*: proc (newNode, before: ref TNode) {.nimcall.}
+    insertBefore*: proc (newNode, before: ref Node) {.nimcall.}
     insertData*: proc (position: int, data: cstring) {.nimcall.}
     removeAttribute*: proc (attr: cstring) {.nimcall.}
-    removeAttributeNode*: proc (attr: ref TNode) {.nimcall.}
-    removeChild*: proc (child: ref TNode) {.nimcall.}
-    replaceChild*: proc (newNode, oldNode: ref TNode) {.nimcall.}
+    removeAttributeNode*: proc (attr: ref Node) {.nimcall.}
+    removeChild*: proc (child: ref Node) {.nimcall.}
+    replaceChild*: proc (newNode, oldNode: ref Node) {.nimcall.}
     replaceData*: proc (start, len: int, text: cstring) {.nimcall.}
     setAttribute*: proc (name, value: cstring) {.nimcall.}
-    setAttributeNode*: proc (attr: ref TNode) {.nimcall.}
+    setAttributeNode*: proc (attr: ref Node) {.nimcall.}
+{.deprecated: [TNode: Node, TNodeType: NodeType, TDocument: Document].}
 
 when defined(kwin):
   proc rawEcho {.compilerproc, asmNoStackFrame.} =
@@ -337,7 +361,7 @@ elif defined(nodejs):
 
 else:
   var
-    document {.importc, nodecl.}: ref TDocument
+    document {.importc, nodecl.}: ref Document
 
   proc ewriteln(x: cstring) =
     var node = document.getElementsByTagName("body")[0]
@@ -508,75 +532,80 @@ proc nimMax(a, b: int): int {.compilerproc.} = return if a >= b: a else: b
 type NimString = string # hack for hti.nim
 include "system/hti"
 
+type JSRef = int # Fake type.
+
 proc isFatPointer(ti: PNimType): bool =
   # This has to be consistent with the code generator!
   return ti.base.kind notin {tyObject,
     tyArray, tyArrayConstr, tyTuple,
     tyOpenArray, tySet, tyVar, tyRef, tyPtr}
 
-proc nimCopy(x: pointer, ti: PNimType): pointer {.compilerproc.}
+proc nimCopy(dest, src: JSRef, ti: PNimType): JSRef {.compilerproc.}
 
-proc nimCopyAux(dest, src: pointer, n: ptr TNimNode) {.compilerproc.} =
+proc nimCopyAux(dest, src: JSRef, n: ptr TNimNode) {.compilerproc.} =
   case n.kind
   of nkNone: sysAssert(false, "nimCopyAux")
   of nkSlot:
-    asm "`dest`[`n`.offset] = nimCopy(`src`[`n`.offset], `n`.typ);"
+    asm """
+      `dest`[`n`.offset] = nimCopy(`dest`[`n`.offset], `src`[`n`.offset], `n`.typ);
+    """
   of nkList:
     for i in 0..n.len-1:
       nimCopyAux(dest, src, n.sons[i])
   of nkCase:
     asm """
-      `dest`[`n`.offset] = nimCopy(`src`[`n`.offset], `n`.typ);
+      `dest`[`n`.offset] = nimCopy(`dest`[`n`.offset], `src`[`n`.offset], `n`.typ);
       for (var i = 0; i < `n`.sons.length; ++i) {
         nimCopyAux(`dest`, `src`, `n`.sons[i][1]);
       }
     """
 
-proc nimCopy(x: pointer, ti: PNimType): pointer =
+proc nimCopy(dest, src: JSRef, ti: PNimType): JSRef =
   case ti.kind
   of tyPtr, tyRef, tyVar, tyNil:
     if not isFatPointer(ti):
-      result = x
+      result = src
     else:
-      asm """
-        `result` = [null, 0];
-        `result`[0] = `x`[0];
-        `result`[1] = `x`[1];
-      """
+      asm "`result` = [`src`[0], `src`[1]];"
   of tySet:
     asm """
       `result` = {};
-      for (var key in `x`) { `result`[key] = `x`[key]; }
+      for (var key in `src`) { `result`[key] = `src`[key]; }
     """
   of tyTuple, tyObject:
-    if ti.base != nil: result = nimCopy(x, ti.base)
+    if ti.base != nil: result = nimCopy(dest, src, ti.base)
     elif ti.kind == tyObject:
-      asm "`result` = {m_type: `ti`};"
+      asm "`result` = (`dest` === null || `dest` === undefined) ? {m_type: `ti`} : `dest`;"
     else:
-      asm "`result` = {};"
-    nimCopyAux(result, x, ti.node)
+      asm "`result` = (`dest` === null || `dest` === undefined) ? {} : `dest`;"
+    nimCopyAux(result, src, ti.node)
   of tySequence, tyArrayConstr, tyOpenArray, tyArray:
     asm """
-      `result` = new Array(`x`.length);
-      for (var i = 0; i < `x`.length; ++i) {
-        `result`[i] = nimCopy(`x`[i], `ti`.base);
+      if (`dest` === null || `dest` === undefined) {
+        `dest` = new Array(`src`.length);
+      }
+      else {
+        `dest`.length = `src`.length;
+      }
+      `result` = `dest`;
+      for (var i = 0; i < `src`.length; ++i) {
+        `result`[i] = nimCopy(`result`[i], `src`[i], `ti`.base);
       }
     """
   of tyString:
     asm """
-      if (`x` !== null) {
-        `result` = `x`.slice(0);
+      if (`src` !== null) {
+        `result` = `src`.slice(0);
       }
     """
   else:
-    result = x
+    result = src
 
-proc genericReset(x: pointer, ti: PNimType): pointer {.compilerproc.} =
+proc genericReset(x: JSRef, ti: PNimType): JSRef {.compilerproc.} =
+  asm "`result` = null;"
   case ti.kind
   of tyPtr, tyRef, tyVar, tyNil:
-    if not isFatPointer(ti):
-      result = nil
-    else:
+    if isFatPointer(ti):
       asm """
         `result` = [null, 0];
       """
@@ -601,14 +630,14 @@ proc genericReset(x: pointer, ti: PNimType): pointer {.compilerproc.} =
       }
     """
   else:
-    result = nil
+    discard
 
-proc arrayConstr(len: int, value: pointer, typ: PNimType): pointer {.
+proc arrayConstr(len: int, value: JSRef, typ: PNimType): JSRef {.
                  asmNoStackFrame, compilerproc.} =
   # types are fake
   asm """
     var result = new Array(`len`);
-    for (var i = 0; i < `len`; ++i) result[i] = nimCopy(`value`, `typ`);
+    for (var i = 0; i < `len`; ++i) result[i] = nimCopy(null, `value`, `typ`);
     return result;
   """
 
diff --git a/lib/system/mmdisp.nim b/lib/system/mmdisp.nim
index 84e532049..1c13f3ff8 100644
--- a/lib/system/mmdisp.nim
+++ b/lib/system/mmdisp.nim
@@ -7,7 +7,7 @@
 #    distribution, for details about the copyright.
 #
 
-# Nim high-level memory manager: It supports Boehm's GC, no GC and the
+# Nim high-level memory manager: It supports Boehm's GC, Go's GC, no GC and the
 # native Nim GC. The native Nim GC is the default.
 
 #{.push checks:on, assertions:on.}
@@ -34,14 +34,15 @@ const
 
 type
   PPointer = ptr pointer
-  TByteArray = array[0..1000_0000, byte]
-  PByte = ptr TByteArray
+  ByteArray = array[0..ArrayDummySize, byte]
+  PByte = ptr ByteArray
   PString = ptr string
+{.deprecated: [TByteArray: ByteArray].}
 
 # Page size of the system; in most cases 4096 bytes. For exotic OS or
 # CPU this needs to be changed:
 const
-  PageShift = 12
+  PageShift = when defined(cpu16): 8 else: 12
   PageSize = 1 shl PageShift
   PageMask = PageSize-1
 
@@ -65,41 +66,34 @@ proc raiseOutOfMem() {.noinline.} =
   quit(1)
 
 when defined(boehmgc):
-  when defined(windows):
-    const boehmLib = "boehmgc.dll"
-  elif defined(macosx):
-    const boehmLib = "libgc.dylib"
-  else:
-    const boehmLib = "/usr/lib/libgc.so.1"
-
-  proc boehmGCinit {.importc: "GC_init", dynlib: boehmLib.}
-  proc boehmGC_disable {.importc: "GC_disable", dynlib: boehmLib.}
-  proc boehmGC_enable {.importc: "GC_enable", dynlib: boehmLib.}
+  proc boehmGCinit {.importc: "GC_init", boehmGC.}
+  proc boehmGC_disable {.importc: "GC_disable", boehmGC.}
+  proc boehmGC_enable {.importc: "GC_enable", boehmGC.}
   proc boehmGCincremental {.
-    importc: "GC_enable_incremental", dynlib: boehmLib.}
-  proc boehmGCfullCollect {.importc: "GC_gcollect", dynlib: boehmLib.}
-  proc boehmAlloc(size: int): pointer {.
-    importc: "GC_malloc", dynlib: boehmLib.}
+    importc: "GC_enable_incremental", boehmGC.}
+  proc boehmGCfullCollect {.importc: "GC_gcollect", boehmGC.}
+  proc boehmAlloc(size: int): pointer {.importc: "GC_malloc", boehmGC.}
   proc boehmAllocAtomic(size: int): pointer {.
-    importc: "GC_malloc_atomic", dynlib: boehmLib.}
+    importc: "GC_malloc_atomic", boehmGC.}
   proc boehmRealloc(p: pointer, size: int): pointer {.
-    importc: "GC_realloc", dynlib: boehmLib.}
-  proc boehmDealloc(p: pointer) {.importc: "GC_free", dynlib: boehmLib.}
+    importc: "GC_realloc", boehmGC.}
+  proc boehmDealloc(p: pointer) {.importc: "GC_free", boehmGC.}
+  when hasThreadSupport:
+    proc boehmGC_allow_register_threads {.
+      importc: "GC_allow_register_threads", boehmGC.}
 
-  proc boehmGetHeapSize: int {.importc: "GC_get_heap_size", dynlib: boehmLib.}
+  proc boehmGetHeapSize: int {.importc: "GC_get_heap_size", boehmGC.}
     ## Return the number of bytes in the heap.  Excludes collector private
     ## data structures. Includes empty blocks and fragmentation loss.
     ## Includes some pages that were allocated but never written.
 
-  proc boehmGetFreeBytes: int {.importc: "GC_get_free_bytes", dynlib: boehmLib.}
+  proc boehmGetFreeBytes: int {.importc: "GC_get_free_bytes", boehmGC.}
     ## Return a lower bound on the number of free bytes in the heap.
 
-  proc boehmGetBytesSinceGC: int {.importc: "GC_get_bytes_since_gc",
-    dynlib: boehmLib.}
+  proc boehmGetBytesSinceGC: int {.importc: "GC_get_bytes_since_gc", boehmGC.}
     ## Return the number of bytes allocated since the last collection.
 
-  proc boehmGetTotalBytes: int {.importc: "GC_get_total_bytes",
-    dynlib: boehmLib.}
+  proc boehmGetTotalBytes: int {.importc: "GC_get_total_bytes", boehmGC.}
     ## Return the total number of bytes allocated in this process.
     ## Never decreases.
 
@@ -109,24 +103,24 @@ when defined(boehmgc):
 
   when not defined(useNimRtl):
 
-    proc alloc(size: int): pointer =
+    proc alloc(size: Natural): pointer =
       result = boehmAlloc(size)
       if result == nil: raiseOutOfMem()
-    proc alloc0(size: int): pointer =
+    proc alloc0(size: Natural): pointer =
       result = alloc(size)
       zeroMem(result, size)
-    proc realloc(p: pointer, newsize: int): pointer =
+    proc realloc(p: pointer, newsize: Natural): pointer =
       result = boehmRealloc(p, newsize)
       if result == nil: raiseOutOfMem()
     proc dealloc(p: pointer) = boehmDealloc(p)
 
-    proc allocShared(size: int): pointer =
+    proc allocShared(size: Natural): pointer =
       result = boehmAlloc(size)
       if result == nil: raiseOutOfMem()
-    proc allocShared0(size: int): pointer =
+    proc allocShared0(size: Natural): pointer =
       result = alloc(size)
       zeroMem(result, size)
-    proc reallocShared(p: pointer, newsize: int): pointer =
+    proc reallocShared(p: pointer, newsize: Natural): pointer =
       result = boehmRealloc(p, newsize)
       if result == nil: raiseOutOfMem()
     proc deallocShared(p: pointer) = boehmDealloc(p)
@@ -156,7 +150,9 @@ when defined(boehmgc):
     proc setStackBottom(theStackBottom: pointer) = discard
 
   proc initGC() =
-    when defined(macosx): boehmGCinit()
+    boehmGCinit()
+    when hasThreadSupport:
+      boehmGC_allow_register_threads()
 
   proc newObj(typ: PNimType, size: int): pointer {.compilerproc.} =
     if ntfNoRefs in typ.flags: result = allocAtomic(size)
@@ -180,40 +176,236 @@ when defined(boehmgc):
     dest[] = src
 
   type
-    TMemRegion = object {.final, pure.}
+    MemRegion = object {.final, pure.}
+  {.deprecated: [TMemRegion: MemRegion].}
 
-  proc alloc(r: var TMemRegion, size: int): pointer =
+  proc alloc(r: var MemRegion, size: int): pointer =
     result = boehmAlloc(size)
     if result == nil: raiseOutOfMem()
-  proc alloc0(r: var TMemRegion, size: int): pointer =
+  proc alloc0(r: var MemRegion, size: int): pointer =
     result = alloc(size)
     zeroMem(result, size)
-  proc dealloc(r: var TMemRegion, p: pointer) = boehmDealloc(p)
-  proc deallocOsPages(r: var TMemRegion) {.inline.} = discard
+  proc dealloc(r: var MemRegion, p: pointer) = boehmDealloc(p)
+  proc deallocOsPages(r: var MemRegion) {.inline.} = discard
   proc deallocOsPages() {.inline.} = discard
 
   include "system/cellsets"
+
+elif defined(gogc):
+  when defined(windows):
+    const goLib = "libgo.dll"
+  elif defined(macosx):
+    const goLib = "libgo.dylib"
+  else:
+    const goLib = "libgo.so"
+
+  proc roundup(x, v: int): int {.inline.} =
+    result = (x + (v-1)) and not (v-1)
+
+  proc initGC() = discard
+  # runtime_setgcpercent is only available in GCC 5
+  proc GC_disable() = discard
+  proc GC_enable() = discard
+  proc goRuntimeGC(force: int32) {.importc: "runtime_gc", dynlib: goLib.}
+  proc GC_fullCollect() = goRuntimeGC(2)
+  proc GC_setStrategy(strategy: GC_Strategy) = discard
+  proc GC_enableMarkAndSweep() = discard
+  proc GC_disableMarkAndSweep() = discard
+
+  const
+    goNumSizeClasses = 67
+
+  type
+    cbool {.importc: "_Bool", nodecl.} = bool
+
+    goMStats_inner_struct = object
+        size: uint32
+        nmalloc: uint64
+        nfree: uint64
+
+    goMStats = object
+        # General statistics.
+        alloc: uint64            # bytes allocated and still in use
+        total_alloc: uint64      # bytes allocated (even if freed)
+        sys: uint64              # bytes obtained from system (should be sum of xxx_sys below, no locking, approximate)
+        nlookup: uint64          # number of pointer lookups
+        nmalloc: uint64          # number of mallocs
+        nfree: uint64            # number of frees
+        # Statistics about malloc heap.
+        # protected by mheap.Lock
+        heap_alloc: uint64       # bytes allocated and still in use
+        heap_sys: uint64         # bytes obtained from system
+        heap_idle: uint64        # bytes in idle spans
+        heap_inuse: uint64       # bytes in non-idle spans
+        heap_released: uint64    # bytes released to the OS
+        heap_objects: uint64 # total number of allocated objects
+        # Statistics about allocation of low-level fixed-size structures.
+        # Protected by FixAlloc locks.
+        stacks_inuse: uint64     # bootstrap stacks
+        stacks_sys: uint64
+        mspan_inuse: uint64      # MSpan structures
+        mspan_sys: uint64
+        mcache_inuse: uint64     # MCache structures
+        mcache_sys: uint64
+        buckhash_sys: uint64     # profiling bucket hash table
+        gc_sys: uint64
+        other_sys: uint64
+        # Statistics about garbage collector.
+        # Protected by mheap or stopping the world during GC.
+        next_gc: uint64          # next GC (in heap_alloc time)
+        last_gc: uint64          # last GC (in absolute time)
+        pause_total_ns: uint64
+        pause_ns: array[256, uint64]
+        numgc: uint32
+        enablegc: cbool
+        debuggc: cbool
+        # Statistics about allocation size classes.
+        by_size: array[goNumSizeClasses, goMStats_inner_struct]
+
+  proc goRuntime_ReadMemStats(a2: ptr goMStats) {.cdecl,
+    importc: "runtime_ReadMemStats",
+    codegenDecl: "$1 $2$3 __asm__ (\"runtime.ReadMemStats\");\n$1 $2$3",
+    dynlib: goLib.}
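+  # the codegenDecl pragma above emits an __asm__ alias so that the generated
+  # C declaration binds to the "runtime.ReadMemStats" symbol exported by libgo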
+
+  proc GC_getStatistics(): string =
+    var mstats: goMStats
+    goRuntime_ReadMemStats(addr mstats)
+    result = "[GC] total allocated memory: " & $(mstats.total_alloc) & "\n" &
+             "[GC] total memory obtained from system: " & $(mstats.sys) & "\n" &
+             "[GC] occupied memory: " & $(mstats.alloc) & "\n" &
+             "[GC] number of pointer lookups: " & $(mstats.nlookup) & "\n" &
+             "[GC] number of mallocs: " & $(mstats.nmalloc) & "\n" &
+             "[GC] number of frees: " & $(mstats.nfree) & "\n" &
+             "[GC] heap objects: " & $(mstats.heap_objects) & "\n" &
+             "[GC] numgc: " & $(mstats.numgc) & "\n" &
+             "[GC] enablegc: " & $(mstats.enablegc) & "\n" &
+             "[GC] debuggc: " & $(mstats.debuggc) & "\n" &
+             "[GC] total pause time [ms]: " & $(mstats.pause_total_ns div 1000_000)
+
+  proc getOccupiedMem(): int =
+    var mstats: goMStats
+    goRuntime_ReadMemStats(addr mstats)
+    result = int(mstats.alloc)
+
+  proc getFreeMem(): int =
+    var mstats: goMStats
+    goRuntime_ReadMemStats(addr mstats)
+    result = int(mstats.sys - mstats.alloc)
+
+  proc getTotalMem(): int =
+    var mstats: goMStats
+    goRuntime_ReadMemStats(addr mstats)
+    result = int(mstats.sys)
+
+  proc setStackBottom(theStackBottom: pointer) = discard
+
+  proc alloc(size: Natural): pointer =
+    result = cmalloc(size)
+    if result == nil: raiseOutOfMem()
+
+  proc alloc0(size: Natural): pointer =
+    result = alloc(size)
+    zeroMem(result, size)
+
+  proc realloc(p: pointer, newsize: Natural): pointer =
+    result = crealloc(p, newsize)
+    if result == nil: raiseOutOfMem()
+
+  proc dealloc(p: pointer) = cfree(p)
+
+  proc allocShared(size: Natural): pointer =
+    result = cmalloc(size)
+    if result == nil: raiseOutOfMem()
+
+  proc allocShared0(size: Natural): pointer =
+    result = alloc(size)
+    zeroMem(result, size)
+
+  proc reallocShared(p: pointer, newsize: Natural): pointer =
+    result = crealloc(p, newsize)
+    if result == nil: raiseOutOfMem()
+
+  proc deallocShared(p: pointer) = cfree(p)
+
+  when hasThreadSupport:
+    proc getFreeSharedMem(): int = discard
+    proc getTotalSharedMem(): int = discard
+    proc getOccupiedSharedMem(): int = discard
+
+  const goFlagNoZero: uint32 = 1 shl 3
+  proc goRuntimeMallocGC(size: uint, typ: uint, flag: uint32): pointer {.importc: "runtime_mallocgc", dynlib: goLib.}
+  proc goFree(v: pointer) {.importc: "__go_free", dynlib: goLib.}
+
+  proc goSetFinalizer(obj: pointer, f: pointer) {.importc: "set_finalizer", codegenDecl:"$1 $2$3 __asm__ (\"main.Set_finalizer\");\n$1 $2$3", dynlib: goLib.}
+
+  proc newObj(typ: PNimType, size: int): pointer {.compilerproc.} =
+    result = goRuntimeMallocGC(roundup(size, sizeof(pointer)).uint, 0.uint, 0.uint32)
+    if typ.finalizer != nil:
+      goSetFinalizer(result, typ.finalizer)
+
+  proc newObjNoInit(typ: PNimType, size: int): pointer =
+    result = goRuntimeMallocGC(roundup(size, sizeof(pointer)).uint, 0.uint, goFlagNoZero)
+    if typ.finalizer != nil:
+      goSetFinalizer(result, typ.finalizer)
+
+  proc newSeq(typ: PNimType, len: int): pointer {.compilerproc.} =
+    result = newObj(typ, len * typ.base.size + GenericSeqSize)
+    cast[PGenericSeq](result).len = len
+    cast[PGenericSeq](result).reserved = len
+    cast[PGenericSeq](result).elemSize = typ.base.size
+
+  proc growObj(old: pointer, newsize: int): pointer =
+    # the Go GC doesn't have a realloc
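+    # so growing is emulated: allocate a new block, copy the old payload,
+    # zero the freshly grown tail and release the old block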
+    var
+      oldsize = cast[PGenericSeq](old).len * cast[PGenericSeq](old).elemSize + GenericSeqSize
+    result = goRuntimeMallocGC(roundup(newsize, sizeof(pointer)).uint, 0.uint, goFlagNoZero)
+    copyMem(result, old, oldsize)
+    zeroMem(cast[pointer](cast[ByteAddress](result) +% oldsize), newsize - oldsize)
+    goFree(old)
+
+  proc nimGCref(p: pointer) {.compilerproc, inline.} = discard
+  proc nimGCunref(p: pointer) {.compilerproc, inline.} = discard
+
+  proc unsureAsgnRef(dest: PPointer, src: pointer) {.compilerproc, inline.} =
+    dest[] = src
+  proc asgnRef(dest: PPointer, src: pointer) {.compilerproc, inline.} =
+    dest[] = src
+  proc asgnRefNoCycle(dest: PPointer, src: pointer) {.compilerproc, inline.} =
+    dest[] = src
+
+  type
+    MemRegion = object {.final, pure.}
+  {.deprecated: [TMemRegion: MemRegion].}
+
+  proc alloc(r: var MemRegion, size: int): pointer =
+    result = alloc(size)
+  proc alloc0(r: var MemRegion, size: int): pointer =
+    result = alloc0(size)
+  proc dealloc(r: var MemRegion, p: pointer) = dealloc(p)
+  proc deallocOsPages(r: var MemRegion) {.inline.} = discard
+  proc deallocOsPages() {.inline.} = discard
+
 elif defined(nogc) and defined(useMalloc):
 
   when not defined(useNimRtl):
-    proc alloc(size: int): pointer =
+    proc alloc(size: Natural): pointer =
       result = cmalloc(size)
       if result == nil: raiseOutOfMem()
-    proc alloc0(size: int): pointer =
+    proc alloc0(size: Natural): pointer =
       result = alloc(size)
       zeroMem(result, size)
-    proc realloc(p: pointer, newsize: int): pointer =
+    proc realloc(p: pointer, newsize: Natural): pointer =
       result = crealloc(p, newsize)
       if result == nil: raiseOutOfMem()
     proc dealloc(p: pointer) = cfree(p)
 
-    proc allocShared(size: int): pointer =
+    proc allocShared(size: Natural): pointer =
       result = cmalloc(size)
       if result == nil: raiseOutOfMem()
-    proc allocShared0(size: int): pointer =
+    proc allocShared0(size: Natural): pointer =
       result = alloc(size)
       zeroMem(result, size)
-    proc reallocShared(p: pointer, newsize: int): pointer =
+    proc reallocShared(p: pointer, newsize: Natural): pointer =
       result = crealloc(p, newsize)
       if result == nil: raiseOutOfMem()
     proc deallocShared(p: pointer) = cfree(p)
@@ -257,14 +449,15 @@ elif defined(nogc) and defined(useMalloc):
     dest[] = src
 
   type
-    TMemRegion = object {.final, pure.}
+    MemRegion = object {.final, pure.}
+  {.deprecated: [TMemRegion: MemRegion].}
 
-  proc alloc(r: var TMemRegion, size: int): pointer =
+  proc alloc(r: var MemRegion, size: int): pointer =
     result = alloc(size)
-  proc alloc0(r: var TMemRegion, size: int): pointer =
+  proc alloc0(r: var MemRegion, size: int): pointer =
     result = alloc0(size)
-  proc dealloc(r: var TMemRegion, p: pointer) = dealloc(p)
-  proc deallocOsPages(r: var TMemRegion) {.inline.} = discard
+  proc dealloc(r: var MemRegion, p: pointer) = dealloc(p)
+  proc deallocOsPages(r: var MemRegion) {.inline.} = discard
   proc deallocOsPages() {.inline.} = discard
 
 elif defined(nogc):
@@ -288,7 +481,6 @@ elif defined(nogc):
   proc GC_disableMarkAndSweep() = discard
   proc GC_getStatistics(): string = return ""
 
-
   proc newObj(typ: PNimType, size: int): pointer {.compilerproc.} =
     result = alloc0(size)
 
@@ -313,7 +505,7 @@ elif defined(nogc):
   proc asgnRefNoCycle(dest: PPointer, src: pointer) {.compilerproc, inline.} =
     dest[] = src
 
-  var allocator {.rtlThreadVar.}: TMemRegion
+  var allocator {.rtlThreadVar.}: MemRegion
   instantiateForRegion(allocator)
 
   include "system/cellsets"
@@ -323,7 +515,7 @@ else:
 
   include "system/cellsets"
   when not leakDetector:
-    sysAssert(sizeof(TCell) == sizeof(TFreeCell), "sizeof TFreeCell")
+    sysAssert(sizeof(Cell) == sizeof(FreeCell), "sizeof FreeCell")
   when compileOption("gc", "v2"):
     include "system/gc2"
   elif defined(gcMarkAndSweep):
@@ -335,4 +527,3 @@ else:
     include "system/gc"
 
 {.pop.}
-
diff --git a/lib/system/nimscript.nim b/lib/system/nimscript.nim
new file mode 100644
index 000000000..22430348c
--- /dev/null
+++ b/lib/system/nimscript.nim
@@ -0,0 +1,277 @@
+#
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2015 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+
+# Nim's configuration system now uses Nim for scripting. This module provides
+# a few things that are required for this to work.
+
+template builtin = discard
+
+# We know the effects better than the compiler:
+{.push hint[XDeclaredButNotUsed]: off.}
+
+proc listDirs*(dir: string): seq[string] =
+  ## Lists all the subdirectories (non-recursively) in the directory `dir`.
+  builtin
+proc listFiles*(dir: string): seq[string] =
+  ## Lists all the files (non-recursively) in the directory `dir`.
+  builtin
+
+proc removeDir(dir: string){.
+  tags: [ReadIOEffect, WriteIOEffect], raises: [OSError].} = builtin
+proc removeFile(dir: string) {.
+  tags: [ReadIOEffect, WriteIOEffect], raises: [OSError].} = builtin
+proc moveFile(src, dest: string) {.
+  tags: [ReadIOEffect, WriteIOEffect], raises: [OSError].} = builtin
+proc copyFile(src, dest: string) {.
+  tags: [ReadIOEffect, WriteIOEffect], raises: [OSError].} = builtin
+proc createDir(dir: string) {.tags: [WriteIOEffect], raises: [OSError].} =
+  builtin
+proc getOsError: string = builtin
+proc setCurrentDir(dir: string) = builtin
+proc getCurrentDir(): string = builtin
+proc rawExec(cmd: string): int {.tags: [ExecIOEffect], raises: [OSError].} =
+  builtin
+
+proc paramStr*(i: int): string =
+  ## Retrieves the ``i``'th command line parameter.
+  builtin
+
+proc paramCount*(): int =
+  ## Retrieves the number of command line parameters.
+  builtin
+
+proc switch*(key: string, val="") =
+  ## Sets a Nim compiler command line switch, for
+  ## example ``switch("checks", "on")``.
+  builtin
+
+proc getCommand*(): string =
+  ## Gets the Nim command that the compiler has been invoked with, for example
+  ## "c", "js", "build", "help".
+  builtin
+
+proc setCommand*(cmd: string; project="") =
+  ## Sets the Nim command that should be continued with after this Nimscript
+  ## has finished.
+  builtin
+
+proc cmpIgnoreStyle(a, b: string): int = builtin
+proc cmpIgnoreCase(a, b: string): int = builtin
+
+proc cmpic*(a, b: string): int =
+  ## Compares `a` and `b` ignoring case.
+  cmpIgnoreCase(a, b)
+
+proc getEnv*(key: string): string {.tags: [ReadIOEffect].} =
+  ## Retrieves the environment variable of name `key`.
+  builtin
+
+proc existsEnv*(key: string): bool {.tags: [ReadIOEffect].} =
+  ## Checks for the existence of an environment variable named `key`.
+  builtin
+
+proc fileExists*(filename: string): bool {.tags: [ReadIOEffect].} =
+  ## Checks if the file exists.
+  builtin
+
+proc dirExists*(dir: string): bool {.
+  tags: [ReadIOEffect].} =
+  ## Checks if the directory `dir` exists.
+  builtin
+
+proc existsFile*(filename: string): bool =
+  ## An alias for ``fileExists``.
+  fileExists(filename)
+
+proc existsDir*(dir: string): bool =
+  ## An alias for ``dirExists``.
+  dirExists(dir)
+
+proc toExe*(filename: string): string =
+  ## On Windows adds ".exe" to `filename`, else returns `filename` unmodified.
+  (when defined(windows): filename & ".exe" else: filename)
+
+proc toDll*(filename: string): string =
+  ## On Windows adds ".dll" to `filename`, on Posix produces "lib$filename.so".
+  (when defined(windows): filename & ".dll" else: "lib" & filename & ".so")
+
+proc strip(s: string): string =
+  var i = 0
+  while s[i] in {' ', '\c', '\L'}: inc i
+  result = s.substr(i)
+
+template `--`*(key, val: untyped) =
+  ## A shortcut for ``switch(astToStr(key), astToStr(val))``; leading
+  ## whitespace is stripped from ``val``.
+  switch(astToStr(key), strip astToStr(val))
+
+template `--`*(key: untyped) =
+  ## A shortcut for ``switch(astToStr(key), "")``.
+  switch(astToStr(key), "")
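+
+# For example, a configuration script might simply contain (illustrative):
+#
+#   --forceBuild
+#   switch("out", "bin/myprog")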
+
+type
+  ScriptMode* {.pure.} = enum ## Controls the behaviour of the script.
+    Silent,                   ## Be silent.
+    Verbose,                  ## Be verbose.
+    Whatif                    ## Do not run commands, instead just echo what
+                              ## would have been done.
+
+var
+  mode*: ScriptMode ## Set this to influence how mkDir, rmDir, rmFile etc.
+                    ## behave.
+
+template checkOsError =
+  let err = getOsError()
+  if err.len > 0: raise newException(OSError, err)
+
+template log(msg: string, body: untyped) =
+  if mode == ScriptMode.Verbose or mode == ScriptMode.Whatif:
+    echo "[NimScript] ", msg
+  if mode != ScriptMode.Whatif:
+    body
+
+proc rmDir*(dir: string) {.raises: [OSError].} =
+  ## Removes the directory `dir`.
+  log "rmDir: " & dir:
+    removeDir dir
+    checkOsError()
+
+proc rmFile*(file: string) {.raises: [OSError].} =
+  ## Removes the `file`.
+  log "rmFile: " & file:
+    removeFile file
+    checkOsError()
+
+proc mkDir*(dir: string) {.raises: [OSError].} =
+  ## Creates the directory `dir` including all necessary subdirectories. If
+  ## the directory already exists, no error is raised.
+  log "mkDir: " & dir:
+    createDir dir
+    checkOsError()
+
+proc mvFile*(`from`, to: string) {.raises: [OSError].} =
+  ## Moves the file `from` to `to`.
+  log "mvFile: " & `from` & ", " & to:
+    moveFile `from`, to
+    checkOsError()
+
+proc cpFile*(`from`, to: string) {.raises: [OSError].} =
+  ## Copies the file `from` to `to`.
+  log "cpFile: " & `from` & ", " & to:
+    copyFile `from`, to
+    checkOsError()
+
+proc exec*(command: string) =
+  ## Executes an external process.
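+  ## For example: ``exec "nim c -r tests/mytests"`` (illustrative).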
+  log "exec: " & command:
+    if rawExec(command) != 0:
+      raise newException(OSError, "FAILED: " & command)
+    checkOsError()
+
+proc exec*(command: string, input: string, cache = "") {.
+  raises: [OSError], tags: [ExecIOEffect].} =
+  ## Executes an external process.
+  log "exec: " & command:
+    echo staticExec(command, input, cache)
+
+proc put*(key, value: string) =
+  ## Sets a configuration 'key' like 'gcc.options.always' to its value.
+  builtin
+
+proc get*(key: string): string =
+  ## Retrieves a configuration 'key' like 'gcc.options.always'.
+  builtin
+
+proc exists*(key: string): bool =
+  ## Checks for the existence of a configuration 'key'
+  ## like 'gcc.options.always'.
+  builtin
+
+proc nimcacheDir*(): string =
+  ## Retrieves the location of 'nimcache'.
+  builtin
+
+proc thisDir*(): string =
+  ## Retrieves the location of the current ``nims`` script file.
+  builtin
+
+proc cd*(dir: string) {.raises: [OSError].} =
+  ## Changes the current directory.
+  ##
+  ## The change is permanent for the rest of the execution, since this is just
+  ## a shortcut for `os.setCurrentDir()
+  ## <http://nim-lang.org/os.html#setCurrentDir,string>`_ . Use the `withDir()
+  ## <#withDir>`_ template if you want to perform a temporary change only.
+  setCurrentDir(dir)
+  checkOsError()
+
+template withDir*(dir: string; body: untyped): untyped =
+  ## Changes the current directory temporarily.
+  ##
+  ## If you need a permanent change, use the `cd() <#cd>`_ proc. Usage example:
+  ##
+  ## .. code-block:: nim
+  ##   withDir "foo":
+  ##     # inside foo
+  ##   #back to last dir
+  var curDir = getCurrentDir()
+  try:
+    cd(dir)
+    body
+  finally:
+    cd(curDir)
+
+template `==?`(a, b: string): bool = cmpIgnoreStyle(a, b) == 0
+
+proc writeTask(name, desc: string) =
+  if desc.len > 0:
+    var spaces = " "
+    for i in 0 ..< 20 - name.len: spaces.add ' '
+    echo name, spaces, desc
+
+template task*(name: untyped; description: string; body: untyped): untyped =
+  ## Defines a task. Hidden tasks are supported via an empty description.
+  ## Example:
+  ##
+  ## .. code-block:: nim
+  ##  task build, "default build is via the C backend":
+  ##    setCommand "c"
+  proc `name Task`() = body
+
+  let cmd = getCommand()
+  if cmd.len == 0 or cmd ==? "help":
+    setCommand "help"
+    writeTask(astToStr(name), description)
+  elif cmd ==? astToStr(name):
+    setCommand "nop"
+    `name Task`()
+
+var
+  packageName* = ""    ## Nimble support: Set this to the package name.
+                       ## Setting it explicitly is usually not required; the
+                       ## script's filename is the default.
+  version*: string     ## Nimble support: The package's version.
+  author*: string      ## Nimble support: The package's author.
+  description*: string ## Nimble support: The package's description.
+  license*: string     ## Nimble support: The package's license.
+  srcdir*: string      ## Nimble support: The package's source directory.
+  binDir*: string      ## Nimble support: The package's binary directory.
+  backend*: string     ## Nimble support: The package's backend.
+
+  skipDirs*, skipFiles*, skipExt*, installDirs*, installFiles*,
+    installExt*, bin*: seq[string] = @[] ## Nimble metadata.
+  requiresData*: seq[string] = @[] ## Exposes the list of requirements for read
+                                   ## and write accesses.
+
+proc requires*(deps: varargs[string]) =
+  ## Nimble support: Call this to set the list of requirements of your Nimble
+  ## package.
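+  ##
+  ## For example (illustrative):
+  ##
+  ## .. code-block:: nim
+  ##   requires "nim >= 0.10.0", "jester"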
+  for d in deps: requiresData.add(d)
+
+{.pop.}
diff --git a/lib/system/platforms.nim b/lib/system/platforms.nim
index 47a01d5fe..eeada5c51 100644
--- a/lib/system/platforms.nim
+++ b/lib/system/platforms.nim
@@ -18,13 +18,17 @@ type
     alpha,                     ## Alpha processor
     powerpc,                   ## 32 bit PowerPC
     powerpc64,                 ## 64 bit PowerPC
+    powerpc64el,               ## Little Endian 64 bit PowerPC
     sparc,                     ## Sparc based processor
     ia64,                      ## Intel Itanium
     amd64,                     ## x86_64 (AMD64); 64 bit x86 compatible CPU
     mips,                      ## Mips based processor
+    mipsel,                    ## Little Endian Mips based processor
     arm,                       ## ARM based processor
+    arm64,                     ## ARM64 based processor
     vm,                        ## Some Virtual machine: Nim's VM or JavaScript
-    avr                        ## AVR based processor
+    avr,                       ## AVR based processor
+    msp430                     ## TI MSP430 microcontroller
 
   OsPlatform* {.pure.} = enum ## the OS this program will run on.
     none, dos, windows, os2, linux, morphos, skyos, solaris,
@@ -63,12 +67,16 @@ const
                elif defined(alpha): CpuPlatform.alpha
                elif defined(powerpc): CpuPlatform.powerpc
                elif defined(powerpc64): CpuPlatform.powerpc64
+               elif defined(powerpc64el): CpuPlatform.powerpc64el
                elif defined(sparc): CpuPlatform.sparc
                elif defined(ia64): CpuPlatform.ia64
                elif defined(amd64): CpuPlatform.amd64
                elif defined(mips): CpuPlatform.mips
+               elif defined(mipsel): CpuPlatform.mipsel
                elif defined(arm): CpuPlatform.arm
+               elif defined(arm64): CpuPlatform.arm64
                elif defined(vm): CpuPlatform.vm
                elif defined(avr): CpuPlatform.avr
+               elif defined(msp430): CpuPlatform.msp430
                else: CpuPlatform.none
     ## the CPU this program will run on.
diff --git a/lib/system/profiler.nim b/lib/system/profiler.nim
index 6d6863caa..4f600417e 100644
--- a/lib/system/profiler.nim
+++ b/lib/system/profiler.nim
@@ -19,10 +19,11 @@ const
   MaxTraceLen = 20 # tracking the last 20 calls is enough
 
 type
-  TStackTrace* = array [0..MaxTraceLen-1, cstring]
-  TProfilerHook* = proc (st: TStackTrace) {.nimcall.}
+  StackTrace* = array [0..MaxTraceLen-1, cstring]
+  ProfilerHook* = proc (st: StackTrace) {.nimcall.}
+{.deprecated: [TStackTrace: StackTrace, TProfilerHook: ProfilerHook].}
 
-proc captureStackTrace(f: PFrame, st: var TStackTrace) =
+proc captureStackTrace(f: PFrame, st: var StackTrace) =
   const
     firstCalls = 5
   var
@@ -39,7 +40,7 @@ proc captureStackTrace(f: PFrame, st: var TStackTrace) =
   while it != nil:
     inc(total)
     it = it.prev
-  for j in 1..total-i-(firstCalls-1): 
+  for j in 1..total-i-(firstCalls-1):
     if b != nil: b = b.prev
   if total != i:
     st[i] = "..."
@@ -51,14 +52,15 @@ proc captureStackTrace(f: PFrame, st: var TStackTrace) =
 
 when defined(memProfiler):
   type
-    TMemProfilerHook* = proc (st: TStackTrace, requestedSize: int) {.nimcall, benign.}
+    MemProfilerHook* = proc (st: StackTrace, requestedSize: int) {.nimcall, benign.}
+  {.deprecated: [TMemProfilerHook: MemProfilerHook].}
   var
-    profilerHook*: TMemProfilerHook
+    profilerHook*: MemProfilerHook
       ## set this variable to provide a procedure that implements a profiler in
       ## user space. See the `nimprof` module for a reference implementation.
 
-  proc callProfilerHook(hook: TMemProfilerHook, requestedSize: int) =
-    var st: TStackTrace
+  proc callProfilerHook(hook: MemProfilerHook, requestedSize: int) =
+    var st: StackTrace
     captureStackTrace(framePtr, st)
     hook(st, requestedSize)
 
@@ -70,15 +72,15 @@ else:
     SamplingInterval = 50_000
       # set this to change the default sampling interval
   var
-    profilerHook*: TProfilerHook
+    profilerHook*: ProfilerHook
       ## set this variable to provide a procedure that implements a profiler in
       ## user space. See the `nimprof` module for a reference implementation.
     gTicker {.threadvar.}: int
 
-  proc callProfilerHook(hook: TProfilerHook) {.noinline.} =
+  proc callProfilerHook(hook: ProfilerHook) {.noinline.} =
     # 'noinline' so that 'nimProfile' does not perform the stack allocation
     # in the common case.
-    var st: TStackTrace
+    var st: StackTrace
     captureStackTrace(framePtr, st)
     hook(st)
 
diff --git a/lib/system/repr.nim b/lib/system/repr.nim
index f1029ff6a..1f81a0813 100644
--- a/lib/system/repr.nim
+++ b/lib/system/repr.nim
@@ -21,9 +21,22 @@ proc reprPointer(x: pointer): string {.compilerproc.} =
   return $buf
 
 proc `$`(x: uint64): string =
-  var buf: array [0..59, char]
-  discard c_sprintf(buf, "%llu", x)
-  return $buf
+  if x == 0:
+    result = "0"
+  else:
+    var buf: array [60, char]
+    var i = 0
+    var n = x
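+    # emit the decimal digits, least significant first; they are reversed
+    # into place below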
+    while n != 0:
+      let nn = n div 10'u64
+      buf[i] = char(n - 10'u64 * nn + ord('0'))
+      inc i
+      n = nn
+
+    let half = i div 2
+    # Reverse
+    for t in 0 .. < half: swap(buf[t], buf[i-t-1])
+    result = $buf
 
 proc reprStrAux(result: var string, s: string) =
   if cast[pointer](s) == nil:
@@ -121,38 +134,39 @@ proc reprSet(p: pointer, typ: PNimType): string {.compilerRtl.} =
   reprSetAux(result, p, typ)
 
 type
-  TReprClosure {.final.} = object # we cannot use a global variable here
+  ReprClosure {.final.} = object # we cannot use a global variable here
                                   # as this wouldn't be thread-safe
-    when declared(TCellSet):
-      marked: TCellSet
+    when declared(CellSet):
+      marked: CellSet
     recdepth: int       # do not recurse endlessly
     indent: int         # indentation
+{.deprecated: [TReprClosure: ReprClosure].}
 
 when not defined(useNimRtl):
-  proc initReprClosure(cl: var TReprClosure) =
+  proc initReprClosure(cl: var ReprClosure) =
     # Important: cellsets does not lock the heap when doing allocations! We
     # have to do it here ...
     when hasThreadSupport and hasSharedHeap and declared(heapLock):
       AcquireSys(HeapLock)
-    when declared(TCellSet):
+    when declared(CellSet):
       init(cl.marked)
     cl.recdepth = -1      # default is to display everything!
     cl.indent = 0
 
-  proc deinitReprClosure(cl: var TReprClosure) =
-    when declared(TCellSet): deinit(cl.marked)
-    when hasThreadSupport and hasSharedHeap and declared(heapLock): 
+  proc deinitReprClosure(cl: var ReprClosure) =
+    when declared(CellSet): deinit(cl.marked)
+    when hasThreadSupport and hasSharedHeap and declared(heapLock):
       ReleaseSys(HeapLock)
 
-  proc reprBreak(result: var string, cl: TReprClosure) =
+  proc reprBreak(result: var string, cl: ReprClosure) =
     add result, "\n"
     for i in 0..cl.indent-1: add result, ' '
 
   proc reprAux(result: var string, p: pointer, typ: PNimType,
-               cl: var TReprClosure) {.benign.}
+               cl: var ReprClosure) {.benign.}
 
   proc reprArray(result: var string, p: pointer, typ: PNimType,
-                 cl: var TReprClosure) =
+                 cl: var ReprClosure) =
     add result, "["
     var bs = typ.base.size
     for i in 0..typ.size div bs - 1:
@@ -161,7 +175,7 @@ when not defined(useNimRtl):
     add result, "]"
 
   proc reprSequence(result: var string, p: pointer, typ: PNimType,
-                    cl: var TReprClosure) =
+                    cl: var ReprClosure) =
     if p == nil:
       add result, "nil"
       return
@@ -174,7 +188,7 @@ when not defined(useNimRtl):
     add result, "]"
 
   proc reprRecordAux(result: var string, p: pointer, n: ptr TNimNode,
-                     cl: var TReprClosure) {.benign.} =
+                     cl: var ReprClosure) {.benign.} =
     case n.kind
     of nkNone: sysAssert(false, "reprRecordAux")
     of nkSlot:
@@ -191,20 +205,26 @@ when not defined(useNimRtl):
       if m != nil: reprRecordAux(result, p, m, cl)
 
   proc reprRecord(result: var string, p: pointer, typ: PNimType,
-                  cl: var TReprClosure) =
+                  cl: var ReprClosure) =
     add result, "["
-    let oldLen = result.len
-    reprRecordAux(result, p, typ.node, cl)
-    if typ.base != nil: 
-      if oldLen != result.len: add result, ",\n"
-      reprRecordAux(result, p, typ.base.node, cl)
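+    # walk the whole inheritance chain so the fields of every ancestor are
+    # printed, not just those of the immediate base type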
+    var curTyp = typ
+    var first = true
+    while curTyp != nil:
+      var part = ""
+      reprRecordAux(part, p, curTyp.node, cl)
+      if part.len > 0:
+        if not first:
+          add result, ",\n"
+        add result, part
+        first = false
+      curTyp = curTyp.base
     add result, "]"
 
   proc reprRef(result: var string, p: pointer, typ: PNimType,
-               cl: var TReprClosure) =
+               cl: var ReprClosure) =
     # we know that p is not nil here:
-    when declared(TCellSet):
-      when defined(boehmGC) or defined(nogc):
+    when declared(CellSet):
+      when defined(boehmGC) or defined(gogc) or defined(nogc):
         var cell = cast[PCell](p)
       else:
         var cell = usrToCell(p)
@@ -216,7 +236,7 @@ when not defined(useNimRtl):
         reprAux(result, p, typ.base, cl)
 
   proc reprAux(result: var string, p: pointer, typ: PNimType,
-               cl: var TReprClosure) =
+               cl: var ReprClosure) =
     if cl.recdepth == 0:
       add result, "..."
       return
@@ -225,7 +245,7 @@ when not defined(useNimRtl):
     of tySet: reprSetAux(result, p, typ)
     of tyArray, tyArrayConstr: reprArray(result, p, typ, cl)
     of tyTuple: reprRecord(result, p, typ, cl)
-    of tyObject: 
+    of tyObject:
       var t = cast[ptr PNimType](p)[]
       reprRecord(result, p, t, cl)
     of tyRef, tyPtr:
@@ -241,7 +261,7 @@ when not defined(useNimRtl):
     of tyInt64: add result, $(cast[ptr int64](p)[])
     of tyUInt8: add result, $ze(cast[ptr int8](p)[])
     of tyUInt16: add result, $ze(cast[ptr int16](p)[])
-    
+
     of tyFloat: add result, $(cast[ptr float](p)[])
     of tyFloat32: add result, $(cast[ptr float32](p)[])
     of tyFloat64: add result, $(cast[ptr float64](p)[])
@@ -249,7 +269,10 @@ when not defined(useNimRtl):
     of tyBool: add result, reprBool(cast[ptr bool](p)[])
     of tyChar: add result, reprChar(cast[ptr char](p)[])
     of tyString: reprStrAux(result, cast[ptr string](p)[])
-    of tyCString: reprStrAux(result, $(cast[ptr cstring](p)[]))
+    of tyCString:
+      let cs = cast[ptr cstring](p)[]
+      if cs.isNil: add result, "nil"
+      else: reprStrAux(result, $cs)
     of tyRange: reprAux(result, p, typ.base, cl)
     of tyProc, tyPointer:
       if cast[PPointer](p)[] == nil: add result, "nil"
@@ -261,7 +284,7 @@ when not defined(useNimRtl):
 proc reprOpenArray(p: pointer, length: int, elemtyp: PNimType): string {.
                    compilerRtl.} =
   var
-    cl: TReprClosure
+    cl: ReprClosure
   initReprClosure(cl)
   result = "["
   var bs = elemtyp.size
@@ -274,7 +297,7 @@ proc reprOpenArray(p: pointer, length: int, elemtyp: PNimType): string {.
 when not defined(useNimRtl):
   proc reprAny(p: pointer, typ: PNimType): string =
     var
-      cl: TReprClosure
+      cl: ReprClosure
     initReprClosure(cl)
     result = ""
     if typ.kind in {tyObject, tyTuple, tyArray, tyArrayConstr, tySet}:
@@ -284,4 +307,3 @@ when not defined(useNimRtl):
       reprAux(result, addr(p), typ, cl)
     add result, "\n"
     deinitReprClosure(cl)
-
diff --git a/lib/system/sets.nim b/lib/system/sets.nim
index 7826fd4b7..66877de30 100644
--- a/lib/system/sets.nim
+++ b/lib/system/sets.nim
@@ -10,7 +10,8 @@
 # set handling
 
 type
-  TNimSet = array [0..4*2048-1, uint8]
+  NimSet = array [0..4*2048-1, uint8]
+{.deprecated: [TNimSet: NimSet].}
 
 proc countBits32(n: int32): int {.compilerproc.} =
   var v = n
@@ -19,10 +20,10 @@ proc countBits32(n: int32): int {.compilerproc.} =
   result = ((v +% (v shr 4'i32) and 0xF0F0F0F'i32) *% 0x1010101'i32) shr 24'i32
 
 proc countBits64(n: int64): int {.compilerproc.} =
-  result = countBits32(toU32(n and 0xffff'i64)) +
-           countBits32(toU32(n shr 16'i64))
+  result = countBits32(toU32(n and 0xffffffff'i64)) +
+           countBits32(toU32(n shr 32'i64))
 
-proc cardSet(s: TNimSet, len: int): int {.compilerproc.} =
+proc cardSet(s: NimSet, len: int): int {.compilerproc.} =
   result = 0
   for i in countup(0, len-1):
     inc(result, countBits32(int32(s[i])))
diff --git a/lib/system/sysio.nim b/lib/system/sysio.nim
index 468af1713..3d0b2aa8a 100644
--- a/lib/system/sysio.nim
+++ b/lib/system/sysio.nim
@@ -16,7 +16,7 @@
                        # of the standard library!
 
 
-proc fputs(c: cstring, f: File) {.importc: "fputs", header: "<stdio.h>", 
+proc fputs(c: cstring, f: File) {.importc: "fputs", header: "<stdio.h>",
   tags: [WriteIOEffect].}
 proc fgets(c: cstring, n: int, f: File): cstring {.
   importc: "fgets", header: "<stdio.h>", tags: [ReadIOEffect].}
@@ -26,20 +26,24 @@ proc ungetc(c: cint, f: File) {.importc: "ungetc", header: "<stdio.h>",
   tags: [].}
 proc putc(c: char, stream: File) {.importc: "putc", header: "<stdio.h>",
   tags: [WriteIOEffect].}
-proc fprintf(f: File, frmt: cstring) {.importc: "fprintf", 
+proc fprintf(f: File, frmt: cstring) {.importc: "fprintf",
   header: "<stdio.h>", varargs, tags: [WriteIOEffect].}
 proc strlen(c: cstring): int {.
   importc: "strlen", header: "<string.h>", tags: [].}
 
-
 # C routine that is used here:
 proc fread(buf: pointer, size, n: int, f: File): int {.
   importc: "fread", header: "<stdio.h>", tags: [ReadIOEffect].}
 proc fseek(f: File, offset: clong, whence: int): int {.
   importc: "fseek", header: "<stdio.h>", tags: [].}
 proc ftell(f: File): int {.importc: "ftell", header: "<stdio.h>", tags: [].}
+proc ferror(f: File): int {.importc: "ferror", header: "<stdio.h>", tags: [].}
 proc setvbuf(stream: File, buf: pointer, typ, size: cint): cint {.
   importc, header: "<stdio.h>", tags: [].}
+proc memchr(s: pointer, c: cint, n: csize): pointer {.
+  importc: "memchr", header: "<string.h>", tags: [].}
+proc memset(s: pointer, c: cint, n: csize) {.
+  header: "<string.h>", importc: "memset", tags: [].}
 
 {.push stackTrace:off, profiler:off.}
 proc write(f: File, c: cstring) = fputs(c, f)
@@ -68,38 +72,60 @@ proc raiseEIO(msg: string) {.noinline, noreturn.} =
   sysFatal(IOError, msg)
 
 proc readLine(f: File, line: var TaintedString): bool =
-  # of course this could be optimized a bit; but IO is slow anyway...
-  # and it was difficult to get this CORRECT with Ansi C's methods
-  setLen(line.string, 0) # reuse the buffer!
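+  # Read with fgets directly into the string's spare capacity and grow the
+  # buffer until a line feed is found; this avoids the old per-character
+  # fgetc loop.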
+  var pos = 0
+  # Use the currently reserved space for a first try
+  when defined(nimscript):
+    var space = 80
+  else:
+    var space = cast[PGenericSeq](line.string).space
+  line.string.setLen(space)
+
   while true:
-    var c = fgetc(f)
-    if c < 0'i32:
-      if line.len > 0: break
-      else: return false
-    if c == 10'i32: break # LF
-    if c == 13'i32:  # CR
-      c = fgetc(f) # is the next char LF?
-      if c != 10'i32: ungetc(c, f) # no, put the character back
-      break
-    add line.string, chr(int(c))
-  result = true
+    # memset to \l so that we can tell how far fgets wrote, even on EOF, where
+    # fgets doesn't append an \l
+    memset(addr line.string[pos], '\l'.ord, space)
+    if fgets(addr line.string[pos], space, f) == nil:
+      line.string.setLen(0)
+      return false
+    let m = memchr(addr line.string[pos], '\l'.ord, space)
+    if m != nil:
+      # \l found: Could be our own or the one by fgets, in any case, we're done
+      var last = cast[ByteAddress](m) - cast[ByteAddress](addr line.string[0])
+      if last > 0 and line.string[last-1] == '\c':
+        line.string.setLen(last-1)
+        return true
+      # We have to distinguish between two possible cases:
+      # \0\l\0 => line ending in a null character.
+      # \0\l\l => last line without newline, null was put there by fgets.
+      elif last > 0 and line.string[last-1] == '\0':
+        if last < pos + space - 1 and line.string[last+1] != '\0':
+          dec last
+      line.string.setLen(last)
+      return true
+    else:
+      # fgets will have inserted a null byte at the end of the string.
+      dec space
+    # No \l found: Increase buffer and read more
+    inc pos, space
+    space = 128 # read in 128 bytes at a time
+    line.string.setLen(pos+space)
 
 proc readLine(f: File): TaintedString =
   result = TaintedString(newStringOfCap(80))
   if not readLine(f, result): raiseEIO("EOF reached")
 
-proc write(f: File, i: int) = 
+proc write(f: File, i: int) =
   when sizeof(int) == 8:
     fprintf(f, "%lld", i)
   else:
     fprintf(f, "%ld", i)
 
-proc write(f: File, i: BiggestInt) = 
+proc write(f: File, i: BiggestInt) =
   when sizeof(BiggestInt) == 8:
     fprintf(f, "%lld", i)
   else:
     fprintf(f, "%ld", i)
-    
+
 proc write(f: File, b: bool) =
   if b: write(f, "true")
   else: write(f, "false")
@@ -110,7 +136,7 @@ proc write(f: File, c: char) = putc(c, f)
 proc write(f: File, a: varargs[string, `$`]) =
   for x in items(a): write(f, x)
 
-proc readAllBuffer(file: File): string = 
+proc readAllBuffer(file: File): string =
   # This proc is for File we want to read but don't know how many
   # bytes we need to read before the buffer is empty.
   result = ""
@@ -123,8 +149,8 @@ proc readAllBuffer(file: File): string =
       buffer.setLen(bytesRead)
       result.add(buffer)
       break
-  
-proc rawFileSize(file: File): int = 
+
+proc rawFileSize(file: File): int =
   # this does not raise an error opposed to `getFileSize`
   var oldPos = ftell(file)
   discard fseek(file, 0, 2) # seek the end of the file
@@ -134,23 +160,34 @@ proc rawFileSize(file: File): int =
 proc readAllFile(file: File, len: int): string =
   # We acquire the filesize beforehand and hope it doesn't change.
   # Speeds things up.
-  result = newString(int(len))
-  if readBuffer(file, addr(result[0]), int(len)) != len:
+  result = newString(len)
+  let bytes = readBuffer(file, addr(result[0]), len)
+  if endOfFile(file):
+    if bytes < len:
+      result.setLen(bytes)
+  elif ferror(file) != 0:
     raiseEIO("error while reading from file")
+  else:
+    # We read all the bytes but did not reach the EOF
+    # Try to read it as a buffer
+    result.add(readAllBuffer(file))
 
 proc readAllFile(file: File): string =
   var len = rawFileSize(file)
   result = readAllFile(file, len)
-  
-proc readAll(file: File): TaintedString = 
+
+proc readAll(file: File): TaintedString =
   # Separate handling needed because we need to buffer when we
   # don't know the overall length of the File.
-  let len = if file != stdin: rawFileSize(file) else: -1
+  when declared(stdin):
+    let len = if file != stdin: rawFileSize(file) else: -1
+  else:
+    let len = rawFileSize(file)
   if len > 0:
     result = readAllFile(file, len).TaintedString
   else:
     result = readAllBuffer(file).TaintedString
-  
+
 proc readFile(filename: string): TaintedString =
   var f = open(filename)
   try:
@@ -171,12 +208,19 @@ proc endOfFile(f: File): bool =
   ungetc(c, f)
   return c < 0'i32
 
-proc writeln[Ty](f: File, x: varargs[Ty, `$`]) =
-  for i in items(x): write(f, i)
+proc writeLn[Ty](f: File, x: varargs[Ty, `$`]) =
+  for i in items(x):
+    write(f, i)
+  write(f, "\n")
+
+proc writeLine[Ty](f: File, x: varargs[Ty, `$`]) =
+  for i in items(x):
+    write(f, i)
   write(f, "\n")
 
-proc rawEcho(x: string) {.inline, compilerproc.} = write(stdout, x)
-proc rawEchoNL() {.inline, compilerproc.} = write(stdout, "\n")
+when declared(stdout):
+  proc rawEcho(x: string) {.inline, compilerproc.} = write(stdout, x)
+  proc rawEchoNL() {.inline, compilerproc.} = write(stdout, "\n")
 
 # interface to the C procs:
 
@@ -229,7 +273,7 @@ proc open(f: var File, filename: string,
     elif bufSize == 0:
       discard setvbuf(f, nil, IONBF, 0)
 
-proc reopen(f: File, filename: string, mode: FileMode = fmRead): bool = 
+proc reopen(f: File, filename: string, mode: FileMode = fmRead): bool =
   var p: pointer = freopen(filename, FormatOpen[mode], f)
   result = p != nil
 
@@ -243,23 +287,23 @@ proc open(f: var File, filehandle: FileHandle, mode: FileMode): bool =
 proc fwrite(buf: pointer, size, n: int, f: File): int {.
   importc: "fwrite", noDecl.}
 
-proc readBuffer(f: File, buffer: pointer, len: int): int =
+proc readBuffer(f: File, buffer: pointer, len: Natural): int =
   result = fread(buffer, 1, len, f)
 
-proc readBytes(f: File, a: var openArray[int8|uint8], start, len: int): int =
+proc readBytes(f: File, a: var openArray[int8|uint8], start, len: Natural): int =
   result = readBuffer(f, addr(a[start]), len)
 
-proc readChars(f: File, a: var openArray[char], start, len: int): int =
+proc readChars(f: File, a: var openArray[char], start, len: Natural): int =
   result = readBuffer(f, addr(a[start]), len)
 
 {.push stackTrace:off, profiler:off.}
-proc writeBytes(f: File, a: openArray[int8|uint8], start, len: int): int =
+proc writeBytes(f: File, a: openArray[int8|uint8], start, len: Natural): int =
   var x = cast[ptr array[0..1000_000_000, int8]](a)
   result = writeBuffer(f, addr(x[start]), len)
-proc writeChars(f: File, a: openArray[char], start, len: int): int =
+proc writeChars(f: File, a: openArray[char], start, len: Natural): int =
   var x = cast[ptr array[0..1000_000_000, int8]](a)
   result = writeBuffer(f, addr(x[start]), len)
-proc writeBuffer(f: File, buffer: pointer, len: int): int =
+proc writeBuffer(f: File, buffer: pointer, len: Natural): int =
   result = fwrite(buffer, 1, len, f)
 
 proc write(f: File, s: string) =
diff --git a/lib/system/syslocks.nim b/lib/system/syslocks.nim
index 8b38f34f3..7a113b9d4 100644
--- a/lib/system/syslocks.nim
+++ b/lib/system/syslocks.nim
@@ -11,8 +11,8 @@
 
 when defined(Windows):
   type
-    THandle = int
-    TSysLock {.final, pure.} = object # CRITICAL_SECTION in WinApi
+    Handle = int
+    SysLock {.final, pure.} = object # CRITICAL_SECTION in WinApi
       DebugInfo: pointer
       LockCount: int32
       RecursionCount: int32
@@ -20,85 +20,87 @@ when defined(Windows):
       LockSemaphore: int
       Reserved: int32
 
-    TSysCond = THandle
-          
-  proc initSysLock(L: var TSysLock) {.stdcall, noSideEffect,
+    SysCond = Handle
+
+  {.deprecated: [THandle: Handle, TSysLock: SysLock, TSysCond: SysCond].}
+
+  proc initSysLock(L: var SysLock) {.stdcall, noSideEffect,
     dynlib: "kernel32", importc: "InitializeCriticalSection".}
     ## Initializes the lock `L`.
 
-  proc tryAcquireSysAux(L: var TSysLock): int32 {.stdcall, noSideEffect,
+  proc tryAcquireSysAux(L: var SysLock): int32 {.stdcall, noSideEffect,
     dynlib: "kernel32", importc: "TryEnterCriticalSection".}
     ## Tries to acquire the lock `L`.
-    
-  proc tryAcquireSys(L: var TSysLock): bool {.inline.} = 
+
+  proc tryAcquireSys(L: var SysLock): bool {.inline.} =
     result = tryAcquireSysAux(L) != 0'i32
 
-  proc acquireSys(L: var TSysLock) {.stdcall, noSideEffect,
+  proc acquireSys(L: var SysLock) {.stdcall, noSideEffect,
     dynlib: "kernel32", importc: "EnterCriticalSection".}
     ## Acquires the lock `L`.
-    
-  proc releaseSys(L: var TSysLock) {.stdcall, noSideEffect,
+
+  proc releaseSys(L: var SysLock) {.stdcall, noSideEffect,
     dynlib: "kernel32", importc: "LeaveCriticalSection".}
     ## Releases the lock `L`.
 
-  proc deinitSys(L: var TSysLock) {.stdcall, noSideEffect,
+  proc deinitSys(L: var SysLock) {.stdcall, noSideEffect,
     dynlib: "kernel32", importc: "DeleteCriticalSection".}
 
-  proc createEvent(lpEventAttributes: pointer, 
+  proc createEvent(lpEventAttributes: pointer,
                    bManualReset, bInitialState: int32,
-                   lpName: cstring): TSysCond {.stdcall, noSideEffect,
+                   lpName: cstring): SysCond {.stdcall, noSideEffect,
     dynlib: "kernel32", importc: "CreateEventA".}
-  
-  proc closeHandle(hObject: THandle) {.stdcall, noSideEffect,
+
+  proc closeHandle(hObject: Handle) {.stdcall, noSideEffect,
     dynlib: "kernel32", importc: "CloseHandle".}
-  proc waitForSingleObject(hHandle: THandle, dwMilliseconds: int32): int32 {.
+  proc waitForSingleObject(hHandle: Handle, dwMilliseconds: int32): int32 {.
     stdcall, dynlib: "kernel32", importc: "WaitForSingleObject", noSideEffect.}
 
-  proc signalSysCond(hEvent: TSysCond) {.stdcall, noSideEffect,
+  proc signalSysCond(hEvent: SysCond) {.stdcall, noSideEffect,
     dynlib: "kernel32", importc: "SetEvent".}
-  
-  proc initSysCond(cond: var TSysCond) {.inline.} =
+
+  proc initSysCond(cond: var SysCond) {.inline.} =
     cond = createEvent(nil, 0'i32, 0'i32, nil)
-  proc deinitSysCond(cond: var TSysCond) {.inline.} =
+  proc deinitSysCond(cond: var SysCond) {.inline.} =
     closeHandle(cond)
-  proc waitSysCond(cond: var TSysCond, lock: var TSysLock) =
+  proc waitSysCond(cond: var SysCond, lock: var SysLock) =
     releaseSys(lock)
     discard waitForSingleObject(cond, -1'i32)
     acquireSys(lock)
 
-  proc waitSysCondWindows(cond: var TSysCond) =
+  proc waitSysCondWindows(cond: var SysCond) =
     discard waitForSingleObject(cond, -1'i32)
 
 else:
   type
-    TSysLock {.importc: "pthread_mutex_t", pure, final,
+    SysLock {.importc: "pthread_mutex_t", pure, final,
                header: "<sys/types.h>".} = object
-    TSysCond {.importc: "pthread_cond_t", pure, final,
+    SysCond {.importc: "pthread_cond_t", pure, final,
                header: "<sys/types.h>".} = object
 
-  proc initSysLock(L: var TSysLock, attr: pointer = nil) {.
+  proc initSysLock(L: var SysLock, attr: pointer = nil) {.
     importc: "pthread_mutex_init", header: "<pthread.h>", noSideEffect.}
 
-  proc acquireSys(L: var TSysLock) {.noSideEffect,
+  proc acquireSys(L: var SysLock) {.noSideEffect,
     importc: "pthread_mutex_lock", header: "<pthread.h>".}
-  proc tryAcquireSysAux(L: var TSysLock): cint {.noSideEffect,
+  proc tryAcquireSysAux(L: var SysLock): cint {.noSideEffect,
     importc: "pthread_mutex_trylock", header: "<pthread.h>".}
 
-  proc tryAcquireSys(L: var TSysLock): bool {.inline.} = 
+  proc tryAcquireSys(L: var SysLock): bool {.inline.} =
     result = tryAcquireSysAux(L) == 0'i32
 
-  proc releaseSys(L: var TSysLock) {.noSideEffect,
+  proc releaseSys(L: var SysLock) {.noSideEffect,
     importc: "pthread_mutex_unlock", header: "<pthread.h>".}
-  proc deinitSys(L: var TSysLock) {.noSideEffect,
+  proc deinitSys(L: var SysLock) {.noSideEffect,
     importc: "pthread_mutex_destroy", header: "<pthread.h>".}
 
-  proc initSysCond(cond: var TSysCond, cond_attr: pointer = nil) {.
+  proc initSysCond(cond: var SysCond, cond_attr: pointer = nil) {.
     importc: "pthread_cond_init", header: "<pthread.h>", noSideEffect.}
-  proc waitSysCond(cond: var TSysCond, lock: var TSysLock) {.
+  proc waitSysCond(cond: var SysCond, lock: var SysLock) {.
     importc: "pthread_cond_wait", header: "<pthread.h>", noSideEffect.}
-  proc signalSysCond(cond: var TSysCond) {.
+  proc signalSysCond(cond: var SysCond) {.
     importc: "pthread_cond_signal", header: "<pthread.h>", noSideEffect.}
-  
-  proc deinitSysCond(cond: var TSysCond) {.noSideEffect,
+
+  proc deinitSysCond(cond: var SysCond) {.noSideEffect,
     importc: "pthread_cond_destroy", header: "<pthread.h>".}
-  
+
diff --git a/lib/system/sysspawn.nim b/lib/system/sysspawn.nim
index 6f45f1509..7aef86df9 100644
--- a/lib/system/sysspawn.nim
+++ b/lib/system/sysspawn.nim
@@ -9,7 +9,7 @@
 
 ## Implements Nim's 'spawn'.
 
-when not declared(NimString): 
+when not declared(NimString):
   {.error: "You must not import this module explicitly".}
 
 {.push stackTrace:off.}
@@ -19,9 +19,9 @@ when not declared(NimString):
 
 type
   CondVar = object
-    c: TSysCond
+    c: SysCond
     when defined(posix):
-      stupidLock: TSysLock
+      stupidLock: SysLock
       counter: int
 
 proc createCondVar(): CondVar =
diff --git a/lib/system/sysstr.nim b/lib/system/sysstr.nim
index 5b4020c8c..326c601bd 100644
--- a/lib/system/sysstr.nim
+++ b/lib/system/sysstr.nim
@@ -50,12 +50,16 @@ proc rawNewStringNoInit(space: int): NimString {.compilerProc.} =
   if s < 7: s = 7
   result = allocStrNoInit(sizeof(TGenericSeq) + s + 1)
   result.reserved = s
+  when defined(gogc):
+    result.elemSize = 1
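+    # the Go backend's growObj derives the payload size from elemSize, so
+    # strings have to record it explicitly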
 
 proc rawNewString(space: int): NimString {.compilerProc.} =
   var s = space
   if s < 7: s = 7
   result = allocStr(sizeof(TGenericSeq) + s + 1)
   result.reserved = s
+  when defined(gogc):
+    result.elemSize = 1
 
 proc mnewString(len: int): NimString {.compilerProc.} =
   result = rawNewString(len)
@@ -206,6 +210,14 @@ proc incrSeq(seq: PGenericSeq, elemSize: int): PGenericSeq {.compilerProc.} =
                                GenericSeqSize))
   inc(result.len)
 
+proc incrSeqV2(seq: PGenericSeq, elemSize: int): PGenericSeq {.compilerProc.} =
+  # incrSeq version 2
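+  # unlike incrSeq above, the length is not incremented here; that is left
+  # to the caller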
+  result = seq
+  if result.len >= result.space:
+    result.reserved = resize(result.space)
+    result = cast[PGenericSeq](growObj(result, elemSize * result.reserved +
+                               GenericSeqSize))
+
 proc setLengthSeq(seq: PGenericSeq, elemSize, newLen: int): PGenericSeq {.
     compilerRtl.} =
   result = seq
@@ -216,7 +228,7 @@ proc setLengthSeq(seq: PGenericSeq, elemSize, newLen: int): PGenericSeq {.
   elif newLen < result.len:
     # we need to decref here, otherwise the GC leaks!
     when not defined(boehmGC) and not defined(nogc) and
-         not defined(gcMarkAndSweep):
+         not defined(gcMarkAndSweep) and not defined(gogc):
       when compileOption("gc", "v2"):
         for i in newLen..result.len-1:
           let len0 = gch.tempStack.len
diff --git a/lib/system/threads.nim b/lib/system/threads.nim
index d8e011ecb..c5de841f8 100644
--- a/lib/system/threads.nim
+++ b/lib/system/threads.nim
@@ -11,7 +11,7 @@
 ## Do not import it directly. To activate thread support you need to compile
 ## with the ``--threads:on`` command line switch.
 ##
-## Nim's memory model for threads is quite different from other common 
+## Nim's memory model for threads is quite different from other common
 ## programming languages (C, Pascal): Each thread has its own
 ## (garbage collected) heap and sharing of memory is restricted. This helps
 ## to prevent race conditions and improves efficiency. See `the manual for
@@ -24,9 +24,9 @@
 ##  import locks
 ##
 ##  var
-##    thr: array [0..4, TThread[tuple[a,b: int]]]
-##    L: TLock
-##  
+##    thr: array [0..4, Thread[tuple[a,b: int]]]
+##    L: Lock
+##
 ##  proc threadFunc(interval: tuple[a,b: int]) {.thread.} =
 ##    for i in interval.a..interval.b:
 ##      acquire(L) # lock stdout
@@ -38,8 +38,8 @@
 ##  for i in 0..high(thr):
 ##    createThread(thr[i], threadFunc, (i*10, i*10+5))
 ##  joinThreads(thr)
-  
-when not declared(NimString): 
+
+when not declared(NimString):
   {.error: "You must not import this module explicitly".}
 
 const
@@ -51,40 +51,41 @@ const
 
 when defined(windows):
   type
-    TSysThread = THandle
-    TWinThreadProc = proc (x: pointer): int32 {.stdcall.}
+    SysThread = Handle
+    WinThreadProc = proc (x: pointer): int32 {.stdcall.}
+  {.deprecated: [TSysThread: SysThread, TWinThreadProc: WinThreadProc].}
 
   proc createThread(lpThreadAttributes: pointer, dwStackSize: int32,
-                     lpStartAddress: TWinThreadProc, 
+                     lpStartAddress: WinThreadProc,
                      lpParameter: pointer,
-                     dwCreationFlags: int32, 
-                     lpThreadId: var int32): TSysThread {.
+                     dwCreationFlags: int32,
+                     lpThreadId: var int32): SysThread {.
     stdcall, dynlib: "kernel32", importc: "CreateThread".}
 
-  proc winSuspendThread(hThread: TSysThread): int32 {.
+  proc winSuspendThread(hThread: SysThread): int32 {.
     stdcall, dynlib: "kernel32", importc: "SuspendThread".}
-      
-  proc winResumeThread(hThread: TSysThread): int32 {.
+
+  proc winResumeThread(hThread: SysThread): int32 {.
     stdcall, dynlib: "kernel32", importc: "ResumeThread".}
 
   proc waitForMultipleObjects(nCount: int32,
-                              lpHandles: ptr TSysThread,
+                              lpHandles: ptr SysThread,
                               bWaitAll: int32,
                               dwMilliseconds: int32): int32 {.
     stdcall, dynlib: "kernel32", importc: "WaitForMultipleObjects".}
 
-  proc terminateThread(hThread: TSysThread, dwExitCode: int32): int32 {.
+  proc terminateThread(hThread: SysThread, dwExitCode: int32): int32 {.
     stdcall, dynlib: "kernel32", importc: "TerminateThread".}
-    
+
   type
-    TThreadVarSlot = distinct int32
+    ThreadVarSlot = distinct int32
 
   when true:
-    proc threadVarAlloc(): TThreadVarSlot {.
+    proc threadVarAlloc(): ThreadVarSlot {.
       importc: "TlsAlloc", stdcall, header: "<windows.h>".}
-    proc threadVarSetValue(dwTlsIndex: TThreadVarSlot, lpTlsValue: pointer) {.
+    proc threadVarSetValue(dwTlsIndex: ThreadVarSlot, lpTlsValue: pointer) {.
       importc: "TlsSetValue", stdcall, header: "<windows.h>".}
-    proc tlsGetValue(dwTlsIndex: TThreadVarSlot): pointer {.
+    proc tlsGetValue(dwTlsIndex: ThreadVarSlot): pointer {.
       importc: "TlsGetValue", stdcall, header: "<windows.h>".}
 
     proc getLastError(): uint32 {.
@@ -92,76 +93,93 @@ when defined(windows):
     proc setLastError(x: uint32) {.
       importc: "SetLastError", stdcall, header: "<windows.h>".}
 
-    proc threadVarGetValue(dwTlsIndex: TThreadVarSlot): pointer =
+    proc threadVarGetValue(dwTlsIndex: ThreadVarSlot): pointer =
       let realLastError = getLastError()
       result = tlsGetValue(dwTlsIndex)
       setLastError(realLastError)
   else:
-    proc threadVarAlloc(): TThreadVarSlot {.
+    proc threadVarAlloc(): ThreadVarSlot {.
       importc: "TlsAlloc", stdcall, dynlib: "kernel32".}
-    proc threadVarSetValue(dwTlsIndex: TThreadVarSlot, lpTlsValue: pointer) {.
+    proc threadVarSetValue(dwTlsIndex: ThreadVarSlot, lpTlsValue: pointer) {.
       importc: "TlsSetValue", stdcall, dynlib: "kernel32".}
-    proc threadVarGetValue(dwTlsIndex: TThreadVarSlot): pointer {.
+    proc threadVarGetValue(dwTlsIndex: ThreadVarSlot): pointer {.
       importc: "TlsGetValue", stdcall, dynlib: "kernel32".}
-  
+
+  proc setThreadAffinityMask(hThread: SysThread, dwThreadAffinityMask: uint) {.
+    importc: "SetThreadAffinityMask", stdcall, header: "<windows.h>".}
+
 else:
   when not defined(macosx):
     {.passL: "-pthread".}
 
   {.passC: "-pthread".}
+  const
+    schedh = "#define _GNU_SOURCE\n#include <sched.h>"
+    pthreadh = "#define _GNU_SOURCE\n#include <pthread.h>"
 
   type
-    TSysThread {.importc: "pthread_t", header: "<sys/types.h>",
+    SysThread {.importc: "pthread_t", header: "<sys/types.h>",
                  final, pure.} = object
-    Tpthread_attr {.importc: "pthread_attr_t",
+    Pthread_attr {.importc: "pthread_attr_t",
                      header: "<sys/types.h>", final, pure.} = object
-                 
-    Ttimespec {.importc: "struct timespec",
+
+    Timespec {.importc: "struct timespec",
                 header: "<time.h>", final, pure.} = object
       tv_sec: int
       tv_nsec: int
+  {.deprecated: [TSysThread: SysThread, Tpthread_attr: PThreadAttr,
+                Ttimespec: Timespec].}
 
-  proc pthread_attr_init(a1: var TPthread_attr) {.
-    importc, header: "<pthread.h>".}
-  proc pthread_attr_setstacksize(a1: var TPthread_attr, a2: int) {.
-    importc, header: "<pthread.h>".}
+  proc pthread_attr_init(a1: var PthreadAttr) {.
+    importc, header: pthreadh.}
+  proc pthread_attr_setstacksize(a1: var PthreadAttr, a2: int) {.
+    importc, header: pthreadh.}
 
-  proc pthread_create(a1: var TSysThread, a2: var TPthread_attr,
-            a3: proc (x: pointer): pointer {.noconv.}, 
-            a4: pointer): cint {.importc: "pthread_create", 
-            header: "<pthread.h>".}
-  proc pthread_join(a1: TSysThread, a2: ptr pointer): cint {.
-    importc, header: "<pthread.h>".}
+  proc pthread_create(a1: var SysThread, a2: var PthreadAttr,
+            a3: proc (x: pointer): pointer {.noconv.},
+            a4: pointer): cint {.importc: "pthread_create",
+            header: pthreadh.}
+  proc pthread_join(a1: SysThread, a2: ptr pointer): cint {.
+    importc, header: pthreadh.}
 
-  proc pthread_cancel(a1: TSysThread): cint {.
-    importc: "pthread_cancel", header: "<pthread.h>".}
+  proc pthread_cancel(a1: SysThread): cint {.
+    importc: "pthread_cancel", header: pthreadh.}
 
   type
-    TThreadVarSlot {.importc: "pthread_key_t", pure, final,
+    ThreadVarSlot {.importc: "pthread_key_t", pure, final,
                    header: "<sys/types.h>".} = object
+  {.deprecated: [TThreadVarSlot: ThreadVarSlot].}
 
-  proc pthread_getspecific(a1: TThreadVarSlot): pointer {.
-    importc: "pthread_getspecific", header: "<pthread.h>".}
-  proc pthread_key_create(a1: ptr TThreadVarSlot, 
+  proc pthread_getspecific(a1: ThreadVarSlot): pointer {.
+    importc: "pthread_getspecific", header: pthreadh.}
+  proc pthread_key_create(a1: ptr ThreadVarSlot,
                           destruct: proc (x: pointer) {.noconv.}): int32 {.
-    importc: "pthread_key_create", header: "<pthread.h>".}
-  proc pthread_key_delete(a1: TThreadVarSlot): int32 {.
-    importc: "pthread_key_delete", header: "<pthread.h>".}
-
-  proc pthread_setspecific(a1: TThreadVarSlot, a2: pointer): int32 {.
-    importc: "pthread_setspecific", header: "<pthread.h>".}
-  
-  proc threadVarAlloc(): TThreadVarSlot {.inline.} =
+    importc: "pthread_key_create", header: pthreadh.}
+  proc pthread_key_delete(a1: ThreadVarSlot): int32 {.
+    importc: "pthread_key_delete", header: pthreadh.}
+
+  proc pthread_setspecific(a1: ThreadVarSlot, a2: pointer): int32 {.
+    importc: "pthread_setspecific", header: pthreadh.}
+
+  proc threadVarAlloc(): ThreadVarSlot {.inline.} =
     discard pthread_key_create(addr(result), nil)
-  proc threadVarSetValue(s: TThreadVarSlot, value: pointer) {.inline.} =
+  proc threadVarSetValue(s: ThreadVarSlot, value: pointer) {.inline.} =
     discard pthread_setspecific(s, value)
-  proc threadVarGetValue(s: TThreadVarSlot): pointer {.inline.} =
+  proc threadVarGetValue(s: ThreadVarSlot): pointer {.inline.} =
     result = pthread_getspecific(s)
 
   when useStackMaskHack:
-    proc pthread_attr_setstack(attr: var TPthread_attr, stackaddr: pointer,
+    proc pthread_attr_setstack(attr: var PthreadAttr, stackaddr: pointer,
                                size: int): cint {.
-      importc: "pthread_attr_setstack", header: "<pthread.h>".}
+      importc: "pthread_attr_setstack", header: pthreadh.}
+
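+  # CPU affinity support: pthread_setaffinity_np and CPU_SET are GNU
+  # extensions, hence the _GNU_SOURCE define in schedh/pthreadh above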
+  type CpuSet {.importc: "cpu_set_t", header: schedh.} = object
+  proc cpusetZero(s: var CpuSet) {.importc: "CPU_ZERO", header: schedh.}
+  proc cpusetIncl(cpu: cint; s: var CpuSet) {.
+    importc: "CPU_SET", header: schedh.}
+
+  proc setAffinity(thread: SysThread; setsize: csize; s: var CpuSet) {.
+    importc: "pthread_setaffinity_np", header: pthreadh.}
 
 const
   emulatedThreadVars = compileOption("tlsEmulation")
@@ -175,13 +193,13 @@ when emulatedThreadVars:
 # allocations are needed. Currently less than 7K are used on a 64bit machine.
 # We use ``float`` for proper alignment:
 type
-  TThreadLocalStorage = array [0..1_000, float]
+  ThreadLocalStorage = array [0..1_000, float]
 
-  PGcThread = ptr TGcThread
-  TGcThread {.pure, inheritable.} = object
-    sys: TSysThread
+  PGcThread = ptr GcThread
+  GcThread {.pure, inheritable.} = object
+    sys: SysThread
     when emulatedThreadVars and not useStackMaskHack:
-      tls: TThreadLocalStorage
+      tls: ThreadLocalStorage
     else:
       nil
     when hasSharedHeap:
@@ -190,34 +208,35 @@ type
       stackSize: int
     else:
       nil
-
-# XXX it'd be more efficient to not use a global variable for the 
-# thread storage slot, but to rely on the implementation to assign slot X
-# for us... ;-)
-var globalsSlot: TThreadVarSlot
+{.deprecated: [TThreadLocalStorage: ThreadLocalStorage, TGcThread: GcThread].}
 
 when not defined(useNimRtl):
   when not useStackMaskHack:
-    var mainThread: TGcThread
+    var mainThread: GcThread
 
-proc initThreadVarsEmulation() {.compilerProc, inline.} =
-  when not defined(useNimRtl):
-    globalsSlot = threadVarAlloc()
-    when declared(mainThread):
-      threadVarSetValue(globalsSlot, addr(mainThread))
-
-#const globalsSlot = TThreadVarSlot(0)
+#const globalsSlot = ThreadVarSlot(0)
 #sysAssert checkSlot.int == globalsSlot.int
 
 when emulatedThreadVars:
+  # XXX it'd be more efficient to not use a global variable for the
+  # thread storage slot, but to rely on the implementation to assign slot X
+  # for us... ;-)
+  var globalsSlot: ThreadVarSlot
+
   proc GetThreadLocalVars(): pointer {.compilerRtl, inl.} =
     result = addr(cast[PGcThread](threadVarGetValue(globalsSlot)).tls)
 
+  proc initThreadVarsEmulation() {.compilerProc, inline.} =
+    when not defined(useNimRtl):
+      globalsSlot = threadVarAlloc()
+      when declared(mainThread):
+        threadVarSetValue(globalsSlot, addr(mainThread))
+
 when useStackMaskHack:
   proc maskStackPointer(offset: int): pointer {.compilerRtl, inl.} =
     var x {.volatile.}: pointer
     x = addr(x)
-    result = cast[pointer]((cast[int](x) and not ThreadStackMask) +% 
+    result = cast[pointer]((cast[int](x) and not ThreadStackMask) +%
       (0) +% offset)
 
 # create for the main thread. Note: do not insert this data into the list
@@ -225,28 +244,28 @@ when useStackMaskHack:
 when not defined(useNimRtl):
   when not useStackMaskHack:
     #when not defined(createNimRtl): initStackBottom()
-    initGC()
-    
+    when declared(initGC): initGC()
+
   when emulatedThreadVars:
-    if nimThreadVarsSize() > sizeof(TThreadLocalStorage):
+    if nimThreadVarsSize() > sizeof(ThreadLocalStorage):
       echo "too large thread local storage size requested"
       quit 1
-  
-  when hasSharedHeap and not defined(boehmgc) and not defined(nogc):
+
+  when hasSharedHeap and not defined(boehmgc) and not defined(gogc) and not defined(nogc):
     var
       threadList: PGcThread
-      
-    proc registerThread(t: PGcThread) = 
+
+    proc registerThread(t: PGcThread) =
       # we need to use the GC global lock here!
       acquireSys(HeapLock)
       t.prev = nil
       t.next = threadList
-      if threadList != nil: 
+      if threadList != nil:
         sysAssert(threadList.prev == nil, "threadList.prev == nil")
         threadList.prev = t
       threadList = t
       releaseSys(HeapLock)
-    
+
     proc unregisterThread(t: PGcThread) =
       # we need to use the GC global lock here!
       acquireSys(HeapLock)
@@ -258,96 +277,128 @@ when not defined(useNimRtl):
       t.next = nil
       t.prev = nil
       releaseSys(HeapLock)
-      
+
     # on UNIX, the GC uses ``SIGFREEZE`` to tell every thread to stop so that
     # the GC can examine the stacks?
     proc stopTheWord() = discard
-    
+
 # We jump through some hoops here to ensure that Nim thread procs can have
-# the Nim calling convention. This is needed because thread procs are 
+# the Nim calling convention. This is needed because thread procs are
 # ``stdcall`` on Windows and ``noconv`` on UNIX. Alternative would be to just
 # use ``stdcall`` since it is mapped to ``noconv`` on UNIX anyway.
 
 type
-  TThread* {.pure, final.}[TArg] =
-      object of TGcThread ## Nim thread. A thread is a heavy object (~14K)
+  Thread* {.pure, final.}[TArg] =
+      object of GcThread  ## Nim thread. A thread is a heavy object (~14K)
                           ## that **must not** be part of a message! Use
-                          ## a ``TThreadId`` for that.
+                          ## a ``ThreadId`` for that.
     when TArg is void:
       dataFn: proc () {.nimcall, gcsafe.}
     else:
       dataFn: proc (m: TArg) {.nimcall, gcsafe.}
       data: TArg
-  TThreadId*[TArg] = ptr TThread[TArg] ## the current implementation uses
+  ThreadId*[TArg] = ptr Thread[TArg]  ## the current implementation uses
                                        ## a pointer as a thread ID.
+{.deprecated: [TThread: Thread, TThreadId: ThreadId].}
 
-when not defined(boehmgc) and not hasSharedHeap:
+when not defined(boehmgc) and not hasSharedHeap and not defined(gogc):
   proc deallocOsPages()
 
+when defined(boehmgc):
+  type GCStackBaseProc = proc(sb: pointer, t: pointer) {.noconv.}
+  proc boehmGC_call_with_stack_base(sbp: GCStackBaseProc, p: pointer)
+    {.importc: "GC_call_with_stack_base", boehmGC.}
+  proc boehmGC_register_my_thread(sb: pointer)
+    {.importc: "GC_register_my_thread", boehmGC.}
+  proc boehmGC_unregister_my_thread()
+    {.importc: "GC_unregister_my_thread", boehmGC.}
+
+  proc threadProcWrapDispatch[TArg](sb: pointer, thrd: pointer) {.noconv.} =
+    boehmGC_register_my_thread(sb)
+    let thrd = cast[ptr Thread[TArg]](thrd)
+    when TArg is void:
+      thrd.dataFn()
+    else:
+      thrd.dataFn(thrd.data)
+    boehmGC_unregister_my_thread()
+else:
+  proc threadProcWrapDispatch[TArg](thrd: ptr Thread[TArg]) =
+    when TArg is void:
+      thrd.dataFn()
+    else:
+      thrd.dataFn(thrd.data)
+
+proc threadProcWrapStackFrame[TArg](thrd: ptr Thread[TArg]) =
+  when defined(boehmgc):
+    boehmGC_call_with_stack_base(threadProcWrapDispatch[TArg], thrd)
+  elif not defined(nogc) and not defined(gogc):
+    var p {.volatile.}: proc(a: ptr Thread[TArg]) {.nimcall.} =
+      threadProcWrapDispatch[TArg]
+    when not hasSharedHeap:
+      # init the GC for refc/markandsweep
+      setStackBottom(addr(p))
+      initGC()
+    when declared(registerThread):
+      thrd.stackBottom = addr(thrd)
+      registerThread(thrd)
+    p(thrd)
+    when declared(registerThread): unregisterThread(thrd)
+    when declared(deallocOsPages): deallocOsPages()
+  else:
+    threadProcWrapDispatch(thrd)
+
 template threadProcWrapperBody(closure: expr) {.immediate.} =
   when declared(globalsSlot): threadVarSetValue(globalsSlot, closure)
-  var t = cast[ptr TThread[TArg]](closure)
-  when useStackMaskHack:
-    var tls: TThreadLocalStorage
-  when not defined(boehmgc) and not defined(nogc) and not hasSharedHeap:
-    # init the GC for this thread:
-    setStackBottom(addr(t))
-    initGC()
-  when declared(registerThread):
-    t.stackBottom = addr(t)
-    registerThread(t)
-  when TArg is void: t.dataFn()
-  else: t.dataFn(t.data)
-  when declared(registerThread): unregisterThread(t)
-  when declared(deallocOsPages): deallocOsPages()
+  var thrd = cast[ptr Thread[TArg]](closure)
+  threadProcWrapStackFrame(thrd)
   # Since an unhandled exception terminates the whole process (!), there is
   # no need for a ``try finally`` here, nor would it be correct: the code-gen
   # tries to re-raise the current exception after the ``finally``!
   # However this is doomed to fail, because we already unmapped every heap
   # page!
-  
+
   # mark as not running anymore:
-  t.dataFn = nil
-  
+  thrd.dataFn = nil
+
 {.push stack_trace:off.}
 when defined(windows):
-  proc threadProcWrapper[TArg](closure: pointer): int32 {.stdcall.} = 
+  proc threadProcWrapper[TArg](closure: pointer): int32 {.stdcall.} =
     threadProcWrapperBody(closure)
     # implicitly return 0
 else:
-  proc threadProcWrapper[TArg](closure: pointer): pointer {.noconv.} = 
+  proc threadProcWrapper[TArg](closure: pointer): pointer {.noconv.} =
     threadProcWrapperBody(closure)
 {.pop.}
 
-proc running*[TArg](t: TThread[TArg]): bool {.inline.} = 
+proc running*[TArg](t: Thread[TArg]): bool {.inline.} =
   ## returns true if `t` is running.
   result = t.dataFn != nil
 
 when hostOS == "windows":
-  proc joinThread*[TArg](t: TThread[TArg]) {.inline.} = 
+  proc joinThread*[TArg](t: Thread[TArg]) {.inline.} =
     ## waits for the thread `t` to finish.
     discard waitForSingleObject(t.sys, -1'i32)
 
-  proc joinThreads*[TArg](t: varargs[TThread[TArg]]) = 
+  proc joinThreads*[TArg](t: varargs[Thread[TArg]]) =
     ## waits for every thread in `t` to finish.
-    var a: array[0..255, TSysThread]
+    var a: array[0..255, SysThread]
     sysAssert a.len >= t.len, "a.len >= t.len"
     for i in 0..t.high: a[i] = t[i].sys
     discard waitForMultipleObjects(t.len.int32,
-                                   cast[ptr TSysThread](addr(a)), 1, -1)
+                                   cast[ptr SysThread](addr(a)), 1, -1)
 
 else:
-  proc joinThread*[TArg](t: TThread[TArg]) {.inline.} =
+  proc joinThread*[TArg](t: Thread[TArg]) {.inline.} =
     ## waits for the thread `t` to finish.
     discard pthread_join(t.sys, nil)
 
-  proc joinThreads*[TArg](t: varargs[TThread[TArg]]) =
+  proc joinThreads*[TArg](t: varargs[Thread[TArg]]) =
     ## waits for every thread in `t` to finish.
     for i in 0..t.high: joinThread(t[i])
 
 when false:
   # XXX a thread should really release its heap here somehow:
-  proc destroyThread*[TArg](t: var TThread[TArg]) =
+  proc destroyThread*[TArg](t: var Thread[TArg]) =
     ## forces the thread `t` to terminate. This is potentially dangerous if
     ## you don't have full control over `t` and its acquired resources.
     when hostOS == "windows":
@@ -358,8 +409,8 @@ when false:
     t.dataFn = nil
 
 when hostOS == "windows":
-  proc createThread*[TArg](t: var TThread[TArg],
-                           tp: proc (arg: TArg) {.thread.}, 
+  proc createThread*[TArg](t: var Thread[TArg],
+                           tp: proc (arg: TArg) {.thread.},
                            param: TArg) =
     ## creates a new thread `t` and starts its execution. Entry point is the
     ## proc `tp`. `param` is passed to `tp`. `TArg` can be ``void`` if you
@@ -372,9 +423,16 @@ when hostOS == "windows":
                          addr(t), 0'i32, dummyThreadId)
     if t.sys <= 0:
       raise newException(ResourceExhaustedError, "cannot create thread")
+
+  proc pinToCpu*[Arg](t: var Thread[Arg]; cpu: Natural) =
+    ## pins a thread to a `CPU`:idx:. In other words, it sets a
+    ## thread's `affinity`:idx:. If you don't know what this means, you
+    ## shouldn't use this proc.
+    setThreadAffinityMask(t.sys, uint(1 shl cpu))
+
 else:
-  proc createThread*[TArg](t: var TThread[TArg], 
-                           tp: proc (arg: TArg) {.thread.}, 
+  proc createThread*[TArg](t: var Thread[TArg],
+                           tp: proc (arg: TArg) {.thread.},
                            param: TArg) =
     ## creates a new thread `t` and starts its execution. Entry point is the
     ## proc `tp`. `param` is passed to `tp`. `TArg` can be ``void`` if you
@@ -382,29 +440,32 @@ else:
     when TArg isnot void: t.data = param
     t.dataFn = tp
     when hasSharedHeap: t.stackSize = ThreadStackSize
-    var a {.noinit.}: Tpthread_attr
+    var a {.noinit.}: PthreadAttr
     pthread_attr_init(a)
     pthread_attr_setstacksize(a, ThreadStackSize)
     if pthread_create(t.sys, a, threadProcWrapper[TArg], addr(t)) != 0:
       raise newException(ResourceExhaustedError, "cannot create thread")
 
-proc threadId*[TArg](t: var TThread[TArg]): TThreadId[TArg] {.inline.} =
+  proc pinToCpu*[Arg](t: var Thread[Arg]; cpu: Natural) =
+    ## pins a thread to a `CPU`:idx:. In other words, it sets a
+    ## thread's `affinity`:idx:. If you don't know what this means, you
+    ## shouldn't use this proc.
+    var s {.noinit.}: CpuSet
+    cpusetZero(s)
+    cpusetIncl(cpu.cint, s)
+    setAffinity(t.sys, sizeof(s), s)
+
+proc threadId*[TArg](t: var Thread[TArg]): ThreadId[TArg] {.inline.} =
   ## returns the thread ID of `t`.
   result = addr(t)
 
-proc myThreadId*[TArg](): TThreadId[TArg] =
-  ## returns the thread ID of the thread that calls this proc. This is unsafe
-  ## because the type ``TArg`` is not checked for consistency!
-  result = cast[TThreadId[TArg]](threadVarGetValue(globalsSlot))
-
 when false:
-  proc mainThreadId*[TArg](): TThreadId[TArg] =
+  proc mainThreadId*[TArg](): ThreadId[TArg] =
     ## returns the thread ID of the main thread.
-    result = cast[TThreadId[TArg]](addr(mainThread))
+    result = cast[ThreadId[TArg]](addr(mainThread))
 
 when useStackMaskHack:
   proc runMain(tp: proc () {.thread.}) {.compilerproc.} =
-    var mainThread: TThread[pointer]
+    var mainThread: Thread[pointer]
     createThread(mainThread, tp)
     joinThread(mainThread)
-
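For reference, a minimal usage sketch of the API after the renames above (``Thread`` instead of ``TThread``) together with the new ``pinToCpu`` proc; the ``worker`` proc, the thread count and the one-CPU-per-thread pinning are illustrative assumptions, not part of the patch (compile with --threads:on):

  proc worker(id: int) {.thread.} =
    # only gcsafe work is allowed here; the payload is just illustrative
    echo "worker ", id, " running"

  var workers: array[0..3, Thread[int]]
  for i in 0 .. high(workers):
    createThread(workers[i], worker, i)
    pinToCpu(workers[i], i)      # new in this commit: pin thread i to CPU i
  joinThreads(workers)
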
diff --git a/lib/system/timers.nim b/lib/system/timers.nim
index e5de791ac..ac8418824 100644
--- a/lib/system/timers.nim
+++ b/lib/system/timers.nim
@@ -11,83 +11,86 @@
 ## `<https://github.com/jckarter/clay/blob/master/compiler/src/hirestimer.cpp>`_
 
 type
-  TTicks = distinct int64
-  TNanos = int64
+  Ticks = distinct int64
+  Nanos = int64
+{.deprecated: [TTicks: Ticks, TNanos: Nanos].}
 
 when defined(windows):
 
-  proc QueryPerformanceCounter(res: var TTicks) {.
+  proc QueryPerformanceCounter(res: var Ticks) {.
     importc: "QueryPerformanceCounter", stdcall, dynlib: "kernel32".}
   proc QueryPerformanceFrequency(res: var int64) {.
     importc: "QueryPerformanceFrequency", stdcall, dynlib: "kernel32".}
 
-  proc getTicks(): TTicks {.inline.} =
+  proc getTicks(): Ticks {.inline.} =
     QueryPerformanceCounter(result)
 
-  proc `-`(a, b: TTicks): TNanos =
+  proc `-`(a, b: Ticks): Nanos =
     var frequency: int64
     QueryPerformanceFrequency(frequency)
     var performanceCounterRate = 1e+9'f64 / float64(frequency)
 
-    result = TNanos(float64(a.int64 - b.int64) * performanceCounterRate)
+    result = Nanos(float64(a.int64 - b.int64) * performanceCounterRate)
 
 elif defined(macosx):
   type
-    TMachTimebaseInfoData {.pure, final, 
-        importc: "mach_timebase_info_data_t", 
+    MachTimebaseInfoData {.pure, final,
+        importc: "mach_timebase_info_data_t",
         header: "<mach/mach_time.h>".} = object
       numer, denom: int32
+  {.deprecated: [TMachTimebaseInfoData: MachTimebaseInfoData].}
 
   proc mach_absolute_time(): int64 {.importc, header: "<mach/mach.h>".}
-  proc mach_timebase_info(info: var TMachTimebaseInfoData) {.importc,
+  proc mach_timebase_info(info: var MachTimebaseInfoData) {.importc,
     header: "<mach/mach_time.h>".}
 
-  proc getTicks(): TTicks {.inline.} =
-    result = TTicks(mach_absolute_time())
-  
-  var timeBaseInfo: TMachTimebaseInfoData
+  proc getTicks(): Ticks {.inline.} =
+    result = Ticks(mach_absolute_time())
+
+  var timeBaseInfo: MachTimebaseInfoData
   mach_timebase_info(timeBaseInfo)
-    
-  proc `-`(a, b: TTicks): TNanos =
+
+  proc `-`(a, b: Ticks): Nanos =
     result = (a.int64 - b.int64)  * timeBaseInfo.numer div timeBaseInfo.denom
 
 elif defined(posixRealtime):
   type
-    TClockid {.importc: "clockid_t", header: "<time.h>", final.} = object
+    Clockid {.importc: "clockid_t", header: "<time.h>", final.} = object
 
-    TTimeSpec {.importc: "struct timespec", header: "<time.h>", 
+    TimeSpec {.importc: "struct timespec", header: "<time.h>",
                final, pure.} = object ## struct timespec
-      tv_sec: int  ## Seconds. 
-      tv_nsec: int ## Nanoseconds. 
+      tv_sec: int  ## Seconds.
+      tv_nsec: int ## Nanoseconds.
+  {.deprecated: [TClockid: Clockid, TTimeSpec: TimeSpec].}
 
   var
-    CLOCK_REALTIME {.importc: "CLOCK_REALTIME", header: "<time.h>".}: TClockid
+    CLOCK_REALTIME {.importc: "CLOCK_REALTIME", header: "<time.h>".}: Clockid
 
-  proc clock_gettime(clkId: TClockid, tp: var TTimespec) {.
+  proc clock_gettime(clkId: Clockid, tp: var Timespec) {.
     importc: "clock_gettime", header: "<time.h>".}
 
-  proc getTicks(): TTicks =
-    var t: TTimespec
+  proc getTicks(): Ticks =
+    var t: Timespec
     clock_gettime(CLOCK_REALTIME, t)
-    result = TTicks(int64(t.tv_sec) * 1000000000'i64 + int64(t.tv_nsec))
+    result = Ticks(int64(t.tv_sec) * 1000000000'i64 + int64(t.tv_nsec))
 
-  proc `-`(a, b: TTicks): TNanos {.borrow.}
+  proc `-`(a, b: Ticks): Nanos {.borrow.}
 
 else:
-  # fallback Posix implementation:  
+  # fallback Posix implementation:
   type
-    Ttimeval {.importc: "struct timeval", header: "<sys/select.h>", 
+    Timeval {.importc: "struct timeval", header: "<sys/select.h>",
                final, pure.} = object ## struct timeval
-      tv_sec: int  ## Seconds. 
-      tv_usec: int ## Microseconds. 
-        
-  proc posix_gettimeofday(tp: var Ttimeval, unused: pointer = nil) {.
+      tv_sec: int  ## Seconds.
+      tv_usec: int ## Microseconds.
+  {.deprecated: [Ttimeval: Timeval].}
+  proc posix_gettimeofday(tp: var Timeval, unused: pointer = nil) {.
     importc: "gettimeofday", header: "<sys/time.h>".}
 
-  proc getTicks(): TTicks =
-    var t: Ttimeval
+  proc getTicks(): Ticks =
+    var t: Timeval
     posix_gettimeofday(t)
-    result = TTicks(int64(t.tv_sec) * 1000_000_000'i64 + 
+    result = Ticks(int64(t.tv_sec) * 1000_000_000'i64 +
                     int64(t.tv_usec) * 1000'i64)
 
-  proc `-`(a, b: TTicks): TNanos {.borrow.}
+  proc `-`(a, b: Ticks): Nanos {.borrow.}
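The arithmetic is unchanged by the renames: subtracting two ``Ticks`` yields ``Nanos``. A sketch of the intended use follows; note that ``getTicks`` and the ``-`` overload are not exported (this file is included by lib/system/profiler.nim), so the snippet assumes it runs inside that module, and the measured loop is a placeholder:

  let start = getTicks()
  for i in 0 .. 1_000_000:
    discard i                    # placeholder for the work being timed
  let elapsed: Nanos = getTicks() - start
  # elapsed now holds the wall-clock difference in nanoseconds
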
diff --git a/lib/system/widestrs.nim b/lib/system/widestrs.nim
index 1e8bc6791..5a30a7c0f 100644
--- a/lib/system/widestrs.nim
+++ b/lib/system/widestrs.nim
@@ -14,8 +14,9 @@ when not declared(NimString):
   {.error: "You must not import this module explicitly".}
 
 type
-  TUtf16Char* = distinct int16
-  WideCString* = ref array[0.. 1_000_000, TUtf16Char]
+  Utf16Char* = distinct int16
+  WideCString* = ref array[0.. 1_000_000, Utf16Char]
+{.deprecated: [TUtf16Char: Utf16Char].}
 
 proc len*(w: WideCString): int =
   ## returns the length of a widestring. This traverses the whole string to
@@ -23,7 +24,7 @@ proc len*(w: WideCString): int =
   while int16(w[result]) != 0'i16: inc result
 
 const
-  UNI_REPLACEMENT_CHAR = TUtf16Char(0xFFFD'i16)
+  UNI_REPLACEMENT_CHAR = Utf16Char(0xFFFD'i16)
   UNI_MAX_BMP = 0x0000FFFF
   UNI_MAX_UTF16 = 0x0010FFFF
   UNI_MAX_UTF32 = 0x7FFFFFFF
@@ -89,16 +90,16 @@ proc newWideCString*(source: cstring, L: int): WideCString =
       if ch >=% UNI_SUR_HIGH_START and ch <=% UNI_SUR_LOW_END:
         result[d] = UNI_REPLACEMENT_CHAR
       else:
-        result[d] = TUtf16Char(toU16(ch))
+        result[d] = Utf16Char(toU16(ch))
     elif ch >% UNI_MAX_UTF16:
       result[d] = UNI_REPLACEMENT_CHAR
     else:
       let ch = ch -% halfBase
-      result[d] = TUtf16Char(toU16((ch shr halfShift) +% UNI_SUR_HIGH_START))
+      result[d] = Utf16Char(toU16((ch shr halfShift) +% UNI_SUR_HIGH_START))
       inc d
-      result[d] = TUtf16Char(toU16((ch and halfMask) +% UNI_SUR_LOW_START))
+      result[d] = Utf16Char(toU16((ch and halfMask) +% UNI_SUR_LOW_START))
     inc d
-  result[d] = TUtf16Char(0'i16)
+  result[d] = Utf16Char(0'i16)
 
 proc newWideCString*(s: cstring): WideCString =
   if s.isNil: return nil
@@ -113,38 +114,44 @@ proc newWideCString*(s: cstring): WideCString =
 proc newWideCString*(s: string): WideCString =
   result = newWideCString(s, s.len)
 
-proc `$`*(w: WideCString, estimate: int): string =
+proc `$`*(w: WideCString, estimate: int, replacement: int = 0xFFFD): string =
   result = newStringOfCap(estimate + estimate shr 2)
 
   var i = 0
   while w[i].int16 != 0'i16:
-    var ch = w[i].int
+    var ch = int(cast[uint16](w[i]))
     inc i
-    if ch >=% UNI_SUR_HIGH_START and ch <=% UNI_SUR_HIGH_END:
+    if ch >= UNI_SUR_HIGH_START and ch <= UNI_SUR_HIGH_END:
       # If the 16 bits following the high surrogate are in the source buffer...
-      let ch2 = w[i].int
+      let ch2 = int(cast[uint16](w[i]))
+
       # If it's a low surrogate, convert to UTF32:
-      if ch2 >=% UNI_SUR_LOW_START and ch2 <=% UNI_SUR_LOW_END:
-        ch = ((ch -% UNI_SUR_HIGH_START) shr halfShift) +%
-              (ch2 -% UNI_SUR_LOW_START) +% halfBase
+      if ch2 >= UNI_SUR_LOW_START and ch2 <= UNI_SUR_LOW_END:
+        ch = (((ch and halfMask) shl halfShift) + (ch2 and halfMask)) + halfBase
         inc i
-        
-    if ch <=% 127:
+      else:
+        # invalid UTF-16
+        ch = replacement
+    elif ch >= UNI_SUR_LOW_START and ch <= UNI_SUR_LOW_END:
+      # invalid UTF-16
+      ch = replacement
+
+    if ch < 0x80:
       result.add chr(ch)
-    elif ch <=% 0x07FF:
-      result.add chr((ch shr 6) or 0b110_00000)
-      result.add chr((ch and ones(6)) or 0b10_000000)
-    elif ch <=% 0xFFFF:
-      result.add chr(ch shr 12 or 0b1110_0000)
-      result.add chr(ch shr 6 and ones(6) or 0b10_0000_00)
-      result.add chr(ch and ones(6) or 0b10_0000_00)
-    elif ch <=% 0x0010FFFF:
-      result.add chr(ch shr 18 or 0b1111_0000)
-      result.add chr(ch shr 12 and ones(6) or 0b10_0000_00)
-      result.add chr(ch shr 6 and ones(6) or 0b10_0000_00)
-      result.add chr(ch and ones(6) or 0b10_0000_00)
+    elif ch < 0x800:
+      result.add chr((ch shr 6) or 0xc0)
+      result.add chr((ch and 0x3f) or 0x80)
+    elif ch < 0x10000:
+      result.add chr((ch shr 12) or 0xe0)
+      result.add chr(((ch shr 6) and 0x3f) or 0x80)
+      result.add chr((ch and 0x3f) or 0x80)
+    elif ch <= 0x10FFFF:
+      result.add chr((ch shr 18) or 0xf0)
+      result.add chr(((ch shr 12) and 0x3f) or 0x80)
+      result.add chr(((ch shr 6) and 0x3f) or 0x80)
+      result.add chr((ch and 0x3f) or 0x80)
     else:
-      # replacement char:
+      # replacement char (in case the user passes a very large replacement value):
       result.add chr(0xFFFD shr 12 or 0b1110_0000)
       result.add chr(0xFFFD shr 6 and ones(6) or 0b10_0000_00)
       result.add chr(0xFFFD and ones(6) or 0b10_0000_00)
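The effect of the new surrogate handling in ``$`` can be seen with a hand-built invalid string: a lone surrogate is now decoded to the replacement character (U+FFFD unless a different ``replacement`` is given) instead of being passed through as a bogus code point. The injected code unit and the explicit estimate below are illustrative assumptions only, not taken from the patch:

  var w = newWideCString("abc")
  w[1] = Utf16Char(0xD800'i16)   # overwrite 'b' with a lone high surrogate
  echo `$`(w, 3)                 # -> "a", U+FFFD encoded as UTF-8, then "c"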