81 files changed, 13325 insertions, 6541 deletions
diff --git a/lib/system/alloc.nim b/lib/system/alloc.nim
index 7b52780fe..3de6d8713 100755..100644
--- a/lib/system/alloc.nim
+++ b/lib/system/alloc.nim
@@ -1,292 +1,448 @@
 #
 #
-#            Nimrod's Runtime Library
+#            Nim's Runtime Library
 #        (c) Copyright 2012 Andreas Rumpf
 #
 #    See the file "copying.txt", included in this
 #    distribution, for details about the copyright.
 #
 
-# Low level allocator for Nimrod. Has been designed to support the GC.
-# TODO: 
-# - eliminate "used" field
-# - make searching for block O(1)
+# Low level allocator for Nim. Has been designed to support the GC.
 {.push profiler:off.}
 
-# ------------ platform specific chunk allocation code -----------------------
-
-# some platforms have really weird unmap behaviour: unmap(blockStart, PageSize)
-# really frees the whole block. Happens for Linux/PowerPC for example. Amd64
-# and x86 are safe though; Windows is special because MEM_RELEASE can only be
-# used with a size of 0:
-const weirdUnmap = not (defined(amd64) or defined(i386)) or defined(windows)
-
-when defined(posix): 
-  const
-    PROT_READ  = 1             # page can be read 
-    PROT_WRITE = 2             # page can be written 
-    MAP_PRIVATE = 2'i32        # Changes are private 
-  
-  when defined(macosx) or defined(bsd):
-    const MAP_ANONYMOUS = 0x1000
-  elif defined(solaris): 
-    const MAP_ANONYMOUS = 0x100
-  else:
-    var
-      MAP_ANONYMOUS {.importc: "MAP_ANONYMOUS", header: "<sys/mman.h>".}: cint
-    
-  proc mmap(adr: pointer, len: int, prot, flags, fildes: cint,
-            off: int): pointer {.header: "<sys/mman.h>".}
-
-  proc munmap(adr: pointer, len: int) {.header: "<sys/mman.h>".}
-  
-  proc osAllocPages(size: int): pointer {.inline.} = 
-    result = mmap(nil, size, PROT_READ or PROT_WRITE, 
-                             MAP_PRIVATE or MAP_ANONYMOUS, -1, 0)
-    if result == nil or result == cast[pointer](-1):
-      raiseOutOfMem()
-      
-  proc osDeallocPages(p: pointer, size: int) {.inline} =
-    when reallyOsDealloc: munmap(p, size)
-  
-elif defined(windows): 
-  const
-    MEM_RESERVE = 0x2000 
-    MEM_COMMIT = 0x1000
-    MEM_TOP_DOWN = 0x100000
-    PAGE_READWRITE = 0x04
-
-    MEM_DECOMMIT = 0x4000
-    MEM_RELEASE = 0x8000
-
-  proc VirtualAlloc(lpAddress: pointer, dwSize: int, flAllocationType,
-                    flProtect: int32): pointer {.
-                    header: "<windows.h>", stdcall.}
-  
-  proc VirtualFree(lpAddress: pointer, dwSize: int, 
-                   dwFreeType: int32) {.header: "<windows.h>", stdcall.}
-  
-  proc osAllocPages(size: int): pointer {.inline.} = 
-    result = VirtualAlloc(nil, size, MEM_RESERVE or MEM_COMMIT,
-                          PAGE_READWRITE)
-    if result == nil: raiseOutOfMem()
-
-  proc osDeallocPages(p: pointer, size: int) {.inline.} =
-    # according to Microsoft, 0 is the only correct value for MEM_RELEASE:
-    # This means that the OS has some different view over how big the block is
-    # that we want to free! So, we cannot reliably release the memory back to
-    # Windows :-(. We have to live with MEM_DECOMMIT instead.
-    # Well that used to be the case but MEM_DECOMMIT fragments the address
-    # space heavily, so we now treat Windows as a strange unmap target.
-    when reallyOsDealloc: VirtualFree(p, 0, MEM_RELEASE)
-    #VirtualFree(p, size, MEM_DECOMMIT)
-
-else: 
-  {.error: "Port memory manager to your platform".}
-
-# --------------------- end of non-portable code -----------------------------
+include osalloc
+import std/private/syslocks
+import std/sysatomics
+
+template track(op, address, size) = # forwards to memTrackerOp; expands to nothing unless `memTracker` is defined
+  when defined(memTracker):
+    memTrackerOp(op, address, size)
 
 # We manage *chunks* of memory. Each chunk is a multiple of the page size.
-# Each chunk starts at an address that is divisible by the page size. Chunks
-# that are bigger than ``ChunkOsReturn`` are returned back to the operating
-# system immediately.
+# Each chunk starts at an address that is divisible by the page size.
+# Small chunks may be divided into smaller cells of reusable pointers to reduce the number of page allocations.
+
+# An allocation of a small pointer looks approximately like this
+#[
+
+  alloc -> rawAlloc -> No free chunk available > Request a new page from tlsf -> result = chunk.data -------------+
+              |                                                                                                   |
+              v                                                                                                   |
+    Free chunk available                                                                                          |
+              |                                                                                                   |
+              v                                                                                                   v
+      Fetch shared cells -> No free cells available -> Advance acc -> result = chunk.data + chunk.acc -------> return
+    (may not add new cells)                                                                                       ^
+              |                                                                                                   |
+              v                                                                                                   |
+     Free cells available -> result = chunk.freeList -> Advance chunk.freeList -----------------------------------+
+]#
+# so it is split into 3 paths, where the last path is preferred to prevent unnecessary allocations.
+#
+#
+# A deallocation of a small pointer then looks like this
+#[
+  dealloc -> rawDealloc -> chunk.owner == addr(a) --------------> This thread owns the chunk ------> The current chunk is active    -> Chunk is completely unused -----> Chunk references no foreign cells
+                                      |                                       |                   (Add cell into the current chunk)                 |                  Return the current chunk back to tlsf
+                                      |                                       |                                   |                                 |
+                                      v                                       v                                   v                                 v
+                      A different thread owns this chunk.     The current chunk is not active.          chunk.free was < size      Chunk references foreign cells, noop
+                      Add the cell to a.sharedFreeLists      Add the cell into the active chunk          Activate the chunk                       (end)
+                                    (end)                                    (end)                              (end)
+]#
+# So "true" deallocation is delayed for as long as possible in favor of reusing cells.
 
 const
-  ChunkOsReturn = 256 * PageSize # 1 MB
-  InitialMemoryRequest = ChunkOsReturn div 2 # < ChunkOsReturn!
+  nimMinHeapPages {.intdefine.} = 128 # 0.5 MB
   SmallChunkSize = PageSize
+  MaxFli = when sizeof(int) > 2: 30 else: 14
+  MaxLog2Sli = 5 # 32, this cannot be increased without changing 'uint32'
+                 # everywhere!
+  MaxSli = 1 shl MaxLog2Sli
+  FliOffset = 6
+  RealFli = MaxFli - FliOffset
 
-type 
-  PTrunk = ptr TTrunk
-  TTrunk {.final.} = object 
+  # size of chunks in last matrix bin
+  MaxBigChunkSize = int(1'i32 shl MaxFli - 1'i32 shl (MaxFli-MaxLog2Sli-1))
+  HugeChunkSize = MaxBigChunkSize + 1
+
+type
+  PTrunk = ptr Trunk
+  Trunk = object
     next: PTrunk         # all nodes are connected with this pointer
     key: int             # start address at bit 0
-    bits: array[0..IntsPerTrunk-1, int] # a bit vector
-  
-  TTrunkBuckets = array[0..255, PTrunk]
-  TIntSet {.final.} = object 
-    data: TTrunkBuckets
-  
-type
-  TAlignType = biggestFloat
-  TFreeCell {.final, pure.} = object
-    next: ptr TFreeCell  # next free cell in chunk (overlaid with refcount)
-    zeroField: int       # 0 means cell is not used (overlaid with typ field)
-                         # 1 means cell is manually managed pointer
-                         # otherwise a PNimType is stored in there
-
-  PChunk = ptr TBaseChunk
-  PBigChunk = ptr TBigChunk
-  PSmallChunk = ptr TSmallChunk
-  TBaseChunk {.pure, inheritable.} = object
-    prevSize: int        # size of previous chunk; for coalescing
-    size: int            # if < PageSize it is a small chunk
-    used: bool           # later will be optimized into prevSize...
-  
-  TSmallChunk = object of TBaseChunk
-    next, prev: PSmallChunk  # chunks of the same size
-    freeList: ptr TFreeCell
-    free: int            # how many bytes remain    
-    acc: int             # accumulator for small object allocation
-    data: TAlignType     # start of usable memory
-  
-  TBigChunk = object of TBaseChunk # not necessarily > PageSize!
-    next, prev: PBigChunk    # chunks of the same (or bigger) size
-    align: int
-    data: TAlignType     # start of usable memory
-
-template smallChunkOverhead(): expr = sizeof(TSmallChunk)-sizeof(TAlignType)
-template bigChunkOverhead(): expr = sizeof(TBigChunk)-sizeof(TAlignType)
+    bits: array[0..IntsPerTrunk-1, uint] # a bit vector
 
-proc roundup(x, v: int): int {.inline.} = 
-  result = (x + (v-1)) and not (v-1)
-  sysAssert(result >= x, "roundup: result < x")
-  #return ((-x) and (v-1)) +% x
-
-sysAssert(roundup(14, PageSize) == PageSize, "invalid PageSize")
-sysAssert(roundup(15, 8) == 16, "roundup broken")
-sysAssert(roundup(65, 8) == 72, "roundup broken 2")
+  TrunkBuckets = array[0..255, PTrunk]
+  IntSet = object
+    data: TrunkBuckets
 
 # ------------- chunk table ---------------------------------------------------
 # We use a PtrSet of chunk starts and a table[Page, chunksize] for chunk
 # endings of big chunks. This is needed by the merging operation. The only
 # remaining operation is best-fit for big chunks. Since there is a size-limit
 # for big chunks (because greater than the limit means they are returned back
-# to the OS), a fixed size array can be used. 
+# to the OS), a fixed size array can be used.
 
 type
-  PLLChunk = ptr TLLChunk
-  TLLChunk {.pure.} = object ## *low-level* chunk
+  PLLChunk = ptr LLChunk
+  LLChunk = object ## *low-level* chunk
     size: int                # remaining size
     acc: int                 # accumulator
     next: PLLChunk           # next low-level chunk; only needed for dealloc
 
-  PAvlNode = ptr TAvlNode
-  TAvlNode {.pure, final.} = object 
-    link: array[0..1, PAvlNode] # Left (0) and right (1) links 
+  PAvlNode = ptr AvlNode
+  AvlNode = object
+    link: array[0..1, PAvlNode] # Left (0) and right (1) links
     key, upperBound: int
     level: int
-    
-  TMemRegion {.final, pure.} = object
-    minLargeObj, maxLargeObj: int
-    freeSmallChunks: array[0..SmallChunkSize div MemAlign-1, PSmallChunk]
+
+const
+  RegionHasLock = false # hasThreadSupport and defined(gcDestructors)
+
+type
+  FreeCell {.final, pure.} = object
+    # A free cell is a pointer that has been freed, meaning it became available for reuse.
+    # It may become foreign if it is lent to a chunk that did not create it; doing so reduces the number of pages needed.
+    next: ptr FreeCell  # next free cell in chunk (overlaid with refcount)
+    when not defined(gcDestructors):
+      zeroField: int       # 0 means cell is not used (overlaid with typ field)
+                          # 1 means cell is manually managed pointer
+                          # otherwise a PNimType is stored in there
+    else:
+      alignment: int
+
+  PChunk = ptr BaseChunk
+  PBigChunk = ptr BigChunk
+  PSmallChunk = ptr SmallChunk
+  BaseChunk {.pure, inheritable.} = object
+    prevSize: int        # size of previous chunk; for coalescing
+                         # 0th bit == 1 if the chunk is 'used'
+    size: int            # if < PageSize it is a small chunk
+    owner: ptr MemRegion
+
+  SmallChunk = object of BaseChunk
+    next, prev: PSmallChunk  # chunks of the same size
+    freeList: ptr FreeCell   # Singly linked list of cells. They may be from foreign chunks or from the current chunk.
+                             #  Should be `nil` when the chunk isn't active in `a.freeSmallChunks`.
+    free: int32              # Bytes this chunk is able to provide using both the accumulator and free cells.
+                             # When a cell is considered foreign, its source chunk's free field is NOT adjusted until it
+                             #  reaches dealloc while the source chunk is active.
+                             # Instead, the receiving chunk gains the capacity and thus reserves space in the foreign chunk.
+    acc: uint32              # Offset from data, used when there are no free cells available but the chunk is considered free.
+    foreignCells: int        # When a free cell is given to a chunk that is not its origin,
+                             #  both the cell and the source chunk are considered foreign.
+                             # Receiving a foreign cell can happen both when deallocating from another thread or when
+                             #  the active chunk in `a.freeSmallChunks` is not the current chunk.
+                             # Freeing a chunk while `foreignCells > 0` leaks memory as all references to it become lost.
+    data {.align: MemAlign.}: UncheckedArray[byte]      # start of usable memory
+
+  BigChunk = object of BaseChunk # not necessarily > PageSize!
+    next, prev: PBigChunk    # chunks of the same (or bigger) size
+    data {.align: MemAlign.}: UncheckedArray[byte]      # start of usable memory
+
+  HeapLinks = object
+    len: int
+    chunks: array[30, (PBigChunk, int)]
+    next: ptr HeapLinks
+
+  MemRegion = object
+    when not defined(gcDestructors):
+      minLargeObj, maxLargeObj: int
+    freeSmallChunks: array[0..max(1, SmallChunkSize div MemAlign-1), PSmallChunk]
+      # List of available chunks per size class. Only one is expected to be active per class.
+    when defined(gcDestructors):
+      sharedFreeLists: array[0..max(1, SmallChunkSize div MemAlign-1), ptr FreeCell]
+        # When a thread frees a pointer it did not create, it must not adjust the counters.
+        # Instead, the cell is placed here and deferred until the next allocation.
+    flBitmap: uint32
+    slBitmap: array[RealFli, uint32]
+    matrix: array[RealFli, array[MaxSli, PBigChunk]]
     llmem: PLLChunk
-    currMem, maxMem, freeMem: int # memory sizes (allocated from OS)
-    lastSize: int # needed for the case that OS gives us pages linearly 
-    freeChunksList: PBigChunk # XXX make this a datastructure with O(1) access
-    chunkStarts: TIntSet
-    root, deleted, last, freeAvlNodes: PAvlNode
-  
-# shared:
-var
-  bottomData: TAvlNode
-  bottom: PAvlNode
-
-{.push stack_trace: off.}
-proc initAllocator() =
-  when not defined(useNimRtl):
-    bottom = addr(bottomData)
-    bottom.link[0] = bottom
-    bottom.link[1] = bottom
-{.pop.}
+    currMem, maxMem, freeMem, occ: int # memory sizes (allocated from OS)
+    lastSize: int # needed for the case that OS gives us pages linearly
+    when RegionHasLock:
+      lock: SysLock
+    when defined(gcDestructors):
+      sharedFreeListBigChunks: PBigChunk # make no attempt at avoiding false sharing for now for this object field
+
+    chunkStarts: IntSet
+    when not defined(gcDestructors):
+      root, deleted, last, freeAvlNodes: PAvlNode
+    lockActive, locked, blockChunkSizeIncrease: bool # if locked, we cannot free pages.
+    nextChunkSize: int
+    when not defined(gcDestructors):
+      bottomData: AvlNode
+    heapLinks: HeapLinks
+    when defined(nimTypeNames):
+      allocCounter, deallocCounter: int
+
+template smallChunkOverhead(): untyped = sizeof(SmallChunk)
+template bigChunkOverhead(): untyped = sizeof(BigChunk)
+
+when hasThreadSupport: # with threads: loada/storea are relaxed atomic accesses; otherwise plain reads/writes
+  template loada(x: untyped): untyped = atomicLoadN(unsafeAddr x, ATOMIC_RELAXED)
+  template storea(x, y: untyped) = atomicStoreN(unsafeAddr x, y, ATOMIC_RELAXED)
+
+  when false:
+    # not yet required: atomic variants of the statistics counters (this branch is never compiled)
+    template atomicStatDec(x, diff: untyped) = discard atomicSubFetch(unsafeAddr x, diff, ATOMIC_RELAXED)
+    template atomicStatInc(x, diff: untyped) = discard atomicAddFetch(unsafeAddr x, diff, ATOMIC_RELAXED)
+else:
+  template loada(x: untyped): untyped = x
+  template storea(x, y: untyped) = x = y
 
-proc incCurrMem(a: var TMemRegion, bytes: int) {.inline.} = 
-  inc(a.currMem, bytes)
+template atomicStatDec(x, diff: untyped) = dec x, diff
+template atomicStatInc(x, diff: untyped) = inc x, diff
 
-proc decCurrMem(a: var TMemRegion, bytes: int) {.inline.} =
+const
+  fsLookupTable: array[byte, int8] = [ # fsLookupTable[b] = index of the highest set bit of byte b; -1 for b == 0
+    -1'i8, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
+    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+    5, 5, 5, 5, 5, 5, 5, 5,
+    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+    7, 7, 7, 7, 7, 7, 7, 7
+  ]
+
+proc msbit(x: uint32): int {.inline.} = # index of the highest set bit of `x`; -1 when x == 0
+  let a = if x <= 0xff_ff'u32: # `a` = bit offset (0, 8, 16 or 24) of the highest non-zero byte
+            (if x <= 0xff: 0 else: 8)
+          else:
+            (if x <= 0xff_ff_ff'u32: 16 else: 24)
+  result = int(fsLookupTable[byte(x shr a)]) + a # table yields the top bit within that byte
+
+proc lsbit(x: uint32): int {.inline.} = # index of the lowest set bit of `x`; -1 when x == 0
+  msbit(x and ((not x) + 1)) # `(not x) + 1` is the two's complement of x, so the `and` keeps only the lowest set bit
+
+proc setBit(nr: int; dest: var uint32) {.inline.} = # sets bit `nr` of `dest` in place
+  dest = dest or (1u32 shl (nr and 0x1f)) # `and 0x1f` keeps the shift amount within 0..31
+
+proc clearBit(nr: int; dest: var uint32) {.inline.} = # clears bit `nr` of `dest` in place
+  dest = dest and not (1u32 shl (nr and 0x1f)) # `and 0x1f` keeps the shift amount within 0..31
+
+proc mappingSearch(r, fl, sl: var int) {.inline.} = # rounds `r` up in place, then writes its first/second level indices to `fl`/`sl`
+  #let t = (1 shl (msbit(uint32 r) - MaxLog2Sli)) - 1
+  # This diverges from the standard TLSF algorithm because we need to ensure
+  # PageSize alignment:
+  let t = roundup((1 shl (msbit(uint32 r) - MaxLog2Sli)), PageSize) - 1
+  r = r + t # this and the next line round `r` up to a multiple of t+1 (presumably a power of two)
+  r = r and not t
+  r = min(r, MaxBigChunkSize).int # clamp to MaxBigChunkSize
+  fl = msbit(uint32 r) # first level: position of the top bit of `r`
+  sl = (r shr (fl - MaxLog2Sli)) - MaxSli # second level: the MaxLog2Sli bits just below the top bit
+  dec fl, FliOffset # rebase the first-level index by FliOffset
+  sysAssert((r and PageMask) == 0, "mappingSearch: still not aligned")
+
+# See http://www.gii.upv.es/tlsf/files/papers/tlsf_desc.pdf for details of
+# this algorithm.
+
+proc mappingInsert(r: int): tuple[fl, sl: int] {.inline.} = # (fl, sl) indices for a size `r`; unlike mappingSearch, `r` is not rounded
+  sysAssert((r and PageMask) == 0, "mappingInsert: still not aligned")
+  result.fl = msbit(uint32 r) # first level: position of the top bit of `r`
+  result.sl = (r shr (result.fl - MaxLog2Sli)) - MaxSli # second level: the MaxLog2Sli bits just below the top bit
+  dec result.fl, FliOffset # rebase the first-level index by FliOffset
+
+template mat(): untyped = a.matrix[fl][sl] # shorthand for the current bin; uses `a`, `fl` and `sl` from the call site
+
+proc findSuitableBlock(a: MemRegion; fl, sl: var int): PBigChunk {.inline.} = # head of the first non-empty bin at or after (fl, sl), nil if none; fl/sl are updated
+  let tmp = a.slBitmap[fl] and (not 0u32 shl sl) # non-empty bins in row `fl` with index >= sl
+  result = nil
+  if tmp != 0:
+    sl = lsbit(tmp) # lowest such bin in the same row
+    result = mat()
+  else:
+    fl = lsbit(a.flBitmap and (not 0u32 shl (fl + 1))) # lowest non-empty row above `fl`; lsbit gives -1 when there is none
+    if fl > 0:
+      sl = lsbit(a.slBitmap[fl]) # lowest non-empty bin of that row
+      result = mat()
+
+template clearBits(sl, fl) = # marks bin (fl, sl) as empty in the bitmaps of `a` (taken from the call site)
+  clearBit(sl, a.slBitmap[fl])
+  if a.slBitmap[fl] == 0u32:
+    # do not forget to cascade: the whole row is empty now, so clear its bit in flBitmap too
+    clearBit(fl, a.flBitmap)
+
+proc removeChunkFromMatrix(a: var MemRegion; b: PBigChunk) = # unlinks `b` from the bin selected by its size
+  let (fl, sl) = mappingInsert(b.size)
+  if b.next != nil: b.next.prev = b.prev # detach `b` from its doubly linked list
+  if b.prev != nil: b.prev.next = b.next
+  if mat() == b: # `b` was the head of the bin
+    mat() = b.next
+    if mat() == nil:
+      clearBits(sl, fl) # bin became empty
+  b.prev = nil
+  b.next = nil
+
+proc removeChunkFromMatrix2(a: var MemRegion; b: PBigChunk; fl, sl: int) = # variant taking the bin indices; presumably `b` must be the head of bin (fl, sl) - confirm at call sites
+  mat() = b.next # the bin head is replaced unconditionally
+  if mat() != nil:
+    mat().prev = nil
+  else:
+    clearBits(sl, fl) # bin became empty
+  b.prev = nil
+  b.next = nil
+
+proc addChunkToMatrix(a: var MemRegion; b: PBigChunk) = # pushes `b` onto the front of the bin selected by its size
+  let (fl, sl) = mappingInsert(b.size)
+  b.prev = nil
+  b.next = mat() # the old head (possibly nil) follows `b`
+  if mat() != nil:
+    mat().prev = b
+  mat() = b
+  setBit(sl, a.slBitmap[fl]) # mark the bin and its row as non-empty
+  setBit(fl, a.flBitmap)
+
+proc incCurrMem(a: var MemRegion, bytes: int) {.inline.} =
+  atomicStatInc(a.currMem, bytes)
+
+proc decCurrMem(a: var MemRegion, bytes: int) {.inline.} =
   a.maxMem = max(a.maxMem, a.currMem)
-  dec(a.currMem, bytes)
+  atomicStatDec(a.currMem, bytes)
 
-proc getMaxMem(a: var TMemRegion): int =
-  # Since we update maxPagesCount only when freeing pages, 
+proc getMaxMem(a: var MemRegion): int =
+  # Since we update maxPagesCount only when freeing pages,
   # maxPagesCount may not be up to date. Thus we use the
   # maximum of these both values here:
   result = max(a.currMem, a.maxMem)
-  
-proc llAlloc(a: var TMemRegion, size: int): pointer =
+
+const nimMaxHeap {.intdefine.} = 0 # heap limit, multiplied by 1024*1024 where it is checked; 0 disables the check
+
+proc allocPages(a: var MemRegion, size: int): pointer = # osAllocPages guarded by the optional nimMaxHeap limit
+  when nimMaxHeap != 0:
+    if a.occ + size > nimMaxHeap * 1024 * 1024: # a.occ plus this request would exceed the limit
+      raiseOutOfMem()
+  osAllocPages(size)
+
+proc tryAllocPages(a: var MemRegion, size: int): pointer = # osTryAllocPages guarded by the optional nimMaxHeap limit; nil on failure
+  when nimMaxHeap != 0:
+    if a.occ + size > nimMaxHeap * 1024 * 1024: # a.occ plus this request would exceed the limit
+      return nil # report failure like osTryAllocPages so the caller can retry with a smaller size instead of aborting
+  osTryAllocPages(size)
+
+proc llAlloc(a: var MemRegion, size: int): pointer =
   # *low-level* alloc for the memory managers data structures. Deallocation
-  # is done at he end of the allocator's life time.
+  # is done at the end of the allocator's life time.
   if a.llmem == nil or size > a.llmem.size:
-    # the requested size is ``roundup(size+sizeof(TLLChunk), PageSize)``, but
+    # the requested size is ``roundup(size+sizeof(LLChunk), PageSize)``, but
     # since we know ``size`` is a (small) constant, we know the requested size
     # is one page:
-    sysAssert roundup(size+sizeof(TLLChunk), PageSize) == PageSize, "roundup 6"
+    sysAssert roundup(size+sizeof(LLChunk), PageSize) == PageSize, "roundup 6"
     var old = a.llmem # can be nil and is correct with nil
-    a.llmem = cast[PLLChunk](osAllocPages(PageSize))
+    a.llmem = cast[PLLChunk](allocPages(a, PageSize))
+    when defined(nimAvlcorruption):
+      trackLocation(a.llmem, PageSize)
     incCurrMem(a, PageSize)
-    a.llmem.size = PageSize - sizeof(TLLChunk)
-    a.llmem.acc = sizeof(TLLChunk)
+    a.llmem.size = PageSize - sizeof(LLChunk)
+    a.llmem.acc = sizeof(LLChunk)
     a.llmem.next = old
-  result = cast[pointer](cast[TAddress](a.llmem) + a.llmem.acc)
+  result = cast[pointer](cast[int](a.llmem) + a.llmem.acc)
   dec(a.llmem.size, size)
   inc(a.llmem.acc, size)
   zeroMem(result, size)
 
-proc allocAvlNode(a: var TMemRegion, key, upperBound: int): PAvlNode =
-  if a.freeAvlNodes != nil:
-    result = a.freeAvlNodes
-    a.freeAvlNodes = a.freeAvlNodes.link[0]
+when not defined(gcDestructors): # AVL node helpers; only compiled when gcDestructors is not defined
+  proc getBottom(a: var MemRegion): PAvlNode = # address of the region's `bottomData` node, linked to itself on first use
+    result = addr(a.bottomData)
+    if result.link[0] == nil:
+      result.link[0] = result
+      result.link[1] = result
+
+  proc allocAvlNode(a: var MemRegion, key, upperBound: int): PAvlNode = # new level-1 node, recycled from a.freeAvlNodes when possible
+    if a.freeAvlNodes != nil:
+      result = a.freeAvlNodes
+      a.freeAvlNodes = a.freeAvlNodes.link[0] # the free list is chained through link[0]
+    else:
+      result = cast[PAvlNode](llAlloc(a, sizeof(AvlNode)))
+      when defined(nimAvlcorruption):
+        cprintf("tracking location: %p\n", result)
+    result.key = key
+    result.upperBound = upperBound
+    let bottom = getBottom(a)
+    result.link[0] = bottom # both links start out at the bottom node
+    result.link[1] = bottom
+    result.level = 1
+    #when defined(nimAvlcorruption):
+    #  track("allocAvlNode", result, sizeof(AvlNode))
+    sysAssert(bottom == addr(a.bottomData), "bottom data")
+    sysAssert(bottom.link[0] == bottom, "bottom link[0]")
+    sysAssert(bottom.link[1] == bottom, "bottom link[1]")
+
+  proc deallocAvlNode(a: var MemRegion, n: PAvlNode) {.inline.} = # pushes `n` onto a.freeAvlNodes for later reuse
+    n.link[0] = a.freeAvlNodes
+    a.freeAvlNodes = n
+
+proc addHeapLink(a: var MemRegion; p: PBigChunk, size: int): ptr HeapLinks =
+  var it = addr(a.heapLinks)
+  while it != nil and it.len >= it.chunks.len: it = it.next
+  if it == nil:
+    var n = cast[ptr HeapLinks](llAlloc(a, sizeof(HeapLinks)))
+    n.next = a.heapLinks.next
+    a.heapLinks.next = n
+    n.chunks[0] = (p, size)
+    n.len = 1
+    result = n
   else:
-    result = cast[PAvlNode](llAlloc(a, sizeof(TAvlNode)))
-  result.key = key
-  result.upperBound = upperBound
-  result.link[0] = bottom
-  result.link[1] = bottom
-  result.level = 1
-  sysAssert(bottom == addr(bottomData), "bottom data")
-  sysAssert(bottom.link[0] == bottom, "bottom link[0]")
-  sysAssert(bottom.link[1] == bottom, "bottom link[1]")
-
-proc deallocAvlNode(a: var TMemRegion, n: PAvlNode) {.inline.} =
-  n.link[0] = a.freeAvlNodes
-  a.freeAvlNodes = n
-
-include "system/avltree"
-
-proc llDeallocAll(a: var TMemRegion) =
+    let L = it.len
+    it.chunks[L] = (p, size)
+    inc it.len
+    result = it
+
+when not defined(gcDestructors):
+  include "system/avltree"
+
+proc llDeallocAll(a: var MemRegion) =
   var it = a.llmem
   while it != nil:
     # we know each block in the list has the size of 1 page:
     var next = it.next
     osDeallocPages(it, PageSize)
     it = next
-  
-proc IntSetGet(t: TIntSet, key: int): PTrunk = 
+  a.llmem = nil
+
+proc intSetGet(t: IntSet, key: int): PTrunk =
   var it = t.data[key and high(t.data)]
-  while it != nil: 
+  while it != nil:
     if it.key == key: return it
     it = it.next
   result = nil
 
-proc IntSetPut(a: var TMemRegion, t: var TIntSet, key: int): PTrunk = 
-  result = IntSetGet(t, key)
+proc intSetPut(a: var MemRegion, t: var IntSet, key: int): PTrunk =
+  result = intSetGet(t, key)
   if result == nil:
     result = cast[PTrunk](llAlloc(a, sizeof(result[])))
     result.next = t.data[key and high(t.data)]
     t.data[key and high(t.data)] = result
     result.key = key
 
-proc Contains(s: TIntSet, key: int): bool = 
-  var t = IntSetGet(s, key shr TrunkShift)
-  if t != nil: 
+proc contains(s: IntSet, key: int): bool =
+  var t = intSetGet(s, key shr TrunkShift)
+  if t != nil:
     var u = key and TrunkMask
-    result = (t.bits[u shr IntShift] and (1 shl (u and IntMask))) != 0
-  else: 
+    result = (t.bits[u shr IntShift] and (uint(1) shl (u and IntMask))) != 0
+  else:
     result = false
-  
-proc Incl(a: var TMemRegion, s: var TIntSet, key: int) = 
-  var t = IntSetPut(a, s, key shr TrunkShift)
+
+proc incl(a: var MemRegion, s: var IntSet, key: int) =
+  var t = intSetPut(a, s, key shr TrunkShift)
   var u = key and TrunkMask
-  t.bits[u shr IntShift] = t.bits[u shr IntShift] or (1 shl (u and IntMask))
+  t.bits[u shr IntShift] = t.bits[u shr IntShift] or (uint(1) shl (u and IntMask))
 
-proc Excl(s: var TIntSet, key: int) = 
-  var t = IntSetGet(s, key shr TrunkShift)
+proc excl(s: var IntSet, key: int) =
+  var t = intSetGet(s, key shr TrunkShift)
   if t != nil:
     var u = key and TrunkMask
     t.bits[u shr IntShift] = t.bits[u shr IntShift] and not
-        (1 shl (u and IntMask))
+        (uint(1) shl (u and IntMask))
 
-iterator elements(t: TIntSet): int {.inline.} =
+iterator elements(t: IntSet): int {.inline.} =
   # while traversing it is forbidden to change the set!
   for h in 0..high(t.data):
     var r = t.data[h]
@@ -303,117 +459,140 @@ iterator elements(t: TIntSet): int {.inline.} =
           w = w shr 1
         inc(i)
       r = r.next
-  
-proc isSmallChunk(c: PChunk): bool {.inline.} = 
-  return c.size <= SmallChunkSize-smallChunkOverhead()
-  
-proc chunkUnused(c: PChunk): bool {.inline.} = 
-  result = not c.used
-
-iterator allObjects(m: TMemRegion): pointer {.inline.} =
-  for s in elements(m.chunkStarts):
-    let c = cast[PChunk](s shl PageShift)
-    if not chunkUnused(c):
-      if isSmallChunk(c):
-        var c = cast[PSmallChunk](c)
-        
-        let size = c.size
-        var a = cast[TAddress](addr(c.data))
-        let limit = a + c.acc
-        while a <% limit:
-          yield cast[pointer](a)
-          a = a +% size
-      else:
-        let c = cast[PBigChunk](c)
-        yield addr(c.data)
 
-proc isCell(p: pointer): bool {.inline.} =
-  result = cast[ptr TFreeCell](p).zeroField >% 1
+proc isSmallChunk(c: PChunk): bool {.inline.} = # true when c.size fits in one SmallChunkSize page after the SmallChunk header
+  result = c.size <= SmallChunkSize-smallChunkOverhead()
+
+proc chunkUnused(c: PChunk): bool {.inline.} = # true when the 'used' flag (bit 0 of prevSize) is clear
+  result = (c.prevSize and 1) == 0
+
+iterator allObjects(m: var MemRegion): pointer {.inline.} = # yields the start of every cell in used small chunks and the data of used big chunks
+  m.locked = true # set for the duration of the traversal, reset at the end
+  for s in elements(m.chunkStarts):
+    # we need to check here again as it could have been modified:
+    if s in m.chunkStarts:
+      let c = cast[PChunk](s shl PageShift) # page index back to chunk address
+      if not chunkUnused(c):
+        if isSmallChunk(c):
+          var c = cast[PSmallChunk](c)
+
+          let size = c.size
+          var a = cast[int](addr(c.data))
+          let limit = a + c.acc.int # only the part handed out via the accumulator is visited
+          while a <% limit:
+            yield cast[pointer](a) # cells are yielded without checking whether they are currently free
+            a = a +% size
+        else:
+          let c = cast[PBigChunk](c)
+          yield addr(c.data)
+  m.locked = false
+
+proc iterToProc*(iter: typed, envType: typedesc; procName: untyped) {.
+                      magic: "Plugin", compileTime.} # bodiless declaration: implemented by the compiler via the "Plugin" magic, compile time only
+
+when not defined(gcDestructors): # FreeCell.zeroField only exists when gcDestructors is not defined
+  proc isCell(p: pointer): bool {.inline.} = # true when zeroField is neither 0 nor 1 (compared as unsigned)
+    result = cast[ptr FreeCell](p).zeroField >% 1
 
 # ------------- chunk management ----------------------------------------------
-proc pageIndex(c: PChunk): int {.inline.} = 
-  result = cast[TAddress](c) shr PageShift
+proc pageIndex(c: PChunk): int {.inline.} =
+  result = cast[int](c) shr PageShift
 
-proc pageIndex(p: pointer): int {.inline.} = 
-  result = cast[TAddress](p) shr PageShift
+proc pageIndex(p: pointer): int {.inline.} =
+  result = cast[int](p) shr PageShift
 
-proc pageAddr(p: pointer): PChunk {.inline.} = 
-  result = cast[PChunk](cast[TAddress](p) and not PageMask)
+proc pageAddr(p: pointer): PChunk {.inline.} =
+  result = cast[PChunk](cast[int](p) and not PageMask)
   #sysAssert(Contains(allocator.chunkStarts, pageIndex(result)))
 
-proc requestOsChunks(a: var TMemRegion, size: int): PBigChunk = 
+when false:
+  proc writeFreeList(a: MemRegion) =
+    var it = a.freeChunksList
+    c_fprintf(stdout, "freeChunksList: %p\n", it)
+    while it != nil:
+      c_fprintf(stdout, "it: %p, next: %p, prev: %p, size: %ld\n",
+                it, it.next, it.prev, it.size)
+      it = it.next
+
+proc requestOsChunks(a: var MemRegion, size: int): PBigChunk =
+  when not defined(emscripten):
+    if not a.blockChunkSizeIncrease:
+      let usedMem = a.occ #a.currMem # - a.freeMem
+      if usedMem < 64 * 1024:
+        a.nextChunkSize = PageSize*4
+      else:
+        a.nextChunkSize = min(roundup(usedMem shr 2, PageSize), a.nextChunkSize * 2)
+        a.nextChunkSize = min(a.nextChunkSize, MaxBigChunkSize).int
+
+  var size = size
+  if size > a.nextChunkSize:
+    result = cast[PBigChunk](allocPages(a, size))
+  else:
+    result = cast[PBigChunk](tryAllocPages(a, a.nextChunkSize))
+    if result == nil:
+      result = cast[PBigChunk](allocPages(a, size))
+      a.blockChunkSizeIncrease = true
+    else:
+      size = a.nextChunkSize
+
   incCurrMem(a, size)
   inc(a.freeMem, size)
-  result = cast[PBigChunk](osAllocPages(size))
-  sysAssert((cast[TAddress](result) and PageMask) == 0, "requestOsChunks 1")
+  let heapLink = a.addHeapLink(result, size)
+  when defined(debugHeapLinks):
+    cprintf("owner: %p; result: %p; next pointer %p; size: %ld\n", addr(a),
+      result, heapLink, size)
+
+  when defined(memtracker):
+    trackLocation(addr result.size, sizeof(int))
+
+  sysAssert((cast[int](result) and PageMask) == 0, "requestOsChunks 1")
   #zeroMem(result, size)
   result.next = nil
   result.prev = nil
-  result.used = false
   result.size = size
   # update next.prevSize:
-  var nxt = cast[TAddress](result) +% size
+  var nxt = cast[int](result) +% size
   sysAssert((nxt and PageMask) == 0, "requestOsChunks 2")
   var next = cast[PChunk](nxt)
   if pageIndex(next) in a.chunkStarts:
     #echo("Next already allocated!")
-    next.prevSize = size
+    next.prevSize = size or (next.prevSize and 1)
   # set result.prevSize:
   var lastSize = if a.lastSize != 0: a.lastSize else: PageSize
-  var prv = cast[TAddress](result) -% lastSize
+  var prv = cast[int](result) -% lastSize
   sysAssert((nxt and PageMask) == 0, "requestOsChunks 3")
   var prev = cast[PChunk](prv)
   if pageIndex(prev) in a.chunkStarts and prev.size == lastSize:
     #echo("Prev already allocated!")
-    result.prevSize = lastSize
+    result.prevSize = lastSize or (result.prevSize and 1)
   else:
-    result.prevSize = 0 # unknown
+    result.prevSize = 0 or (result.prevSize and 1) # unknown
+    # but do not overwrite 'used' field
   a.lastSize = size # for next request
+  sysAssert((cast[int](result) and PageMask) == 0, "requestOschunks: unaligned chunk")
 
-proc freeOsChunks(a: var TMemRegion, p: pointer, size: int) = 
-  # update next.prevSize:
-  var c = cast[PChunk](p)
-  var nxt = cast[TAddress](p) +% c.size
-  sysAssert((nxt and PageMask) == 0, "freeOsChunks")
-  var next = cast[PChunk](nxt)
-  if pageIndex(next) in a.chunkStarts:
-    next.prevSize = 0 # XXX used
-  excl(a.chunkStarts, pageIndex(p))
-  osDeallocPages(p, size)
-  decCurrMem(a, size)
-  dec(a.freeMem, size)
-  #c_fprintf(c_stdout, "[Alloc] back to OS: %ld\n", size)
-
-proc isAccessible(a: TMemRegion, p: pointer): bool {.inline.} = 
-  result = Contains(a.chunkStarts, pageIndex(p))
+proc isAccessible(a: MemRegion, p: pointer): bool {.inline.} =
+  result = contains(a.chunkStarts, pageIndex(p))
 
-proc contains[T](list, x: T): bool = 
+proc contains[T](list, x: T): bool =
   var it = list
   while it != nil:
     if it == x: return true
     it = it.next
-    
-proc writeFreeList(a: TMemRegion) =
-  var it = a.freeChunksList
-  c_fprintf(c_stdout, "freeChunksList: %p\n", it)
-  while it != nil: 
-    c_fprintf(c_stdout, "it: %p, next: %p, prev: %p\n", 
-              it, it.next, it.prev)
-    it = it.next
 
-proc ListAdd[T](head: var T, c: T) {.inline.} = 
+proc listAdd[T](head: var T, c: T) {.inline.} =
   sysAssert(c notin head, "listAdd 1")
   sysAssert c.prev == nil, "listAdd 2"
   sysAssert c.next == nil, "listAdd 3"
   c.next = head
-  if head != nil: 
+  if head != nil:
     sysAssert head.prev == nil, "listAdd 4"
     head.prev = c
   head = c
 
-proc ListRemove[T](head: var T, c: T) {.inline.} =
+proc listRemove[T](head: var T, c: T) {.inline.} =
   sysAssert(c in head, "listRemove")
-  if c == head: 
+  if c == head:
     head = c.next
     sysAssert c.prev == nil, "listRemove 2"
     if head != nil: head.prev = nil
@@ -423,353 +602,639 @@ proc ListRemove[T](head: var T, c: T) {.inline.} =
     if c.next != nil: c.next.prev = c.prev
   c.next = nil
   c.prev = nil
-  
-proc updatePrevSize(a: var TMemRegion, c: PBigChunk, 
-                    prevSize: int) {.inline.} = 
-  var ri = cast[PChunk](cast[TAddress](c) +% c.size)
-  sysAssert((cast[TAddress](ri) and PageMask) == 0, "updatePrevSize")
+
+proc updatePrevSize(a: var MemRegion, c: PBigChunk,
+                    prevSize: int) {.inline.} =
+  var ri = cast[PChunk](cast[int](c) +% c.size)
+  sysAssert((cast[int](ri) and PageMask) == 0, "updatePrevSize")
   if isAccessible(a, ri):
-    ri.prevSize = prevSize
-  
-proc freeBigChunk(a: var TMemRegion, c: PBigChunk) = 
+    ri.prevSize = prevSize or (ri.prevSize and 1)
+
+proc splitChunk2(a: var MemRegion, c: PBigChunk, size: int): PBigChunk = # cuts `c` down to `size` bytes and returns the remainder chunk
+  result = cast[PBigChunk](cast[int](c) +% size) # the remainder's header starts `size` bytes into `c`
+  result.size = c.size - size
+  track("result.size", addr result.size, sizeof(int))
+  when not defined(nimOptimizedSplitChunk):
+    # still active because of a weird codegen issue on some of our CIs:
+    result.next = nil
+    result.prev = nil
+  # the left neighbour is `c` with its new size; an even value keeps the low 'used' bit clear:
+  result.prevSize = size
+  result.owner = addr a
+  sysAssert((size and 1) == 0, "splitChunk 2")
+  sysAssert((size and PageMask) == 0,
+      "splitChunk: size is not a multiple of the PageSize")
+  updatePrevSize(a, c, result.size) # keep before `c.size = size`: the right neighbour is found via the old `c.size`
+  c.size = size
+  incl(a, a.chunkStarts, pageIndex(result)) # register the remainder's start page; it is not added to the free matrix here
+
+proc splitChunk(a: var MemRegion, c: PBigChunk, size: int) = # shrinks `c` to `size` bytes and keeps the cut-off tail as a free chunk
+  let rest = splitChunk2(a, c, size)
+  addChunkToMatrix(a, rest) # register the cut-off tail in the free-chunk matrix
+
+proc freeBigChunk(a: var MemRegion, c: PBigChunk) =
   var c = c
   sysAssert(c.size >= PageSize, "freeBigChunk")
   inc(a.freeMem, c.size)
-  when coalescRight:
-    var ri = cast[PChunk](cast[TAddress](c) +% c.size)
-    sysAssert((cast[TAddress](ri) and PageMask) == 0, "freeBigChunk 2")
-    if isAccessible(a, ri) and chunkUnused(ri):
-      sysAssert(not isSmallChunk(ri), "freeBigChunk 3")
-      if not isSmallChunk(ri):
-        ListRemove(a.freeChunksList, cast[PBigChunk](ri))
-        inc(c.size, ri.size)
-        excl(a.chunkStarts, pageIndex(ri))
+  c.prevSize = c.prevSize and not 1  # set 'used' to false
   when coalescLeft:
-    if c.prevSize != 0: 
-      var le = cast[PChunk](cast[TAddress](c) -% c.prevSize)
-      sysAssert((cast[TAddress](le) and PageMask) == 0, "freeBigChunk 4")
+    let prevSize = c.prevSize
+    if prevSize != 0:
+      var le = cast[PChunk](cast[int](c) -% prevSize)
+      sysAssert((cast[int](le) and PageMask) == 0, "freeBigChunk 4")
       if isAccessible(a, le) and chunkUnused(le):
         sysAssert(not isSmallChunk(le), "freeBigChunk 5")
-        if not isSmallChunk(le):
-          ListRemove(a.freeChunksList, cast[PBigChunk](le))
+        if not isSmallChunk(le) and le.size < MaxBigChunkSize:
+          removeChunkFromMatrix(a, cast[PBigChunk](le))
           inc(le.size, c.size)
           excl(a.chunkStarts, pageIndex(c))
           c = cast[PBigChunk](le)
+          if c.size > MaxBigChunkSize:
+            let rest = splitChunk2(a, c, MaxBigChunkSize)
+            when defined(nimOptimizedSplitChunk):
+              rest.next = nil
+              rest.prev = nil
+            addChunkToMatrix(a, c)
+            c = rest
+  when coalescRight:
+    var ri = cast[PChunk](cast[int](c) +% c.size)
+    sysAssert((cast[int](ri) and PageMask) == 0, "freeBigChunk 2")
+    if isAccessible(a, ri) and chunkUnused(ri):
+      sysAssert(not isSmallChunk(ri), "freeBigChunk 3")
+      if not isSmallChunk(ri) and c.size < MaxBigChunkSize:
+        removeChunkFromMatrix(a, cast[PBigChunk](ri))
+        inc(c.size, ri.size)
+        excl(a.chunkStarts, pageIndex(ri))
+        if c.size > MaxBigChunkSize:
+          let rest = splitChunk2(a, c, MaxBigChunkSize)
+          addChunkToMatrix(a, rest)
+  addChunkToMatrix(a, c)
 
-  if c.size < ChunkOsReturn or weirdUnmap:
-    incl(a, a.chunkStarts, pageIndex(c))
-    updatePrevSize(a, c, c.size)
-    ListAdd(a.freeChunksList, c)
-    c.used = false
-  else:
-    freeOsChunks(a, c, c.size)
-
-proc splitChunk(a: var TMemRegion, c: PBigChunk, size: int) = 
-  var rest = cast[PBigChunk](cast[TAddress](c) +% size)
-  sysAssert(rest notin a.freeChunksList, "splitChunk")
-  rest.size = c.size - size
-  rest.used = false
-  rest.next = nil
-  rest.prev = nil
-  rest.prevSize = size
-  updatePrevSize(a, c, rest.size)
-  c.size = size
-  incl(a, a.chunkStarts, pageIndex(rest))
-  ListAdd(a.freeChunksList, rest)
-
-proc getBigChunk(a: var TMemRegion, size: int): PBigChunk = 
-  # use first fit for now:
-  sysAssert((size and PageMask) == 0, "getBigChunk 1")
+proc getBigChunk(a: var MemRegion, size: int): PBigChunk =
   sysAssert(size > 0, "getBigChunk 2")
-  result = a.freeChunksList
-  block search:
-    while result != nil:
-      sysAssert chunkUnused(result), "getBigChunk 3"
-      if result.size == size: 
-        ListRemove(a.freeChunksList, result)
-        break search
-      elif result.size > size:
-        ListRemove(a.freeChunksList, result)
-        splitChunk(a, result, size)
-        break search
-      result = result.next
-      sysAssert result != a.freeChunksList, "getBigChunk 4"
-    if size < InitialMemoryRequest: 
-      result = requestOsChunks(a, InitialMemoryRequest)
+  var size = size # roundup(size, PageSize)
+  var fl = 0
+  var sl = 0
+  mappingSearch(size, fl, sl)
+  sysAssert((size and PageMask) == 0, "getBigChunk: unaligned chunk")
+  result = findSuitableBlock(a, fl, sl)
+
+  when RegionHasLock:
+    if not a.lockActive:
+      a.lockActive = true
+      initSysLock(a.lock)
+    acquireSys a.lock
+
+  if result == nil:
+    if size < nimMinHeapPages * PageSize:
+      result = requestOsChunks(a, nimMinHeapPages * PageSize)
       splitChunk(a, result, size)
     else:
       result = requestOsChunks(a, size)
-  result.prevSize = 0 # XXX why is this needed?
-  result.used = true
+      # if we over allocated split the chunk:
+      if result.size > size:
+        splitChunk(a, result, size)
+    result.owner = addr a
+  else:
+    removeChunkFromMatrix2(a, result, fl, sl)
+    if result.size >= size + PageSize:
+      splitChunk(a, result, size)
+  # set 'used' to true:
+  result.prevSize = 1
+  track("setUsedToFalse", addr result.size, sizeof(int))
+  sysAssert result.owner == addr a, "getBigChunk: No owner set!"
+
   incl(a, a.chunkStarts, pageIndex(result))
   dec(a.freeMem, size)
+  when RegionHasLock:
+    releaseSys a.lock
+
+proc getHugeChunk(a: var MemRegion; size: int): PBigChunk = # obtains a dedicated chunk of `size` bytes via `allocPages`
+  result = cast[PBigChunk](allocPages(a, size))
+  when RegionHasLock:
+    if not a.lockActive: # the region lock is initialised on first use
+      a.lockActive = true
+      initSysLock(a.lock)
+    acquireSys a.lock
+  incCurrMem(a, size)
+  # XXX add this to the heap links. But also remove it from there later.
+  when false: a.addHeapLink(result, size)
+  sysAssert((cast[int](result) and PageMask) == 0, "getHugeChunk")
+  result.next = nil
+  result.prev = nil
+  result.size = size
+  # mark the chunk as used (low bit of `prevSize`); no left-neighbour size is recorded:
+  result.prevSize = 1
+  result.owner = addr a
+  incl(a, a.chunkStarts, pageIndex(result))
+  when RegionHasLock:
+    releaseSys a.lock
 
-proc getSmallChunk(a: var TMemRegion): PSmallChunk = 
+proc freeHugeChunk(a: var MemRegion; c: PBigChunk) =
+  let nbytes = c.size # read first: `c` is unusable once its pages are released
+  sysAssert(nbytes >= HugeChunkSize, "freeHugeChunk: invalid size")
+  a.chunkStarts.excl(pageIndex(c)) # the page no longer starts a chunk
+  a.decCurrMem(nbytes)
+  osDeallocPages(c, nbytes) # return the whole chunk to the OS
+
+proc getSmallChunk(a: var MemRegion): PSmallChunk =
   var res = getBigChunk(a, PageSize)
   sysAssert res.prev == nil, "getSmallChunk 1"
   sysAssert res.next == nil, "getSmallChunk 2"
   result = cast[PSmallChunk](res)
 
 # -----------------------------------------------------------------------------
-proc isAllocatedPtr(a: TMemRegion, p: pointer): bool
-
-proc allocInv(a: TMemRegion): bool =
-  ## checks some (not all yet) invariants of the allocator's data structures.
-  for s in low(a.freeSmallChunks)..high(a.freeSmallChunks):
-    var c = a.freeSmallChunks[s]
-    while c != nil:
-      if c.next == c: return false
-      if c.size != s * MemAlign: return false
-      var it = c.freeList
-      while it != nil:
-        if it.zeroField != 0: return false
-        it = it.next
-      c = c.next
-  result = true
-
-proc rawAlloc(a: var TMemRegion, requestedSize: int): pointer =
+when not defined(gcDestructors):
+  proc isAllocatedPtr(a: MemRegion, p: pointer): bool {.benign.}
+
+when true: # invariant checking is compiled out: `allocInv` always yields true
+  template allocInv(a: MemRegion): bool = true
+else:
+  proc allocInv(a: MemRegion): bool =
+    ## checks some (not all yet) invariants of the allocator's data structures.
+    for s in low(a.freeSmallChunks)..high(a.freeSmallChunks):
+      var c = a.freeSmallChunks[s]
+      while not (c == nil):
+        if c.next == c: # a chunk linked to itself would make this walk loop forever
+          echo "[SYSASSERT] c.next == c"
+          return false
+        if not (c.size == s * MemAlign): # slot `s` may only hold chunks whose cell size is `s * MemAlign`
+          echo "[SYSASSERT] c.size != s * MemAlign"
+          return false
+        var it = c.freeList
+        while not (it == nil):
+          if not (it.zeroField == 0): # every cell on a free list must have `zeroField` cleared
+            echo "[SYSASSERT] it.zeroField != 0"
+            c_printf("%ld %p\n", it.zeroField, it)
+            return false
+          it = it.next
+        c = c.next
+    result = true
+
+when false: # debugging aid, currently disabled: records the size passed to every `trackSize` call
+  var
+    rsizes: array[50_000, int]
+    rsizesLen: int
+
+  proc trackSize(size: int) =
+    rsizes[rsizesLen] = size
+    inc rsizesLen
+
+  proc untrackSize(size: int) =
+    for i in 0 .. rsizesLen-1:
+      if rsizes[i] == size:
+        rsizes[i] = rsizes[rsizesLen-1] # overwrite the match with the last entry, then shrink
+        dec rsizesLen
+        return
+    c_fprintf(stdout, "%ld\n", size)
+    sysAssert(false, "untracked size!")
+else: # active branch: both hooks expand to nothing
+  template trackSize(x) = discard
+  template untrackSize(x) = discard
+
+proc deallocBigChunk(a: var MemRegion, c: PBigChunk) = # releases the big or huge chunk `c` and updates the accounting of `a`
+  when RegionHasLock:
+    acquireSys a.lock
+  dec a.occ, c.size
+  untrackSize(c.size)
+  sysAssert a.occ >= 0, "rawDealloc: negative occupied memory (case B)"
+  when not defined(gcDestructors):
+    a.deleted = getBottom(a)
+    del(a, a.root, cast[int](addr(c.data))) # remove the key that rawAlloc registered via `add(a, a.root, ...)`
+  if c.size >= HugeChunkSize: freeHugeChunk(a, c) # huge chunks are handed back to the OS
+  else: freeBigChunk(a, c)
+  when RegionHasLock:
+    releaseSys a.lock
+
+when defined(gcDestructors):
+  template atomicPrepend(head, elem: untyped) =
+    # pushes `elem` onto the front of list `head`; see also https://en.cppreference.com/w/cpp/atomic/atomic_compare_exchange
+    when hasThreadSupport:
+      while true: # retry until the compare-exchange installs `elem` as the new head
+        elem.next.storea head.loada # note: overwrites whatever `elem.next` held before
+        if atomicCompareExchangeN(addr head, addr elem.next, elem, weak = true, ATOMIC_RELEASE, ATOMIC_RELAXED):
+          break
+    else:
+      elem.next.storea head.loada
+      head.storea elem
+
+  proc addToSharedFreeListBigChunks(a: var MemRegion; c: PBigChunk) {.inline.} = # queues `c` on `a.sharedFreeListBigChunks`
+    sysAssert c.next == nil, "c.next pointer must be nil" # atomicPrepend overwrites `c.next`, so a linked tail would be lost
+    atomicPrepend a.sharedFreeListBigChunks, c
+
+  proc addToSharedFreeList(c: PSmallChunk; f: ptr FreeCell; size: int) {.inline.} = # `size` is the slot index (rawDealloc passes `s div MemAlign`)
+    atomicPrepend c.owner.sharedFreeLists[size], f # hand cell `f` to the region that owns `c`
+
+  const MaxSteps = 20 # iteration budget that keeps one freeDeferredObjects pass time-bounded
+
+  proc compensateCounters(a: var MemRegion; c: PSmallChunk; size: int) =
+    # The freeing side skipped the usual `inc(c.free, size); dec(a.occ, size)`
+    # because it did not own these cells. Settle that bookkeeping here for
+    # every cell currently linked into `c.freeList`.
+    var gained = 0
+    var cell = c.freeList
+    while cell != nil:
+      if cast[PSmallChunk](pageAddr(cell)) != c:
+        # The cell lives in another chunk, possibly another thread's. It has to
+        # keep `c` alive, since freeing `c` while it holds the cell would leak it.
+        inc c.foreignCells
+      inc gained, size
+      cell = cell.next
+    # The foreign chunk's counters stay untouched on purpose: that reserves the
+    # space there and prevents the foreign chunk from being deallocated.
+    inc(c.free, gained)
+    dec(a.occ, gained)
+
+  proc freeDeferredObjects(a: var MemRegion; root: PBigChunk) =
+    # Frees a bounded number of chunks from the detached list `root`; leftovers
+    # are re-queued one by one, because the push overwrites the node's `next`.
+    var it = root
+    var budget = MaxSteps # make it time-bounded
+    while it != nil:
+      let rest = it.next.loada
+      it.next.storea nil # unlink before handing the chunk on
+      if budget >= 0:
+        deallocBigChunk(a, it)
+        dec budget
+      else:
+        # out of budget: queuing only the head would drop every later chunk
+        addToSharedFreeListBigChunks(a, it)
+      it = rest
+
+proc rawAlloc(a: var MemRegion, requestedSize: int): pointer =
+  when defined(nimTypeNames):
+    inc(a.allocCounter)
   sysAssert(allocInv(a), "rawAlloc: begin")
-  sysAssert(roundup(65, 8) == 72, "rawAlloc 1")
-  sysAssert requestedSize >= sizeof(TFreeCell), "rawAlloc 2"
+  sysAssert(roundup(65, 8) == 72, "rawAlloc: roundup broken")
   var size = roundup(requestedSize, MemAlign)
+  sysAssert(size >= sizeof(FreeCell), "rawAlloc: requested size too small")
   sysAssert(size >= requestedSize, "insufficient allocated size!")
-  #c_fprintf(c_stdout, "alloc; size: %ld; %ld\n", requestedSize, size)
-  if size <= SmallChunkSize-smallChunkOverhead(): 
+  #c_fprintf(stdout, "alloc; size: %ld; %ld\n", requestedSize, size)
+
+  if size <= SmallChunkSize-smallChunkOverhead():
+    template fetchSharedCells(tc: PSmallChunk) =
+      # Consumes cells from (potentially) foreign threads from `a.sharedFreeLists[s]`
+      when defined(gcDestructors):
+        if tc.freeList == nil:
+          when hasThreadSupport:
+            # Steal the entire list from `sharedFreeList`:
+            tc.freeList = atomicExchangeN(addr a.sharedFreeLists[s], nil, ATOMIC_RELAXED)
+          else:
+            tc.freeList = a.sharedFreeLists[s]
+            a.sharedFreeLists[s] = nil
+          # if `tc.freeList` isn't nil, `tc` will gain capacity.
+          # We must calculate how much it gained and how many foreign cells are included.
+          compensateCounters(a, tc, size)
+
     # allocate a small block: for small chunks, we use only its next pointer
-    var s = size div MemAlign
+    let s = size div MemAlign
     var c = a.freeSmallChunks[s]
-    if c == nil: 
+    if c == nil:
+      # There is no free chunk of the requested size available, we need a new one.
       c = getSmallChunk(a)
+      # init all fields in case memory didn't get zeroed
       c.freeList = nil
+      c.foreignCells = 0
       sysAssert c.size == PageSize, "rawAlloc 3"
       c.size = size
-      c.acc = size
-      c.free = SmallChunkSize - smallChunkOverhead() - size
+      c.acc = size.uint32
+      c.free = SmallChunkSize - smallChunkOverhead() - size.int32
+      sysAssert c.owner == addr(a), "rawAlloc: No owner set!"
       c.next = nil
       c.prev = nil
-      ListAdd(a.freeSmallChunks[s], c)
+      # Shared cells are fetched here in case `c.size * 2 >= SmallChunkSize - smallChunkOverhead()`.
+      # For those single cell chunks, we would otherwise have to allocate a new one almost every time.
+      fetchSharedCells(c)
+      if c.free >= size:
+        # Because removals from `a.freeSmallChunks[s]` only happen in the other alloc branch and during dealloc,
+        #  we must not add it to the list if it cannot be used the next time a pointer of `size` bytes is needed.
+        listAdd(a.freeSmallChunks[s], c)
       result = addr(c.data)
-      sysAssert((cast[TAddress](result) and (MemAlign-1)) == 0, "rawAlloc 4")
+      sysAssert((cast[int](result) and (MemAlign-1)) == 0, "rawAlloc 4")
     else:
+      # There is a free chunk of the requested size available, use it.
       sysAssert(allocInv(a), "rawAlloc: begin c != nil")
       sysAssert c.next != c, "rawAlloc 5"
       #if c.size != size:
-      #  c_fprintf(c_stdout, "csize: %lld; size %lld\n", c.size, size)
+      #  c_fprintf(stdout, "csize: %lld; size %lld\n", c.size, size)
       sysAssert c.size == size, "rawAlloc 6"
       if c.freeList == nil:
-        sysAssert(c.acc + smallChunkOverhead() + size <= SmallChunkSize, 
+        sysAssert(c.acc.int + smallChunkOverhead() + size <= SmallChunkSize,
                   "rawAlloc 7")
-        result = cast[pointer](cast[TAddress](addr(c.data)) +% c.acc)
+        result = cast[pointer](cast[int](addr(c.data)) +% c.acc.int)
         inc(c.acc, size)
       else:
+        # There are free cells available, prefer them over the accumulator
         result = c.freeList
-        sysAssert(c.freeList.zeroField == 0, "rawAlloc 8")
+        when not defined(gcDestructors):
+          sysAssert(c.freeList.zeroField == 0, "rawAlloc 8")
         c.freeList = c.freeList.next
+        if cast[PSmallChunk](pageAddr(result)) != c:
+          # This cell isn't a blocker for the current chunk's deallocation anymore
+          dec(c.foreignCells)
+        else:
+          sysAssert(c == cast[PSmallChunk](pageAddr(result)), "rawAlloc: Bad cell")
+      # Even if the cell we return is foreign, the local chunk's capacity decreases.
+      # The capacity was previously reserved in the source chunk (when it first got allocated),
+      #  then added into the current chunk during dealloc,
+      #  so the source chunk will not be freed or leak memory because of this.
       dec(c.free, size)
-      sysAssert((cast[TAddress](result) and (MemAlign-1)) == 0, "rawAlloc 9")
+      sysAssert((cast[int](result) and (MemAlign-1)) == 0, "rawAlloc 9")
       sysAssert(allocInv(a), "rawAlloc: end c != nil")
-    sysAssert(allocInv(a), "rawAlloc: before c.free < size")
-    if c.free < size:
-      sysAssert(allocInv(a), "rawAlloc: before listRemove test")
-      ListRemove(a.freeSmallChunks[s], c)
-      sysAssert(allocInv(a), "rawAlloc: end listRemove test")
-    sysAssert(((cast[TAddress](result) and PageMask) - smallChunkOverhead()) %%
+      # We fetch deferred cells *after* advancing `c.freeList`/`acc` to adjust `c.free`.
+      # If after the adjustment it turns out there are free cells available,
+      #  the chunk stays in `a.freeSmallChunks[s]` and the need for a new chunk is delayed.
+      fetchSharedCells(c)
+      sysAssert(allocInv(a), "rawAlloc: before c.free < size")
+      if c.free < size:
+        # Even after fetching shared cells the chunk has no usable memory left. It is no longer the active chunk
+        sysAssert(allocInv(a), "rawAlloc: before listRemove test")
+        listRemove(a.freeSmallChunks[s], c)
+        sysAssert(allocInv(a), "rawAlloc: end listRemove test")
+    sysAssert(((cast[int](result) and PageMask) - smallChunkOverhead()) %%
                size == 0, "rawAlloc 21")
     sysAssert(allocInv(a), "rawAlloc: end small size")
+    inc a.occ, size
+    trackSize(c.size)
   else:
-    size = roundup(requestedSize+bigChunkOverhead(), PageSize)
+    when defined(gcDestructors):
+      when hasThreadSupport:
+        let deferredFrees = atomicExchangeN(addr a.sharedFreeListBigChunks, nil, ATOMIC_RELAXED)
+      else:
+        let deferredFrees = a.sharedFreeListBigChunks
+        a.sharedFreeListBigChunks = nil
+      if deferredFrees != nil:
+        freeDeferredObjects(a, deferredFrees)
+
+    size = requestedSize + bigChunkOverhead() #  roundup(requestedSize+bigChunkOverhead(), PageSize)
     # allocate a large block
-    var c = getBigChunk(a, size)
+    var c = if size >= HugeChunkSize: getHugeChunk(a, size)
+            else: getBigChunk(a, size)
     sysAssert c.prev == nil, "rawAlloc 10"
     sysAssert c.next == nil, "rawAlloc 11"
-    sysAssert c.size == size, "rawAlloc 12"
     result = addr(c.data)
-    sysAssert((cast[TAddress](result) and (MemAlign-1)) == 0, "rawAlloc 13")
-    if a.root == nil: a.root = bottom
-    add(a, a.root, cast[TAddress](result), cast[TAddress](result)+%size)
+    sysAssert((cast[int](c) and (MemAlign-1)) == 0, "rawAlloc 13")
+    sysAssert((cast[int](c) and PageMask) == 0, "rawAlloc: Not aligned on a page boundary")
+    when not defined(gcDestructors):
+      if a.root == nil: a.root = getBottom(a)
+      add(a, a.root, cast[int](result), cast[int](result)+%size)
+    inc a.occ, c.size
+    trackSize(c.size)
   sysAssert(isAccessible(a, result), "rawAlloc 14")
   sysAssert(allocInv(a), "rawAlloc: end")
+  when logAlloc: cprintf("var pointer_%p = alloc(%ld) # %p\n", result, requestedSize, addr a)
 
-proc rawAlloc0(a: var TMemRegion, requestedSize: int): pointer =
+proc rawAlloc0(a: var MemRegion, requestedSize: int): pointer =
   result = rawAlloc(a, requestedSize)
   zeroMem(result, requestedSize)
 
-proc rawDealloc(a: var TMemRegion, p: pointer) =
+proc rawDealloc(a: var MemRegion, p: pointer) =
+  when defined(nimTypeNames):
+    inc(a.deallocCounter)
   #sysAssert(isAllocatedPtr(a, p), "rawDealloc: no allocated pointer")
   sysAssert(allocInv(a), "rawDealloc: begin")
   var c = pageAddr(p)
+  sysAssert(c != nil, "rawDealloc: begin")
   if isSmallChunk(c):
     # `p` is within a small chunk:
     var c = cast[PSmallChunk](c)
-    var s = c.size
-    sysAssert(((cast[TAddress](p) and PageMask) - smallChunkOverhead()) %%
-               s == 0, "rawDealloc 3")
-    var f = cast[ptr TFreeCell](p)
-    #echo("setting to nil: ", $cast[TAddress](addr(f.zeroField)))
-    sysAssert(f.zeroField != 0, "rawDealloc 1")
-    f.zeroField = 0
-    f.next = c.freeList
-    c.freeList = f
-    when overwriteFree: 
-      # set to 0xff to check for usage after free bugs:
-      c_memset(cast[pointer](cast[int](p) +% sizeof(TFreeCell)), -1'i32, 
-               s -% sizeof(TFreeCell))
-    # check if it is not in the freeSmallChunks[s] list:
-    if c.free < s:
-      # add it to the freeSmallChunks[s] array:
-      ListAdd(a.freeSmallChunks[s div memAlign], c)
-      inc(c.free, s)
+    let s = c.size
+    #       ^ We might access thread foreign storage here.
+    # The other thread cannot possibly free this block as it's still alive.
+    var f = cast[ptr FreeCell](p)
+    if c.owner == addr(a):
+      # We own the block, there is no foreign thread involved.
+      dec a.occ, s
+      untrackSize(s)
+      sysAssert a.occ >= 0, "rawDealloc: negative occupied memory (case A)"
+      sysAssert(((cast[int](p) and PageMask) - smallChunkOverhead()) %%
+                s == 0, "rawDealloc 3")
+      when not defined(gcDestructors):
+        #echo("setting to nil: ", $cast[int](addr(f.zeroField)))
+        sysAssert(f.zeroField != 0, "rawDealloc 1")
+        f.zeroField = 0
+      when overwriteFree:
+        # set to 0xff to check for usage after free bugs:
+        nimSetMem(cast[pointer](cast[int](p) +% sizeof(FreeCell)), -1'i32,
+                s -% sizeof(FreeCell))
+      let activeChunk = a.freeSmallChunks[s div MemAlign]
+      if activeChunk != nil and c != activeChunk:
+        # This pointer is not part of the active chunk, lend it out
+        #  and do not adjust the current chunk (same logic as compensateCounters.)
+        # Put the cell into the active chunk,
+        #  may prevent a queue of available chunks from forming in a.freeSmallChunks[s div MemAlign].
+        #  This queue would otherwise waste memory in the form of free cells until we return to those chunks.
+        f.next = activeChunk.freeList
+        activeChunk.freeList = f # lend the cell
+        inc(activeChunk.free, s) # By not adjusting the current chunk's capacity it is prevented from being freed
+        inc(activeChunk.foreignCells) # The cell is now considered foreign from the perspective of the active chunk
+      else:
+        f.next = c.freeList
+        c.freeList = f
+        if c.free < s:
+          # The chunk could not have been active as it didn't have enough space to give
+          listAdd(a.freeSmallChunks[s div MemAlign], c)
+          inc(c.free, s)
+        else:
+          inc(c.free, s)
+          # Free only if the entire chunk is unused and there are no borrowed cells.
+          # If the chunk were to be freed while it references foreign cells,
+          #  the foreign chunks will leak memory and can never be freed.
+          if c.free == SmallChunkSize-smallChunkOverhead() and c.foreignCells == 0:
+            listRemove(a.freeSmallChunks[s div MemAlign], c)
+            c.size = SmallChunkSize
+            freeBigChunk(a, cast[PBigChunk](c))
     else:
-      inc(c.free, s)
-      if c.free == SmallChunkSize-smallChunkOverhead():
-        ListRemove(a.freeSmallChunks[s div memAlign], c)
-        c.size = SmallChunkSize
-        freeBigChunk(a, cast[PBigChunk](c))
-    sysAssert(((cast[TAddress](p) and PageMask) - smallChunkOverhead()) %%
+      when logAlloc: cprintf("dealloc(pointer_%p) # SMALL FROM %p CALLER %p\n", p, c.owner, addr(a))
+
+      when defined(gcDestructors):
+        addToSharedFreeList(c, f, s div MemAlign)
+    sysAssert(((cast[int](p) and PageMask) - smallChunkOverhead()) %%
                s == 0, "rawDealloc 2")
   else:
     # set to 0xff to check for usage after free bugs:
-    when overwriteFree: c_memset(p, -1'i32, c.size -% bigChunkOverhead())
-    # free big chunk
-    var c = cast[PBigChunk](c)
-    a.deleted = bottom
-    del(a, a.root, cast[int](addr(c.data)))
-    freeBigChunk(a, c)
-  sysAssert(allocInv(a), "rawDealloc: end")
-
-proc isAllocatedPtr(a: TMemRegion, p: pointer): bool = 
-  if isAccessible(a, p):
-    var c = pageAddr(p)
-    if not chunkUnused(c):
-      if isSmallChunk(c):
-        var c = cast[PSmallChunk](c)
-        var offset = (cast[TAddress](p) and (PageSize-1)) -% 
-                     smallChunkOverhead()
-        result = (c.acc >% offset) and (offset %% c.size == 0) and
-          (cast[ptr TFreeCell](p).zeroField >% 1)
+    when overwriteFree: nimSetMem(p, -1'i32, c.size -% bigChunkOverhead())
+    when logAlloc: cprintf("dealloc(pointer_%p) # BIG %p\n", p, c.owner)
+    when defined(gcDestructors):
+      if c.owner == addr(a):
+        deallocBigChunk(a, cast[PBigChunk](c))
       else:
-        var c = cast[PBigChunk](c)
-        result = p == addr(c.data) and cast[ptr TFreeCell](p).zeroField >% 1
-
-proc prepareForInteriorPointerChecking(a: var TMemRegion) {.inline.} =
-  a.minLargeObj = lowGauge(a.root)
-  a.maxLargeObj = highGauge(a.root)
+        addToSharedFreeListBigChunks(c.owner[], cast[PBigChunk](c))
+    else:
+      deallocBigChunk(a, cast[PBigChunk](c))
 
-proc interiorAllocatedPtr(a: TMemRegion, p: pointer): pointer =
-  if isAccessible(a, p):
-    var c = pageAddr(p)
-    if not chunkUnused(c):
-      if isSmallChunk(c):
-        var c = cast[PSmallChunk](c)
-        var offset = (cast[TAddress](p) and (PageSize-1)) -% 
-                     smallChunkOverhead()
-        if c.acc >% offset:
-          sysAssert(cast[TAddress](addr(c.data)) +% offset ==
-                    cast[TAddress](p), "offset is not what you think it is")
-          var d = cast[ptr TFreeCell](cast[TAddress](addr(c.data)) +% 
-                    offset -% (offset %% c.size))
-          if d.zeroField >% 1:
+  sysAssert(allocInv(a), "rawDealloc: end")
+  #when logAlloc: cprintf("dealloc(pointer_%p)\n", p)
+
+when not defined(gcDestructors):
+  proc isAllocatedPtr(a: MemRegion, p: pointer): bool =
+    if isAccessible(a, p):
+      var c = pageAddr(p)
+      if not chunkUnused(c):
+        if isSmallChunk(c):
+          var c = cast[PSmallChunk](c)
+          var offset = (cast[int](p) and (PageSize-1)) -%
+                      smallChunkOverhead()
+          result = (c.acc.int >% offset) and (offset %% c.size == 0) and
+            (cast[ptr FreeCell](p).zeroField >% 1)
+        else:
+          var c = cast[PBigChunk](c)
+          result = p == addr(c.data) and cast[ptr FreeCell](p).zeroField >% 1
+
+  proc prepareForInteriorPointerChecking(a: var MemRegion) {.inline.} =
+    a.minLargeObj = lowGauge(a.root)
+    a.maxLargeObj = highGauge(a.root)
+
+  proc interiorAllocatedPtr(a: MemRegion, p: pointer): pointer =
+    if isAccessible(a, p):
+      var c = pageAddr(p)
+      if not chunkUnused(c):
+        if isSmallChunk(c):
+          var c = cast[PSmallChunk](c)
+          var offset = (cast[int](p) and (PageSize-1)) -%
+                      smallChunkOverhead()
+          if c.acc.int >% offset:
+            sysAssert(cast[int](addr(c.data)) +% offset ==
+                      cast[int](p), "offset is not what you think it is")
+            var d = cast[ptr FreeCell](cast[int](addr(c.data)) +%
+                      offset -% (offset %% c.size))
+            if d.zeroField >% 1:
+              result = d
+              sysAssert isAllocatedPtr(a, result), " result wrong pointer!"
+        else:
+          var c = cast[PBigChunk](c)
+          var d = addr(c.data)
+          if p >= d and cast[ptr FreeCell](d).zeroField >% 1:
             result = d
             sysAssert isAllocatedPtr(a, result), " result wrong pointer!"
-      else:
-        var c = cast[PBigChunk](c)
-        var d = addr(c.data)
-        if p >= d and cast[ptr TFreeCell](d).zeroField >% 1:
-          result = d
-          sysAssert isAllocatedPtr(a, result), " result wrong pointer!"
-  else:
-    var q = cast[int](p)
-    if q >=% a.minLargeObj and q <=% a.maxLargeObj:
-      # this check is highly effective! Test fails for 99,96% of all checks on
-      # an x86-64.
-      var avlNode = inRange(a.root, q)
-      if avlNode != nil:
-        var k = cast[pointer](avlNode.key)
-        var c = cast[PBigChunk](pageAddr(k))
-        sysAssert(addr(c.data) == k, " k is not the same as addr(c.data)!")
-        if cast[ptr TFreeCell](k).zeroField >% 1:
-          result = k
-          sysAssert isAllocatedPtr(a, result), " result wrong pointer!"
+    else:
+      var q = cast[int](p)
+      if q >=% a.minLargeObj and q <=% a.maxLargeObj:
+        # this check is highly effective! Test fails for 99,96% of all checks on
+        # an x86-64.
+        var avlNode = inRange(a.root, q)
+        if avlNode != nil:
+          var k = cast[pointer](avlNode.key)
+          var c = cast[PBigChunk](pageAddr(k))
+          sysAssert(addr(c.data) == k, " k is not the same as addr(c.data)!")
+          if cast[ptr FreeCell](k).zeroField >% 1:
+            result = k
+            sysAssert isAllocatedPtr(a, result), " result wrong pointer!"
 
 proc ptrSize(p: pointer): int =
-  var x = cast[pointer](cast[TAddress](p) -% sizeof(TFreeCell))
-  var c = pageAddr(p)
-  sysAssert(not chunkUnused(c), "ptrSize")
-  result = c.size -% sizeof(TFreeCell)
-  if not isSmallChunk(c):
-    dec result, bigChunkOverhead()
-
-proc alloc(allocator: var TMemRegion, size: int): pointer =
-  result = rawAlloc(allocator, size+sizeof(TFreeCell))
-  cast[ptr TFreeCell](result).zeroField = 1 # mark it as used
-  sysAssert(not isAllocatedPtr(allocator, result), "alloc")
-  result = cast[pointer](cast[TAddress](result) +% sizeof(TFreeCell))
-
-proc alloc0(allocator: var TMemRegion, size: int): pointer =
+  when not defined(gcDestructors):
+    # usable bytes behind `p`: the chunk's `size` minus the FreeCell header (and the big-chunk overhead)
+    let c = pageAddr(p)
+    sysAssert(not chunkUnused(c), "ptrSize")
+    result = c.size -% sizeof(FreeCell)
+    if not isSmallChunk(c):
+      dec result, bigChunkOverhead()
+  else:
+    let c = pageAddr(p) # no FreeCell header is prepended in this mode
+    sysAssert(not chunkUnused(c), "ptrSize")
+    result = c.size
+    if not isSmallChunk(c):
+      dec result, bigChunkOverhead()
+
+proc alloc(allocator: var MemRegion, size: Natural): pointer {.gcsafe.} =
+  when defined(gcDestructors):
+    result = rawAlloc(allocator, size) # no per-object header in this mode
+  else:
+    let cell = rawAlloc(allocator, size+sizeof(FreeCell)) # extra room for the header
+    cast[ptr FreeCell](cell).zeroField = 1 # mark it as used
+    sysAssert(not isAllocatedPtr(allocator, cell), "alloc")
+    result = cast[pointer](cast[int](cell) +% sizeof(FreeCell)) # user data starts right after the header
+    track("alloc", result, size)
+
+proc alloc0(allocator: var MemRegion, size: Natural): pointer =
   result = alloc(allocator, size)
   zeroMem(result, size)
 
-proc dealloc(allocator: var TMemRegion, p: pointer) =
-  var x = cast[pointer](cast[TAddress](p) -% sizeof(TFreeCell))
-  sysAssert(cast[ptr TFreeCell](x).zeroField == 1, "dealloc 1")
-  rawDealloc(allocator, x)
-  sysAssert(not isAllocatedPtr(allocator, x), "dealloc 2")
+proc dealloc(allocator: var MemRegion, p: pointer) =
+  when defined(gcDestructors):
+    rawDealloc(allocator, p)
+  else:
+    sysAssert(p != nil, "dealloc: p is nil")
+    let header = cast[pointer](cast[int](p) -% sizeof(FreeCell)) # step back to the header `alloc` prepended
+    sysAssert(header != nil, "dealloc: x is nil")
+    sysAssert(allocator.isAccessible(header), "is not accessible")
+    sysAssert(cast[ptr FreeCell](header).zeroField == 1, "dealloc: object header corrupted")
+    rawDealloc(allocator, header)
+    sysAssert(not allocator.isAllocatedPtr(header), "dealloc: object still accessible")
+    track("dealloc", p, 0)
 
-proc realloc(allocator: var TMemRegion, p: pointer, newsize: int): pointer =
+proc realloc(allocator: var MemRegion, p: pointer, newsize: Natural): pointer =
   if newsize > 0:
-    result = alloc0(allocator, newsize)
+    result = alloc(allocator, newsize)
     if p != nil:
-      copyMem(result, p, ptrSize(p))
+      copyMem(result, p, min(ptrSize(p), newsize))
       dealloc(allocator, p)
   elif p != nil:
     dealloc(allocator, p)
 
-proc deallocOsPages(a: var TMemRegion) =
+proc realloc0(allocator: var MemRegion, p: pointer, oldsize, newsize: Natural): pointer =
+  result = realloc(allocator, p, newsize)
+  if newsize > oldsize:
+    zeroMem(cast[pointer](cast[uint](result) + uint(oldsize)), newsize - oldsize)
+
+proc deallocOsPages(a: var MemRegion) =
   # we free every 'ordinarily' allocated page by iterating over the page bits:
-  for p in elements(a.chunkStarts):
-    var page = cast[PChunk](p shl pageShift)
-    when not weirdUnmap:
-      var size = if page.size < PageSize: PageSize else: page.size
-      osDeallocPages(page, size)
-    else:
-      # Linux on PowerPC for example frees MORE than asked if 'munmap'
-      # receives the start of an originally mmap'ed memory block. This is not
-      # too bad, but we must not access 'page.size' then as that could trigger
-      # a segfault. But we don't need to access 'page.size' here anyway,
-      # because calling munmap with PageSize suffices:
-      osDeallocPages(page, PageSize)
+  var it = addr(a.heapLinks)
+  while true:
+    let next = it.next
+    for i in 0..it.len-1:
+      let (p, size) = it.chunks[i]
+      when defined(debugHeapLinks):
+        cprintf("owner %p; dealloc A: %p size: %ld; next: %p\n", addr(a),
+          it, size, next)
+      sysAssert size >= PageSize, "origSize too small"
+      osDeallocPages(p, size)
+    it = next
+    if it == nil: break
   # And then we free the pages that are in use for the page bits:
   llDeallocAll(a)
 
-proc getFreeMem(a: TMemRegion): int {.inline.} = result = a.freeMem
-proc getTotalMem(a: TMemRegion): int {.inline.} = result = a.currMem
-proc getOccupiedMem(a: TMemRegion): int {.inline.} = 
-  result = a.currMem - a.freeMem
+proc getFreeMem(a: MemRegion): int {.inline.} = result = a.freeMem
+proc getTotalMem(a: MemRegion): int {.inline.} = result = a.currMem
+proc getOccupiedMem(a: MemRegion): int {.inline.} =
+  result = a.occ
+  # a.currMem - a.freeMem
+
+when defined(nimTypeNames):
+  proc getMemCounters(a: MemRegion): (int, int) {.inline.} =
+    (a.allocCounter, a.deallocCounter)
 
 # ---------------------- thread memory region -------------------------------
 
-template InstantiateForRegion(allocator: expr) =
-  when false:
+template instantiateForRegion(allocator: untyped) {.dirty.} =
+  {.push stackTrace: off.}
+
+  when defined(nimFulldebug):
     proc interiorAllocatedPtr*(p: pointer): pointer =
       result = interiorAllocatedPtr(allocator, p)
 
     proc isAllocatedPtr*(p: pointer): bool =
-      let p = cast[pointer](cast[TAddress](p)-%TAddress(sizeof(TCell)))
+      let p = cast[pointer](cast[int](p)-%ByteAddress(sizeof(Cell)))
       result = isAllocatedPtr(allocator, p)
 
   proc deallocOsPages = deallocOsPages(allocator)
 
-  proc alloc(size: int): pointer =
+  proc allocImpl(size: Natural): pointer =
     result = alloc(allocator, size)
 
-  proc alloc0(size: int): pointer =
+  proc alloc0Impl(size: Natural): pointer =
     result = alloc0(allocator, size)
 
-  proc dealloc(p: pointer) =
+  proc deallocImpl(p: pointer) =
     dealloc(allocator, p)
 
-  proc realloc(p: pointer, newsize: int): pointer =
-    result = realloc(allocator, p, newsize)
+  proc reallocImpl(p: pointer, newSize: Natural): pointer =
+    result = realloc(allocator, p, newSize)
+
+  proc realloc0Impl(p: pointer, oldSize, newSize: Natural): pointer =
+    result = realloc(allocator, p, newSize)
+    if newSize > oldSize:
+      zeroMem(cast[pointer](cast[uint](result) + uint(oldSize)), newSize - oldSize)
 
   when false:
     proc countFreeMem(): int =
@@ -779,45 +1244,94 @@ template InstantiateForRegion(allocator: expr) =
         inc(result, it.size)
         it = it.next
 
-  proc getFreeMem(): int = 
-    result = allocator.freeMem
+  when hasThreadSupport and not defined(gcDestructors):
+    proc addSysExitProc(quitProc: proc() {.noconv.}) {.importc: "atexit", header: "<stdlib.h>".}
+
+    var sharedHeap: MemRegion
+    var heapLock: SysLock
+    initSysLock(heapLock)
+    addSysExitProc(proc() {.noconv.} = deinitSys(heapLock))
+
+  proc getFreeMem(): int =
     #sysAssert(result == countFreeMem())
+    result = allocator.freeMem
+
+  proc getTotalMem(): int =
+    result = allocator.currMem
+
+  proc getOccupiedMem(): int =
+    result = allocator.occ #getTotalMem() - getFreeMem()
 
-  proc getTotalMem(): int = return allocator.currMem
-  proc getOccupiedMem(): int = return getTotalMem() - getFreeMem()
+  proc getMaxMem*(): int =
+    result = getMaxMem(allocator)
+
+  when defined(nimTypeNames):
+    proc getMemCounters*(): (int, int) = getMemCounters(allocator)
 
   # -------------------- shared heap region ----------------------------------
-  when hasThreadSupport:
-    var sharedHeap: TMemRegion
-    var heapLock: TSysLock
-    InitSysLock(HeapLock)
 
-  proc allocShared(size: int): pointer =
-    when hasThreadSupport:
-      AcquireSys(HeapLock)
+  proc allocSharedImpl(size: Natural): pointer =
+    when hasThreadSupport and not defined(gcDestructors):
+      acquireSys(heapLock)
       result = alloc(sharedHeap, size)
-      ReleaseSys(HeapLock)
+      releaseSys(heapLock)
     else:
-      result = alloc(size)
+      result = allocImpl(size)
 
-  proc allocShared0(size: int): pointer =
-    result = allocShared(size)
+  proc allocShared0Impl(size: Natural): pointer =
+    result = allocSharedImpl(size)
     zeroMem(result, size)
 
-  proc deallocShared(p: pointer) =
-    when hasThreadSupport: 
-      AcquireSys(HeapLock)
+  proc deallocSharedImpl(p: pointer) =
+    when hasThreadSupport and not defined(gcDestructors):
+      acquireSys(heapLock)
       dealloc(sharedHeap, p)
-      ReleaseSys(HeapLock)
+      releaseSys(heapLock)
+    else:
+      deallocImpl(p)
+
+  proc reallocSharedImpl(p: pointer, newSize: Natural): pointer =
+    when hasThreadSupport and not defined(gcDestructors):
+      acquireSys(heapLock)
+      result = realloc(sharedHeap, p, newSize)
+      releaseSys(heapLock)
     else:
-      dealloc(p)
+      result = reallocImpl(p, newSize)
 
-  proc reallocShared(p: pointer, newsize: int): pointer =
-    when hasThreadSupport: 
-      AcquireSys(HeapLock)
-      result = realloc(sharedHeap, p, newsize)
-      ReleaseSys(HeapLock)
+  proc reallocShared0Impl(p: pointer, oldSize, newSize: Natural): pointer =
+    when hasThreadSupport and not defined(gcDestructors):
+      acquireSys(heapLock)
+      result = realloc0(sharedHeap, p, oldSize, newSize)
+      releaseSys(heapLock)
     else:
-      result = realloc(p, newsize)
+      result = realloc0Impl(p, oldSize, newSize)
+
+  when hasThreadSupport:
+    when defined(gcDestructors):
+      proc getFreeSharedMem(): int =
+        allocator.freeMem
+
+      proc getTotalSharedMem(): int =
+        allocator.currMem
+
+      proc getOccupiedSharedMem(): int =
+        allocator.occ
+
+    else:
+      template sharedMemStatsShared(v: int) =
+        acquireSys(heapLock)
+        result = v
+        releaseSys(heapLock)
+
+      proc getFreeSharedMem(): int =
+        sharedMemStatsShared(sharedHeap.freeMem)
+
+      proc getTotalSharedMem(): int =
+        sharedMemStatsShared(sharedHeap.currMem)
+
+      proc getOccupiedSharedMem(): int =
+        sharedMemStatsShared(sharedHeap.occ)
+        #sharedMemStatsShared(sharedHeap.currMem - sharedHeap.freeMem)
+  {.pop.}
 
 {.pop.}
diff --git a/lib/system/ansi_c.nim b/lib/system/ansi_c.nim
index 33e1ea982..3098e17d6 100755..100644
--- a/lib/system/ansi_c.nim
+++ b/lib/system/ansi_c.nim
@@ -1,109 +1,233 @@
 #
 #
-#            Nimrod's Runtime Library
+#            Nim's Runtime Library
 #        (c) Copyright 2013 Andreas Rumpf
 #
 #    See the file "copying.txt", included in this
 #    distribution, for details about the copyright.
 #
 
-# This include file contains headers of Ansi C procs
-# and definitions of Ansi C types in Nimrod syntax
+# This module contains headers of Ansi C procs
+# and definitions of Ansi C types in Nim syntax
 # All symbols are prefixed with 'c_' to avoid ambiguities
 
-{.push hints:off}
-
-proc c_strcmp(a, b: CString): cint {.nodecl, noSideEffect, importc: "strcmp".}
-proc c_memcmp(a, b: CString, size: int): cint {.
-  nodecl, noSideEffect, importc: "memcmp".}
-proc c_memcpy(a, b: CString, size: int) {.nodecl, importc: "memcpy".}
-proc c_strlen(a: CString): int {.nodecl, noSideEffect, importc: "strlen".}
-proc c_memset(p: pointer, value: cint, size: int) {.nodecl, importc: "memset".}
-
-type
-  C_TextFile {.importc: "FILE", nodecl, final, incompleteStruct.} = object
-  C_BinaryFile {.importc: "FILE", nodecl, final, incompleteStruct.} = object
-  C_TextFileStar = ptr CTextFile
-  C_BinaryFileStar = ptr CBinaryFile
-
-  C_JmpBuf {.importc: "jmp_buf".} = array[0..31, int]
-
-var
-  c_stdin {.importc: "stdin", noDecl.}: C_TextFileStar
-  c_stdout {.importc: "stdout", noDecl.}: C_TextFileStar
-  c_stderr {.importc: "stderr", noDecl.}: C_TextFileStar
-
-# constants faked as variables:
-when not defined(SIGINT):
-  var 
-    SIGINT {.importc: "SIGINT", nodecl.}: cint
-    SIGSEGV {.importc: "SIGSEGV", nodecl.}: cint
-    SIGABRT {.importc: "SIGABRT", nodecl.}: cint
-    SIGFPE {.importc: "SIGFPE", nodecl.}: cint
-    SIGILL {.importc: "SIGILL", nodecl.}: cint
+{.push hints:off, stack_trace: off, profiler: off.}
+
+proc c_memchr*(s: pointer, c: cint, n: csize_t): pointer {.
+  importc: "memchr", header: "<string.h>".}
+proc c_memcmp*(a, b: pointer, size: csize_t): cint {.
+  importc: "memcmp", header: "<string.h>", noSideEffect.}
+proc c_memcpy*(a, b: pointer, size: csize_t): pointer {.
+  importc: "memcpy", header: "<string.h>", discardable.}
+proc c_memmove*(a, b: pointer, size: csize_t): pointer {.
+  importc: "memmove", header: "<string.h>",discardable.}
+proc c_memset*(p: pointer, value: cint, size: csize_t): pointer {.
+  importc: "memset", header: "<string.h>", discardable.}
+proc c_strcmp*(a, b: cstring): cint {.
+  importc: "strcmp", header: "<string.h>", noSideEffect.}
+proc c_strlen*(a: cstring): csize_t {.
+  importc: "strlen", header: "<string.h>", noSideEffect.}
+proc c_abort*() {.
+  importc: "abort", header: "<stdlib.h>", noSideEffect, noreturn.}
+
+
+when defined(nimBuiltinSetjmp):
+  type
+    C_JmpBuf* = array[5, pointer]
+elif defined(linux) and defined(amd64):
+  type
+    C_JmpBuf* {.importc: "jmp_buf", header: "<setjmp.h>", bycopy.} = object
+        abi: array[200 div sizeof(clong), clong]
+else:
+  type
+    C_JmpBuf* {.importc: "jmp_buf", header: "<setjmp.h>".} = object
+
+
+type CSighandlerT = proc (a: cint) {.noconv.}
+when defined(windows):
+  const
+    SIGABRT* = cint(22)
+    SIGFPE* = cint(8)
+    SIGILL* = cint(4)
+    SIGINT* = cint(2)
+    SIGSEGV* = cint(11)
+    SIGTERM = cint(15)
+    SIG_DFL* = cast[CSighandlerT](0)
+elif defined(macosx) or defined(linux) or defined(freebsd) or
+     defined(openbsd) or defined(netbsd) or defined(solaris) or
+     defined(dragonfly) or defined(nintendoswitch) or defined(genode) or
+     defined(aix) or hostOS == "standalone":
+  const
+    SIGABRT* = cint(6)
+    SIGFPE* = cint(8)
+    SIGILL* = cint(4)
+    SIGINT* = cint(2)
+    SIGSEGV* = cint(11)
+    SIGTERM* = cint(15)
+    SIGPIPE* = cint(13)
+    SIG_DFL* = CSighandlerT(nil)
+elif defined(haiku):
+  const
+    SIGABRT* = cint(6)
+    SIGFPE* = cint(8)
+    SIGILL* = cint(4)
+    SIGINT* = cint(2)
+    SIGSEGV* = cint(11)
+    SIGTERM* = cint(15)
+    SIGPIPE* = cint(7)
+    SIG_DFL* = CSighandlerT(nil)
+else:
+  when defined(nimscript):
+    {.error: "SIGABRT not ported to your platform".}
+  else:
+    var
+      SIGINT* {.importc: "SIGINT", nodecl.}: cint
+      SIGSEGV* {.importc: "SIGSEGV", nodecl.}: cint
+      SIGABRT* {.importc: "SIGABRT", nodecl.}: cint
+      SIGFPE* {.importc: "SIGFPE", nodecl.}: cint
+      SIGILL* {.importc: "SIGILL", nodecl.}: cint
+      SIG_DFL* {.importc: "SIG_DFL", nodecl.}: CSighandlerT
+    when defined(macosx) or defined(linux):
+      var SIGPIPE* {.importc: "SIGPIPE", nodecl.}: cint
 
 when defined(macosx):
-  var
-    SIGBUS {.importc: "SIGBUS", nodecl.}: cint
-      # hopefully this does not lead to new bugs
+  const SIGBUS* = cint(10)
+elif defined(haiku):
+  const SIGBUS* = cint(30)
+
+# "nimRawSetjmp" is defined by default for certain platforms, so we need the
+# "nimStdSetjmp" escape hatch with it.
+when defined(nimSigSetjmp):
+  proc c_longjmp*(jmpb: C_JmpBuf, retval: cint) {.
+    header: "<setjmp.h>", importc: "siglongjmp".}
+  proc c_setjmp*(jmpb: C_JmpBuf): cint =
+    proc c_sigsetjmp(jmpb: C_JmpBuf, savemask: cint): cint {.
+      header: "<setjmp.h>", importc: "sigsetjmp".}
+    c_sigsetjmp(jmpb, 0)
+elif defined(nimBuiltinSetjmp):
+  proc c_longjmp*(jmpb: C_JmpBuf, retval: cint) =
+    # Apple's Clang++ has trouble converting array names to pointers, so we need
+    # to be very explicit here.
+    proc c_builtin_longjmp(jmpb: ptr pointer, retval: cint) {.
+      importc: "__builtin_longjmp", nodecl.}
+    # The second parameter needs to be 1 and sometimes the C/C++ compiler checks it.
+    c_builtin_longjmp(unsafeAddr jmpb[0], 1)
+  proc c_setjmp*(jmpb: C_JmpBuf): cint =
+    proc c_builtin_setjmp(jmpb: ptr pointer): cint {.
+      importc: "__builtin_setjmp", nodecl.}
+    c_builtin_setjmp(unsafeAddr jmpb[0])
+
+elif defined(nimRawSetjmp) and not defined(nimStdSetjmp):
+  when defined(windows):
+    # No `_longjmp()` on Windows.
+    proc c_longjmp*(jmpb: C_JmpBuf, retval: cint) {.
+      header: "<setjmp.h>", importc: "longjmp".}
+    when defined(vcc) or defined(clangcl):
+      proc c_setjmp*(jmpb: C_JmpBuf): cint {.
+        header: "<setjmp.h>", importc: "setjmp".}
+    else:
+      # The Windows `_setjmp()` takes two arguments, with the second being an
+      # undocumented buffer used by the SEH mechanism for stack unwinding.
+      # Mingw-w64 has been trying to get it right for years, but it's still
+      # prone to stack corruption during unwinding, so we disable that by setting
+      # it to NULL.
+      # More details: https://github.com/status-im/nimbus-eth2/issues/3121
+      when defined(nimHasStyleChecks):
+        {.push styleChecks: off.}
+
+      proc c_setjmp*(jmpb: C_JmpBuf): cint =
+        proc c_setjmp_win(jmpb: C_JmpBuf, ctx: pointer): cint {.
+          header: "<setjmp.h>", importc: "_setjmp".}
+        c_setjmp_win(jmpb, nil)
+
+      when defined(nimHasStyleChecks):
+        {.pop.}
+  else:
+    proc c_longjmp*(jmpb: C_JmpBuf, retval: cint) {.
+      header: "<setjmp.h>", importc: "_longjmp".}
+    proc c_setjmp*(jmpb: C_JmpBuf): cint {.
+      header: "<setjmp.h>", importc: "_setjmp".}
 else:
-  var
-    SIGBUS {.importc: "SIGSEGV", nodecl.}: cint
-      # only Mac OS X has this shit
-
-proc c_longjmp(jmpb: C_JmpBuf, retval: cint) {.nodecl, importc: "longjmp".}
-proc c_setjmp(jmpb: var C_JmpBuf): cint {.nodecl, importc: "setjmp".}
-
-proc c_signal(sig: cint, handler: proc (a: cint) {.noconv.}) {.
-  importc: "signal", header: "<signal.h>".}
-proc c_raise(sig: cint) {.importc: "raise", header: "<signal.h>".}
-
-proc c_fputs(c: cstring, f: C_TextFileStar) {.importc: "fputs", noDecl.}
-proc c_fgets(c: cstring, n: int, f: C_TextFileStar): cstring  {.
-  importc: "fgets", noDecl.}
-proc c_fgetc(stream: C_TextFileStar): int {.importc: "fgetc", nodecl.}
-proc c_ungetc(c: int, f: C_TextFileStar) {.importc: "ungetc", nodecl.}
-proc c_putc(c: Char, stream: C_TextFileStar) {.importc: "putc", nodecl.}
-proc c_fprintf(f: C_TextFileStar, frmt: CString) {.
-  importc: "fprintf", nodecl, varargs.}
-proc c_printf(frmt: CString) {.
-  importc: "printf", nodecl, varargs.}
-
-proc c_fopen(filename, mode: cstring): C_TextFileStar {.
-  importc: "fopen", nodecl.}
-proc c_fclose(f: C_TextFileStar) {.importc: "fclose", nodecl.}
-
-proc c_sprintf(buf, frmt: CString) {.nodecl, importc: "sprintf", varargs,
-                                     noSideEffect.}
-  # we use it only in a way that cannot lead to security issues
-
-proc c_fread(buf: Pointer, size, n: int, f: C_BinaryFileStar): int {.
-  importc: "fread", noDecl.}
-proc c_fseek(f: C_BinaryFileStar, offset: clong, whence: int): int {.
-  importc: "fseek", noDecl.}
-
-proc c_fwrite(buf: Pointer, size, n: int, f: C_BinaryFileStar): int {.
-  importc: "fwrite", noDecl.}
-
-proc c_exit(errorcode: cint) {.importc: "exit", nodecl.}
-proc c_ferror(stream: C_TextFileStar): bool {.importc: "ferror", nodecl.}
-proc c_fflush(stream: C_TextFileStar) {.importc: "fflush", nodecl.}
-proc c_abort() {.importc: "abort", nodecl.}
-proc c_feof(stream: C_TextFileStar): bool {.importc: "feof", nodecl.}
+  proc c_longjmp*(jmpb: C_JmpBuf, retval: cint) {.
+    header: "<setjmp.h>", importc: "longjmp".}
+  proc c_setjmp*(jmpb: C_JmpBuf): cint {.
+    header: "<setjmp.h>", importc: "setjmp".}
 
-proc c_malloc(size: int): pointer {.importc: "malloc", nodecl.}
-proc c_free(p: pointer) {.importc: "free", nodecl.}
-proc c_realloc(p: pointer, newsize: int): pointer {.importc: "realloc", nodecl.}
+proc c_signal*(sign: cint, handler: CSighandlerT): CSighandlerT {.
+  importc: "signal", header: "<signal.h>", discardable.}
+proc c_raise*(sign: cint): cint {.importc: "raise", header: "<signal.h>".}
 
-when not defined(errno):
-  var errno {.importc, header: "<errno.h>".}: cint ## error variable
-proc strerror(errnum: cint): cstring {.importc, header: "<string.h>".}
+type
+  CFile {.importc: "FILE", header: "<stdio.h>",
+          incompleteStruct.} = object
+  CFilePtr* = ptr CFile ## The type representing a file handle.
 
-proc c_remove(filename: CString): cint {.importc: "remove", noDecl.}
-proc c_rename(oldname, newname: CString): cint {.importc: "rename", noDecl.}
+# duplicated between io and ansi_c
+const stdioUsesMacros = (defined(osx) or defined(freebsd) or defined(dragonfly)) and not defined(emscripten)
+const stderrName = when stdioUsesMacros: "__stderrp" else: "stderr"
+const stdoutName = when stdioUsesMacros: "__stdoutp" else: "stdout"
+const stdinName = when stdioUsesMacros: "__stdinp" else: "stdin"
 
-proc c_system(cmd: CString): cint {.importc: "system", header: "<stdlib.h>".}
-proc c_getenv(env: CString): CString {.importc: "getenv", noDecl.}
-proc c_putenv(env: CString): cint {.importc: "putenv", noDecl.}
+var
+  cstderr* {.importc: stderrName, header: "<stdio.h>".}: CFilePtr
+  cstdout* {.importc: stdoutName, header: "<stdio.h>".}: CFilePtr
+  cstdin* {.importc: stdinName, header: "<stdio.h>".}: CFilePtr
+
+proc c_fprintf*(f: CFilePtr, frmt: cstring): cint {.
+  importc: "fprintf", header: "<stdio.h>", varargs, discardable.}
+proc c_printf*(frmt: cstring): cint {.
+  importc: "printf", header: "<stdio.h>", varargs, discardable.}
+
+proc c_fputs*(c: cstring, f: CFilePtr): cint {.
+  importc: "fputs", header: "<stdio.h>", discardable.}
+proc c_fputc*(c: char, f: CFilePtr): cint {.
+  importc: "fputc", header: "<stdio.h>", discardable.}
+
+proc c_sprintf*(buf, frmt: cstring): cint {.
+  importc: "sprintf", header: "<stdio.h>", varargs, noSideEffect.}
+  # we use it only in a way that cannot lead to security issues
 
-{.pop}
+proc c_snprintf*(buf: cstring, n: csize_t, frmt: cstring): cint {.
+  importc: "snprintf", header: "<stdio.h>", varargs, noSideEffect.}
+
+when defined(zephyr) and not defined(zephyrUseLibcMalloc):
+  proc c_malloc*(size: csize_t): pointer {.
+    importc: "k_malloc", header: "<kernel.h>".}
+  proc c_calloc*(nmemb, size: csize_t): pointer {.
+    importc: "k_calloc", header: "<kernel.h>".}
+  proc c_free*(p: pointer) {.
+    importc: "k_free", header: "<kernel.h>".}
+  proc c_realloc*(p: pointer, newsize: csize_t): pointer =
+    # Zephyr's kernel malloc doesn't support realloc; emulate it via malloc+copy+free.
+    result = c_malloc(newSize)
+    # Match the ANSI C behavior: `p` stays valid on failure, `p == nil` acts as malloc.
+    if not result.isNil() and not p.isNil():
+      copyMem(result, p, newSize)  # NOTE(review): old size is unknown; may over-read when growing
+      c_free(p)
+else:
+  proc c_malloc*(size: csize_t): pointer {.
+    importc: "malloc", header: "<stdlib.h>".}
+  proc c_calloc*(nmemb, size: csize_t): pointer {.
+    importc: "calloc", header: "<stdlib.h>".}
+  proc c_free*(p: pointer) {.
+    importc: "free", header: "<stdlib.h>".}
+  proc c_realloc*(p: pointer, newsize: csize_t): pointer {.
+    importc: "realloc", header: "<stdlib.h>".}
+
+proc c_fwrite*(buf: pointer, size, n: csize_t, f: CFilePtr): csize_t {.
+  importc: "fwrite", header: "<stdio.h>".}
+
+proc c_fflush*(f: CFilePtr): cint {.
+  importc: "fflush", header: "<stdio.h>".}
+
+proc rawWriteString*(f: CFilePtr, s: cstring, length: int) {.compilerproc, nonReloadable, inline.} =
+  # we cannot throw an exception here!
+  discard c_fwrite(s, 1, cast[csize_t](length), f)
+  discard c_fflush(f)
+
+proc rawWrite*(f: CFilePtr, s: cstring) {.compilerproc, nonReloadable, inline.} =
+  # we cannot throw an exception here!
+  discard c_fwrite(s, 1, c_strlen(s), f)
+  discard c_fflush(f)
+
+{.pop.}
diff --git a/lib/system/arc.nim b/lib/system/arc.nim
new file mode 100644
index 000000000..d001fcaa5
--- /dev/null
+++ b/lib/system/arc.nim
@@ -0,0 +1,267 @@
+#
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2019 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+#[
+In this new runtime we simplify the object layouts a bit: The runtime type
+information is only accessed for the objects that have it and it's always
+at offset 0 then. The ``ref`` object header is independent from the
+runtime type and only contains a reference count.
+]#
+
+when defined(gcOrc):
+  const
+    rcIncrement = 0b10000 # so that lowest 4 bits are not touched
+    rcMask = 0b1111
+    rcShift = 4      # shift by rcShift to get the reference counter
+
+else:
+  const
+    rcIncrement = 0b1000 # so that lowest 3 bits are not touched
+    rcMask = 0b111
+    rcShift = 3      # shift by rcShift to get the reference counter
+
+const
+  orcLeakDetector = defined(nimOrcLeakDetector)
+
+type
+  RefHeader = object
+    rc: int # the object header is now a single RC field.
+            # we could remove it in non-debug builds for the 'owned ref'
+            # design but this seems unwise.
+    when defined(gcOrc):
+      rootIdx: int # thanks to this we can delete potential cycle roots
+                   # in O(1) without doubly linked lists
+    when defined(nimArcDebug) or defined(nimArcIds):
+      refId: int
+    when defined(gcOrc) and orcLeakDetector:
+      filename: cstring
+      line: int
+
+  Cell = ptr RefHeader
+
+template setFrameInfo(c: Cell) =
+  when orcLeakDetector:
+    if framePtr != nil and framePtr.prev != nil:
+      c.filename = framePtr.prev.filename
+      c.line = framePtr.prev.line
+    else:
+      c.filename = nil
+      c.line = 0
+
+template head(p: pointer): Cell =
+  cast[Cell](cast[int](p) -% sizeof(RefHeader))
+
+const
+  traceCollector = defined(traceArc)
+
+when defined(nimArcDebug):
+  include cellsets
+
+  const traceId = 20 # 1037
+
+  var gRefId: int
+  var freedCells: CellSet
+elif defined(nimArcIds):
+  var gRefId: int
+
+  const traceId = -1
+
+when defined(gcAtomicArc) and hasThreadSupport:
+  template decrement(cell: Cell): untyped =
+    discard atomicDec(cell.rc, rcIncrement)
+  template increment(cell: Cell): untyped =
+    discard atomicInc(cell.rc, rcIncrement)
+  template count(x: Cell): untyped =
+    atomicLoadN(x.rc.addr, ATOMIC_ACQUIRE) shr rcShift
+else:
+  template decrement(cell: Cell): untyped =
+    dec(cell.rc, rcIncrement)
+  template increment(cell: Cell): untyped =
+    inc(cell.rc, rcIncrement)
+  template count(x: Cell): untyped =
+    x.rc shr rcShift
+
+proc nimNewObj(size, alignment: int): pointer {.compilerRtl.} =
+  let hdrSize = align(sizeof(RefHeader), alignment)
+  let s = size + hdrSize
+  when defined(nimscript):
+    discard
+  else:
+    result = alignedAlloc0(s, alignment) +! hdrSize
+  when defined(nimArcDebug) or defined(nimArcIds):
+    head(result).refId = gRefId
+    atomicInc gRefId
+    if head(result).refId == traceId:
+      writeStackTrace()
+      cfprintf(cstderr, "[nimNewObj] %p %ld\n", result, head(result).count)
+  when traceCollector:
+    cprintf("[Allocated] %p result: %p\n", result -! sizeof(RefHeader), result)
+  setFrameInfo head(result)
+
+proc nimNewObjUninit(size, alignment: int): pointer {.compilerRtl.} =
+  # Same as 'nimNewObj' but the object memory is not zero-initialized; the
+  # codegen proved that zeroing is unnecessary. Header fields are set explicitly below.
+  let hdrSize = align(sizeof(RefHeader), alignment)
+  let s = size + hdrSize
+  when defined(nimscript):
+    discard
+  else:
+    result = cast[ptr RefHeader](alignedAlloc(s, alignment) +! hdrSize)
+  head(result).rc = 0
+  when defined(gcOrc):
+    head(result).rootIdx = 0
+  when defined(nimArcDebug) or defined(nimArcIds):  # same condition as the `refId` field in RefHeader
+    head(result).refId = gRefId
+    atomicInc gRefId
+    if head(result).refId == traceId:
+      writeStackTrace()
+      cfprintf(cstderr, "[nimNewObjUninit] %p %ld\n", result, head(result).count)
+
+  when traceCollector:
+    cprintf("[Allocated] %p result: %p\n", result -! sizeof(RefHeader), result)
+  setFrameInfo head(result)
+
+proc nimDecWeakRef(p: pointer) {.compilerRtl, inl.} =
+  decrement head(p)
+
+proc isUniqueRef*[T](x: ref T): bool {.inline.} =
+  ## Returns true if the object `x` points to is uniquely referenced. Such
+  ## an object can potentially be passed over to a different thread safely,
+  ## if great care is taken. This queries the internal reference count of
+  ## the object which is subject to lots of optimizations! In other words
+  ## the value of `isUniqueRef` can depend on the used compiler version and
+  ## optimizer setting.
+  ## Nevertheless it can be used as a very valuable debugging tool and can
+  ## be used to specify the constraints of a threading related API
+  ## via `assert isUniqueRef(x)`.
+  head(cast[pointer](x)).rc == 0
+
+proc nimIncRef(p: pointer) {.compilerRtl, inl.} =
+  when defined(nimArcDebug):
+    if head(p).refId == traceId:
+      writeStackTrace()
+      cfprintf(cstderr, "[IncRef] %p %ld\n", p, head(p).count)
+
+  increment head(p)
+  when traceCollector:
+    cprintf("[INCREF] %p\n", head(p))
+
+when not defined(gcOrc) or defined(nimThinout):
+  proc unsureAsgnRef(dest: ptr pointer, src: pointer) {.inline.} =
+    # This is only used by the old RTTI mechanism and we know
+    # that 'dest[]' is nil and needs no destruction. Which is really handy
+    # as we cannot destroy the object reliably if it's an object of unknown
+    # compile-time type.
+    dest[] = src
+    if src != nil: nimIncRef src
+
+when not defined(nimscript) and defined(nimArcDebug):
+  proc deallocatedRefId*(p: pointer): int =
+    ## Returns the ref's ID if the ref was already deallocated. This
+    ## is a memory corruption check. Returns 0 if there is no error.
+    let c = head(p)
+    if freedCells.data != nil and freedCells.contains(c):
+      result = c.refId
+    else:
+      result = 0
+
+proc nimRawDispose(p: pointer, alignment: int) {.compilerRtl.} =
+  when not defined(nimscript):
+    when traceCollector:
+      cprintf("[Freed] %p\n", p -! sizeof(RefHeader))
+    when defined(nimOwnedEnabled):
+      if head(p).rc >= rcIncrement:
+        cstderr.rawWrite "[FATAL] dangling references exist\n"
+        rawQuit 1
+    when defined(nimArcDebug):
+      # we do NOT really free the memory here in order to reliably detect use-after-frees
+      if freedCells.data == nil: init(freedCells)
+      freedCells.incl head(p)
+    else:
+      let hdrSize = align(sizeof(RefHeader), alignment)
+      alignedDealloc(p -! hdrSize, alignment)
+
+template `=dispose`*[T](x: owned(ref T)) = nimRawDispose(cast[pointer](x), T.alignOf)
+#proc dispose*(x: pointer) = nimRawDispose(x)
+
+proc nimDestroyAndDispose(p: pointer) {.compilerRtl, raises: [].} =
+  let rti = cast[ptr PNimTypeV2](p)
+  if rti.destructor != nil:
+    cast[DestructorProc](rti.destructor)(p)
+  when false:
+    cstderr.rawWrite cast[ptr PNimTypeV2](p)[].name
+    cstderr.rawWrite "\n"
+    if d == nil:
+      cstderr.rawWrite "bah, nil\n"
+    else:
+      cstderr.rawWrite "has destructor!\n"
+  nimRawDispose(p, rti.align)
+
+when defined(gcOrc):
+  when defined(nimThinout):
+    include cyclebreaker
+  else:
+    include orc
+    #include cyclecollector
+
+proc nimDecRefIsLast(p: pointer): bool {.compilerRtl, inl.} =
+  if p != nil:
+    var cell = head(p)
+
+    when defined(nimArcDebug):
+      if cell.refId == traceId:
+        writeStackTrace()
+        cfprintf(cstderr, "[DecRef] %p %ld\n", p, cell.count)
+
+    when defined(gcAtomicArc) and hasThreadSupport:
+      # `atomicDec` returns the new value
+      if atomicDec(cell.rc, rcIncrement) == -rcIncrement:
+        result = true
+        when traceCollector:
+          cprintf("[ABOUT TO DESTROY] %p\n", cell)
+    else:
+      if cell.count == 0:
+        result = true
+        when traceCollector:
+          cprintf("[ABOUT TO DESTROY] %p\n", cell)
+      else:
+        decrement cell
+        # According to Lins it's correct to do nothing else here.
+        when traceCollector:
+          cprintf("[DECREF] %p\n", cell)
+
+proc GC_unref*[T](x: ref T) =
+  ## New runtime only supports this operation for 'ref T'.
+  var y {.cursor.} = x
+  `=destroy`(y)
+
+proc GC_ref*[T](x: ref T) =
+  ## New runtime only supports this operation for 'ref T'.
+  if x != nil: nimIncRef(cast[pointer](x))
+
+when not defined(gcOrc):
+  template GC_fullCollect* =
+    ## Forces a full garbage collection pass. With `--mm:arc` a nop.
+    discard
+
+template setupForeignThreadGc* =
+  ## With `--mm:arc` a nop.
+  discard
+
+template tearDownForeignThreadGc* =
+  ## With `--mm:arc` a nop.
+  discard
+
+proc isObjDisplayCheck(source: PNimTypeV2, targetDepth: int16, token: uint32): bool {.compilerRtl, inl.} =
+  result = targetDepth <= source.depth and source.display[targetDepth] == token
+
+when defined(gcDestructors):
+  proc nimGetVTable(p: pointer, index: int): pointer
+        {.compilerRtl, inline, raises: [].} =
+    result = cast[ptr PNimTypeV2](p).vTable[index]
diff --git a/lib/system/arithm.nim b/lib/system/arithm.nim
deleted file mode 100755
index 3efbfa7bb..000000000
--- a/lib/system/arithm.nim
+++ /dev/null
@@ -1,337 +0,0 @@
-#
-#
-#            Nimrod's Runtime Library
-#        (c) Copyright 2012 Andreas Rumpf
-#
-#    See the file "copying.txt", included in this
-#    distribution, for details about the copyright.
-#
-
-
-# simple integer arithmetic with overflow checking
-
-proc raiseOverflow {.compilerproc, noinline, noreturn.} =
-  # a single proc to reduce code size to a minimum
-  raise newException(EOverflow, "over- or underflow")
-
-proc raiseDivByZero {.compilerproc, noinline, noreturn.} =
-  raise newException(EDivByZero, "divison by zero")
-
-proc addInt64(a, b: int64): int64 {.compilerProc, inline.} =
-  result = a +% b
-  if (result xor a) >= int64(0) or (result xor b) >= int64(0):
-    return result
-  raiseOverflow()
-
-proc subInt64(a, b: int64): int64 {.compilerProc, inline.} =
-  result = a -% b
-  if (result xor a) >= int64(0) or (result xor not b) >= int64(0):
-    return result
-  raiseOverflow()
-
-proc negInt64(a: int64): int64 {.compilerProc, inline.} =
-  if a != low(int64): return -a
-  raiseOverflow()
-
-proc absInt64(a: int64): int64 {.compilerProc, inline.} =
-  if a != low(int64):
-    if a >= 0: return a
-    else: return -a
-  raiseOverflow()
-
-proc divInt64(a, b: int64): int64 {.compilerProc, inline.} =
-  if b == int64(0):
-    raiseDivByZero()
-  if a == low(int64) and b == int64(-1):
-    raiseOverflow()
-  return a div b
-
-proc modInt64(a, b: int64): int64 {.compilerProc, inline.} =
-  if b == int64(0):
-    raiseDivByZero()
-  return a mod b
-
-#
-# This code has been inspired by Python's source code.
-# The native int product x*y is either exactly right or *way* off, being
-# just the last n bits of the true product, where n is the number of bits
-# in an int (the delivered product is the true product plus i*2**n for
-# some integer i).
-#
-# The native float64 product x*y is subject to three
-# rounding errors: on a sizeof(int)==8 box, each cast to double can lose
-# info, and even on a sizeof(int)==4 box, the multiplication can lose info.
-# But, unlike the native int product, it's not in *range* trouble:  even
-# if sizeof(int)==32 (256-bit ints), the product easily fits in the
-# dynamic range of a float64. So the leading 50 (or so) bits of the float64
-# product are correct.
-#
-# We check these two ways against each other, and declare victory if they're
-# approximately the same. Else, because the native int product is the only
-# one that can lose catastrophic amounts of information, it's the native int
-# product that must have overflowed.
-#
-proc mulInt64(a, b: int64): int64 {.compilerproc.} =
-  var
-    resAsFloat, floatProd: float64
-  result = a *% b
-  floatProd = toBiggestFloat(a) # conversion
-  floatProd = floatProd * toBiggestFloat(b)
-  resAsFloat = toBiggestFloat(result)
-
-  # Fast path for normal case: small multiplicands, and no info
-  # is lost in either method.
-  if resAsFloat == floatProd: return result
-
-  # Somebody somewhere lost info. Close enough, or way off? Note
-  # that a != 0 and b != 0 (else resAsFloat == floatProd == 0).
-  # The difference either is or isn't significant compared to the
-  # true value (of which floatProd is a good approximation).
-
-  # abs(diff)/abs(prod) <= 1/32 iff
-  #   32 * abs(diff) <= abs(prod) -- 5 good bits is "close enough"
-  if 32.0 * abs(resAsFloat - floatProd) <= abs(floatProd):
-    return result
-  raiseOverflow()
-
-
-proc absInt(a: int): int {.compilerProc, inline.} =
-  if a != low(int):
-    if a >= 0: return a
-    else: return -a
-  raiseOverflow()
-
-const
-  asmVersion = defined(I386) and (defined(vcc) or defined(wcc) or
-               defined(dmc) or defined(gcc) or defined(llvm_gcc))
-    # my Version of Borland C++Builder does not have
-    # tasm32, which is needed for assembler blocks
-    # this is why Borland is not included in the 'when'
-
-when asmVersion and not defined(gcc) and not defined(llvm_gcc):
-  # assembler optimized versions for compilers that
-  # have an intel syntax assembler:
-  proc addInt(a, b: int): int {.compilerProc, noStackFrame.} =
-    # a in eax, and b in edx
-    asm """
-        mov eax, `a`
-        add eax, `b`
-        jno theEnd
-        call `raiseOverflow`
-      theEnd:
-    """
-
-  proc subInt(a, b: int): int {.compilerProc, noStackFrame.} =
-    asm """
-        mov eax, `a`
-        sub eax, `b`
-        jno theEnd
-        call `raiseOverflow`
-      theEnd:
-    """
-
-  proc negInt(a: int): int {.compilerProc, noStackFrame.} =
-    asm """
-        mov eax, `a`
-        neg eax
-        jno theEnd
-        call `raiseOverflow`
-      theEnd:
-    """
-
-  proc divInt(a, b: int): int {.compilerProc, noStackFrame.} =
-    asm """
-        mov eax, `a`
-        mov ecx, `b`
-        xor edx, edx
-        idiv ecx
-        jno  theEnd
-        call `raiseOverflow`
-      theEnd:
-    """
-
-  proc modInt(a, b: int): int {.compilerProc, noStackFrame.} =
-    asm """
-        mov eax, `a`
-        mov ecx, `b`
-        xor edx, edx
-        idiv ecx
-        jno theEnd
-        call `raiseOverflow`
-      theEnd:
-        mov eax, edx
-    """
-
-  proc mulInt(a, b: int): int {.compilerProc, noStackFrame.} =
-    asm """
-        mov eax, `a`
-        mov ecx, `b`
-        xor edx, edx
-        imul ecx
-        jno theEnd
-        call `raiseOverflow`
-      theEnd:
-    """
-
-elif false: # asmVersion and (defined(gcc) or defined(llvm_gcc)):
-  proc addInt(a, b: int): int {.compilerProc, inline.} =
-    # don't use a pure proc here!
-    asm """
-      "addl %%ecx, %%eax\n"
-      "jno 1\n"
-      "call _raiseOverflow\n"
-      "1: \n"
-      :"=a"(`result`)
-      :"a"(`a`), "c"(`b`)
-    """
-    #".intel_syntax noprefix"
-    #/* Intel syntax here */
-    #".att_syntax"
-
-  proc subInt(a, b: int): int {.compilerProc, inline.} =
-    asm """ "subl %%ecx,%%eax\n"
-            "jno 1\n"
-            "call _raiseOverflow\n"
-            "1: \n"
-           :"=a"(`result`)
-           :"a"(`a`), "c"(`b`)
-    """
-
-  proc mulInt(a, b: int): int {.compilerProc, inline.} =
-    asm """  "xorl %%edx, %%edx\n"
-             "imull %%ecx\n"
-             "jno 1\n"
-             "call _raiseOverflow\n"
-             "1: \n"
-            :"=a"(`result`)
-            :"a"(`a`), "c"(`b`)
-            :"%edx"
-    """
-
-  proc negInt(a: int): int {.compilerProc, inline.} =
-    asm """ "negl %%eax\n"
-            "jno 1\n"
-            "call _raiseOverflow\n"
-            "1: \n"
-           :"=a"(`result`)
-           :"a"(`a`)
-    """
-
-  proc divInt(a, b: int): int {.compilerProc, inline.} =
-    asm """  "xorl %%edx, %%edx\n"
-             "idivl %%ecx\n"
-             "jno 1\n"
-             "call _raiseOverflow\n"
-             "1: \n"
-            :"=a"(`result`)
-            :"a"(`a`), "c"(`b`)
-            :"%edx"
-    """
-
-  proc modInt(a, b: int): int {.compilerProc, inline.} =
-    asm """  "xorl %%edx, %%edx\n"
-             "idivl %%ecx\n"
-             "jno 1\n"
-             "call _raiseOverflow\n"
-             "1: \n"
-             "movl %%edx, %%eax"
-            :"=a"(`result`)
-            :"a"(`a`), "c"(`b`)
-            :"%edx"
-    """
-
-# Platform independent versions of the above (slower!)
-when not defined(addInt):
-  proc addInt(a, b: int): int {.compilerProc, inline.} =
-    result = a +% b
-    if (result xor a) >= 0 or (result xor b) >= 0:
-      return result
-    raiseOverflow()
-
-when not defined(subInt):
-  proc subInt(a, b: int): int {.compilerProc, inline.} =
-    result = a -% b
-    if (result xor a) >= 0 or (result xor not b) >= 0:
-      return result
-    raiseOverflow()
-
-when not defined(negInt):
-  proc negInt(a: int): int {.compilerProc, inline.} =
-    if a != low(int): return -a
-    raiseOverflow()
-
-when not defined(divInt):
-  proc divInt(a, b: int): int {.compilerProc, inline.} =
-    if b == 0:
-      raiseDivByZero()
-    if a == low(int) and b == -1:
-      raiseOverflow()
-    return a div b
-
-when not defined(modInt):
-  proc modInt(a, b: int): int {.compilerProc, inline.} =
-    if b == 0:
-      raiseDivByZero()
-    return a mod b
-
-when not defined(mulInt):
-  #
-  # This code has been inspired by Python's source code.
-  # The native int product x*y is either exactly right or *way* off, being
-  # just the last n bits of the true product, where n is the number of bits
-  # in an int (the delivered product is the true product plus i*2**n for
-  # some integer i).
-  #
-  # The native float64 product x*y is subject to three
-  # rounding errors: on a sizeof(int)==8 box, each cast to double can lose
-  # info, and even on a sizeof(int)==4 box, the multiplication can lose info.
-  # But, unlike the native int product, it's not in *range* trouble:  even
-  # if sizeof(int)==32 (256-bit ints), the product easily fits in the
-  # dynamic range of a float64. So the leading 50 (or so) bits of the float64
-  # product are correct.
-  #
-  # We check these two ways against each other, and declare victory if
-  # they're approximately the same. Else, because the native int product is
-  # the only one that can lose catastrophic amounts of information, it's the
-  # native int product that must have overflowed.
-  #
-  proc mulInt(a, b: int): int {.compilerProc.} =
-    var
-      resAsFloat, floatProd: float
-
-    result = a *% b
-    floatProd = toFloat(a) * toFloat(b)
-    resAsFloat = toFloat(result)
-
-    # Fast path for normal case: small multiplicands, and no info
-    # is lost in either method.
-    if resAsFloat == floatProd: return result
-
-    # Somebody somewhere lost info. Close enough, or way off? Note
-    # that a != 0 and b != 0 (else resAsFloat == floatProd == 0).
-    # The difference either is or isn't significant compared to the
-    # true value (of which floatProd is a good approximation).
-
-    # abs(diff)/abs(prod) <= 1/32 iff
-    #   32 * abs(diff) <= abs(prod) -- 5 good bits is "close enough"
-    if 32.0 * abs(resAsFloat - floatProd) <= abs(floatProd):
-      return result
-    raiseOverflow()
-
-# We avoid setting the FPU control word here for compatibility with libraries
-# written in other languages.
-
-proc raiseFloatInvalidOp {.noinline, noreturn.} =
-  raise newException(EFloatInvalidOp, "FPU operation caused a NaN result")
-
-proc nanCheck(x: float64) {.compilerProc, inline.} =
-  if x != x: raiseFloatInvalidOp()
-
-proc raiseFloatOverflow(x: float64) {.noinline, noreturn.} =
-  if x > 0.0:
-    raise newException(EFloatOverflow, "FPU operation caused an overflow")
-  else:
-    raise newException(EFloatUnderflow, "FPU operations caused an underflow")
-
-proc infCheck(x: float64) {.compilerProc, inline.} =
-  if x != 0.0 and x*0.5 == x: raiseFloatOverflow(x)
diff --git a/lib/system/arithmetics.nim b/lib/system/arithmetics.nim
new file mode 100644
index 000000000..e229a0f4b
--- /dev/null
+++ b/lib/system/arithmetics.nim
@@ -0,0 +1,405 @@
+proc succ*[T, V: Ordinal](x: T, y: V = 1): T {.magic: "Succ", noSideEffect.} =
+  ## Returns the `y`-th successor (default: 1) of the value `x`.
+  ##
+  ## If such a value does not exist, `OverflowDefect` is raised
+  ## or a compile time error occurs.
+  runnableExamples:
+    assert succ(5) == 6
+    assert succ(5, 3) == 8
+
+proc pred*[T, V: Ordinal](x: T, y: V = 1): T {.magic: "Pred", noSideEffect.} =
+  ## Returns the `y`-th predecessor (default: 1) of the value `x`.
+  ##
+  ## If such a value does not exist, `OverflowDefect` is raised
+  ## or a compile time error occurs.
+  runnableExamples:
+    assert pred(5) == 4
+    assert pred(5, 3) == 2
+
+proc inc*[T, V: Ordinal](x: var T, y: V = 1) {.magic: "Inc", noSideEffect.} =
+  ## Increments the ordinal `x` by `y`.
+  ##
+  ## If such a value does not exist, `OverflowDefect` is raised or a compile
+  ## time error occurs. This is a short notation for: `x = succ(x, y)`.
+  runnableExamples:
+    var i = 2
+    inc(i)
+    assert i == 3
+    inc(i, 3)
+    assert i == 6
+
+proc dec*[T, V: Ordinal](x: var T, y: V = 1) {.magic: "Dec", noSideEffect.} =
+  ## Decrements the ordinal `x` by `y`.
+  ##
+  ## If such a value does not exist, `OverflowDefect` is raised or a compile
+  ## time error occurs. This is a short notation for: `x = pred(x, y)`.
+  runnableExamples:
+    var i = 2
+    dec(i)
+    assert i == 1
+    dec(i, 3)
+    assert i == -2
+
+
+
+# --------------------------------------------------------------------------
+# built-in operators
+
+# integer calculations:
+proc `+`*(x: int): int {.magic: "UnaryPlusI", noSideEffect.}
+  ## Unary `+` operator for an integer. Has no effect.
+proc `+`*(x: int8): int8 {.magic: "UnaryPlusI", noSideEffect.}
+proc `+`*(x: int16): int16 {.magic: "UnaryPlusI", noSideEffect.}
+proc `+`*(x: int32): int32 {.magic: "UnaryPlusI", noSideEffect.}
+proc `+`*(x: int64): int64 {.magic: "UnaryPlusI", noSideEffect.}
+
+proc `-`*(x: int): int {.magic: "UnaryMinusI", noSideEffect.}
+  ## Unary `-` operator for an integer. Negates `x`.
+proc `-`*(x: int8): int8 {.magic: "UnaryMinusI", noSideEffect.}
+proc `-`*(x: int16): int16 {.magic: "UnaryMinusI", noSideEffect.}
+proc `-`*(x: int32): int32 {.magic: "UnaryMinusI", noSideEffect.}
+proc `-`*(x: int64): int64 {.magic: "UnaryMinusI64", noSideEffect.}
+
+proc `not`*(x: int): int {.magic: "BitnotI", noSideEffect.} =
+  ## Computes the `bitwise complement` of the integer `x`.
+  runnableExamples:
+    assert not 0'u8 == 255
+    assert not 0'i8 == -1
+    assert not 1000'u16 == 64535
+    assert not 1000'i16 == -1001
+proc `not`*(x: int8): int8 {.magic: "BitnotI", noSideEffect.}
+proc `not`*(x: int16): int16 {.magic: "BitnotI", noSideEffect.}
+proc `not`*(x: int32): int32 {.magic: "BitnotI", noSideEffect.}
+proc `not`*(x: int64): int64 {.magic: "BitnotI", noSideEffect.}
+
+proc `+`*(x, y: int): int {.magic: "AddI", noSideEffect.}
+  ## Binary `+` operator for an integer.
+proc `+`*(x, y: int8): int8 {.magic: "AddI", noSideEffect.}
+proc `+`*(x, y: int16): int16 {.magic: "AddI", noSideEffect.}
+proc `+`*(x, y: int32): int32 {.magic: "AddI", noSideEffect.}
+proc `+`*(x, y: int64): int64 {.magic: "AddI", noSideEffect.}
+
+proc `-`*(x, y: int): int {.magic: "SubI", noSideEffect.}
+  ## Binary `-` operator for an integer.
+proc `-`*(x, y: int8): int8 {.magic: "SubI", noSideEffect.}
+proc `-`*(x, y: int16): int16 {.magic: "SubI", noSideEffect.}
+proc `-`*(x, y: int32): int32 {.magic: "SubI", noSideEffect.}
+proc `-`*(x, y: int64): int64 {.magic: "SubI", noSideEffect.}
+
+proc `*`*(x, y: int): int {.magic: "MulI", noSideEffect.}
+  ## Binary `*` operator for an integer.
+proc `*`*(x, y: int8): int8 {.magic: "MulI", noSideEffect.}
+proc `*`*(x, y: int16): int16 {.magic: "MulI", noSideEffect.}
+proc `*`*(x, y: int32): int32 {.magic: "MulI", noSideEffect.}
+proc `*`*(x, y: int64): int64 {.magic: "MulI", noSideEffect.}
+
+proc `div`*(x, y: int): int {.magic: "DivI", noSideEffect.} =
+  ## Computes the integer division.
+  ##
+  ## This is roughly the same as `math.trunc(x/y).int`.
+  runnableExamples:
+    assert (1 div 2) == 0
+    assert (2 div 2) == 1
+    assert (3 div 2) == 1
+    assert (7 div 3) == 2
+    assert (-7 div 3) == -2
+    assert (7 div -3) == -2
+    assert (-7 div -3) == 2
+proc `div`*(x, y: int8): int8 {.magic: "DivI", noSideEffect.}
+proc `div`*(x, y: int16): int16 {.magic: "DivI", noSideEffect.}
+proc `div`*(x, y: int32): int32 {.magic: "DivI", noSideEffect.}
+proc `div`*(x, y: int64): int64 {.magic: "DivI", noSideEffect.}
+
+proc `mod`*(x, y: int): int {.magic: "ModI", noSideEffect.} =
+  ## Computes the integer modulo operation (remainder).
+  ##
+  ## This is the same as `x - (x div y) * y`.
+  runnableExamples:
+    assert (7 mod 5) == 2
+    assert (-7 mod 5) == -2
+    assert (7 mod -5) == 2
+    assert (-7 mod -5) == -2
+proc `mod`*(x, y: int8): int8 {.magic: "ModI", noSideEffect.}
+proc `mod`*(x, y: int16): int16 {.magic: "ModI", noSideEffect.}
+proc `mod`*(x, y: int32): int32 {.magic: "ModI", noSideEffect.}
+proc `mod`*(x, y: int64): int64 {.magic: "ModI", noSideEffect.}
+
+when defined(nimOldShiftRight):
+  const shrDepMessage = "`shr` will become sign preserving."
+  proc `shr`*(x: int, y: SomeInteger): int {.magic: "ShrI", noSideEffect, deprecated: shrDepMessage.}
+  proc `shr`*(x: int8, y: SomeInteger): int8 {.magic: "ShrI", noSideEffect, deprecated: shrDepMessage.}
+  proc `shr`*(x: int16, y: SomeInteger): int16 {.magic: "ShrI", noSideEffect, deprecated: shrDepMessage.}
+  proc `shr`*(x: int32, y: SomeInteger): int32 {.magic: "ShrI", noSideEffect, deprecated: shrDepMessage.}
+  proc `shr`*(x: int64, y: SomeInteger): int64 {.magic: "ShrI", noSideEffect, deprecated: shrDepMessage.}
+else:
+  proc `shr`*(x: int, y: SomeInteger): int {.magic: "AshrI", noSideEffect.} =
+    ## Computes the `shift right` operation of `x` and `y`, filling
+    ## vacant bit positions with the sign bit.
+    ##
+    ## **Note**: `Operator precedence <manual.html#syntax-precedence>`_
+    ## is different than in *C*.
+    ##
+    ## See also:
+    ## * `ashr func<#ashr,int,SomeInteger>`_ for arithmetic shift right
+    runnableExamples:
+      assert 0b0001_0000'i8 shr 2 == 0b0000_0100'i8
+      assert 0b0000_0001'i8 shr 1 == 0b0000_0000'i8
+      assert 0b1000_0000'i8 shr 4 == 0b1111_1000'i8
+      assert -1 shr 5 == -1
+      assert 1 shr 5 == 0
+      assert 16 shr 2 == 4
+      assert -16 shr 2 == -4
+  proc `shr`*(x: int8, y: SomeInteger): int8 {.magic: "AshrI", noSideEffect.}
+  proc `shr`*(x: int16, y: SomeInteger): int16 {.magic: "AshrI", noSideEffect.}
+  proc `shr`*(x: int32, y: SomeInteger): int32 {.magic: "AshrI", noSideEffect.}
+  proc `shr`*(x: int64, y: SomeInteger): int64 {.magic: "AshrI", noSideEffect.}
+
+
+proc `shl`*(x: int, y: SomeInteger): int {.magic: "ShlI", noSideEffect.} =
+  ## Computes the `shift left` operation of `x` and `y`.
+  ##
+  ## **Note**: `Operator precedence <manual.html#syntax-precedence>`_
+  ## is different than in *C*.
+  runnableExamples:
+    assert 1'i32 shl 4 == 0x0000_0010
+    assert 1'i64 shl 4 == 0x0000_0000_0000_0010
+proc `shl`*(x: int8, y: SomeInteger): int8 {.magic: "ShlI", noSideEffect.}
+proc `shl`*(x: int16, y: SomeInteger): int16 {.magic: "ShlI", noSideEffect.}
+proc `shl`*(x: int32, y: SomeInteger): int32 {.magic: "ShlI", noSideEffect.}
+proc `shl`*(x: int64, y: SomeInteger): int64 {.magic: "ShlI", noSideEffect.}
+
+proc ashr*(x: int, y: SomeInteger): int {.magic: "AshrI", noSideEffect.} =
+  ## Shifts right by pushing copies of the leftmost bit in from the left,
+  ## and lets the rightmost bits fall off.
+  ##
+  ## Note that `ashr` is not an operator, so use the normal function
+  ## call syntax for it.
+  ##
+  ## See also:
+  ## * `shr func<#shr,int,SomeInteger>`_
+  runnableExamples:
+    assert ashr(0b0001_0000'i8, 2) == 0b0000_0100'i8
+    assert ashr(0b1000_0000'i8, 8) == 0b1111_1111'i8
+    assert ashr(0b1000_0000'i8, 1) == 0b1100_0000'i8
+proc ashr*(x: int8, y: SomeInteger): int8 {.magic: "AshrI", noSideEffect.}
+proc ashr*(x: int16, y: SomeInteger): int16 {.magic: "AshrI", noSideEffect.}
+proc ashr*(x: int32, y: SomeInteger): int32 {.magic: "AshrI", noSideEffect.}
+proc ashr*(x: int64, y: SomeInteger): int64 {.magic: "AshrI", noSideEffect.}
+
+proc `and`*(x, y: int): int {.magic: "BitandI", noSideEffect.} =
+  ## Computes the `bitwise and` of numbers `x` and `y`.
+  runnableExamples:
+    assert (0b0011 and 0b0101) == 0b0001
+    assert (0b0111 and 0b1100) == 0b0100
+proc `and`*(x, y: int8): int8 {.magic: "BitandI", noSideEffect.}
+proc `and`*(x, y: int16): int16 {.magic: "BitandI", noSideEffect.}
+proc `and`*(x, y: int32): int32 {.magic: "BitandI", noSideEffect.}
+proc `and`*(x, y: int64): int64 {.magic: "BitandI", noSideEffect.}
+
+proc `or`*(x, y: int): int {.magic: "BitorI", noSideEffect.} =
+  ## Computes the `bitwise or` of numbers `x` and `y`.
+  runnableExamples:
+    assert (0b0011 or 0b0101) == 0b0111
+    assert (0b0111 or 0b1100) == 0b1111
+proc `or`*(x, y: int8): int8 {.magic: "BitorI", noSideEffect.}
+proc `or`*(x, y: int16): int16 {.magic: "BitorI", noSideEffect.}
+proc `or`*(x, y: int32): int32 {.magic: "BitorI", noSideEffect.}
+proc `or`*(x, y: int64): int64 {.magic: "BitorI", noSideEffect.}
+
+proc `xor`*(x, y: int): int {.magic: "BitxorI", noSideEffect.} =
+  ## Computes the `bitwise xor` of numbers `x` and `y`.
+  runnableExamples:
+    assert (0b0011 xor 0b0101) == 0b0110
+    assert (0b0111 xor 0b1100) == 0b1011
+proc `xor`*(x, y: int8): int8 {.magic: "BitxorI", noSideEffect.}
+proc `xor`*(x, y: int16): int16 {.magic: "BitxorI", noSideEffect.}
+proc `xor`*(x, y: int32): int32 {.magic: "BitxorI", noSideEffect.}
+proc `xor`*(x, y: int64): int64 {.magic: "BitxorI", noSideEffect.}
+
+# unsigned integer operations:
+proc `not`*(x: uint): uint {.magic: "BitnotI", noSideEffect.}
+  ## Computes the `bitwise complement` of the integer `x`.
+proc `not`*(x: uint8): uint8 {.magic: "BitnotI", noSideEffect.}
+proc `not`*(x: uint16): uint16 {.magic: "BitnotI", noSideEffect.}
+proc `not`*(x: uint32): uint32 {.magic: "BitnotI", noSideEffect.}
+proc `not`*(x: uint64): uint64 {.magic: "BitnotI", noSideEffect.}
+
+proc `shr`*(x: uint, y: SomeInteger): uint {.magic: "ShrI", noSideEffect.}
+  ## Computes the `shift right` operation of `x` and `y`.
+proc `shr`*(x: uint8, y: SomeInteger): uint8 {.magic: "ShrI", noSideEffect.}
+proc `shr`*(x: uint16, y: SomeInteger): uint16 {.magic: "ShrI", noSideEffect.}
+proc `shr`*(x: uint32, y: SomeInteger): uint32 {.magic: "ShrI", noSideEffect.}
+proc `shr`*(x: uint64, y: SomeInteger): uint64 {.magic: "ShrI", noSideEffect.}
+
+proc `shl`*(x: uint, y: SomeInteger): uint {.magic: "ShlI", noSideEffect.}
+  ## Computes the `shift left` operation of `x` and `y`.
+proc `shl`*(x: uint8, y: SomeInteger): uint8 {.magic: "ShlI", noSideEffect.}
+proc `shl`*(x: uint16, y: SomeInteger): uint16 {.magic: "ShlI", noSideEffect.}
+proc `shl`*(x: uint32, y: SomeInteger): uint32 {.magic: "ShlI", noSideEffect.}
+proc `shl`*(x: uint64, y: SomeInteger): uint64 {.magic: "ShlI", noSideEffect.}
+
+proc `and`*(x, y: uint): uint {.magic: "BitandI", noSideEffect.}
+  ## Computes the `bitwise and` of numbers `x` and `y`.
+proc `and`*(x, y: uint8): uint8 {.magic: "BitandI", noSideEffect.}
+proc `and`*(x, y: uint16): uint16 {.magic: "BitandI", noSideEffect.}
+proc `and`*(x, y: uint32): uint32 {.magic: "BitandI", noSideEffect.}
+proc `and`*(x, y: uint64): uint64 {.magic: "BitandI", noSideEffect.}
+
+proc `or`*(x, y: uint): uint {.magic: "BitorI", noSideEffect.}
+  ## Computes the `bitwise or` of numbers `x` and `y`.
+proc `or`*(x, y: uint8): uint8 {.magic: "BitorI", noSideEffect.}
+proc `or`*(x, y: uint16): uint16 {.magic: "BitorI", noSideEffect.}
+proc `or`*(x, y: uint32): uint32 {.magic: "BitorI", noSideEffect.}
+proc `or`*(x, y: uint64): uint64 {.magic: "BitorI", noSideEffect.}
+
+proc `xor`*(x, y: uint): uint {.magic: "BitxorI", noSideEffect.}
+  ## Computes the `bitwise xor` of numbers `x` and `y`.
+proc `xor`*(x, y: uint8): uint8 {.magic: "BitxorI", noSideEffect.}
+proc `xor`*(x, y: uint16): uint16 {.magic: "BitxorI", noSideEffect.}
+proc `xor`*(x, y: uint32): uint32 {.magic: "BitxorI", noSideEffect.}
+proc `xor`*(x, y: uint64): uint64 {.magic: "BitxorI", noSideEffect.}
+
+proc `+`*(x, y: uint): uint {.magic: "AddU", noSideEffect.}
+  ## Binary `+` operator for unsigned integers.
+proc `+`*(x, y: uint8): uint8 {.magic: "AddU", noSideEffect.}
+proc `+`*(x, y: uint16): uint16 {.magic: "AddU", noSideEffect.}
+proc `+`*(x, y: uint32): uint32 {.magic: "AddU", noSideEffect.}
+proc `+`*(x, y: uint64): uint64 {.magic: "AddU", noSideEffect.}
+
+proc `-`*(x, y: uint): uint {.magic: "SubU", noSideEffect.}
+  ## Binary `-` operator for unsigned integers.
+proc `-`*(x, y: uint8): uint8 {.magic: "SubU", noSideEffect.}
+proc `-`*(x, y: uint16): uint16 {.magic: "SubU", noSideEffect.}
+proc `-`*(x, y: uint32): uint32 {.magic: "SubU", noSideEffect.}
+proc `-`*(x, y: uint64): uint64 {.magic: "SubU", noSideEffect.}
+
+proc `*`*(x, y: uint): uint {.magic: "MulU", noSideEffect.}
+  ## Binary `*` operator for unsigned integers.
+proc `*`*(x, y: uint8): uint8 {.magic: "MulU", noSideEffect.}
+proc `*`*(x, y: uint16): uint16 {.magic: "MulU", noSideEffect.}
+proc `*`*(x, y: uint32): uint32 {.magic: "MulU", noSideEffect.}
+proc `*`*(x, y: uint64): uint64 {.magic: "MulU", noSideEffect.}
+
+proc `div`*(x, y: uint): uint {.magic: "DivU", noSideEffect.}
+  ## Computes the integer division for unsigned integers.
+  ## This is roughly the same as `trunc(x/y)`.
+proc `div`*(x, y: uint8): uint8 {.magic: "DivU", noSideEffect.}
+proc `div`*(x, y: uint16): uint16 {.magic: "DivU", noSideEffect.}
+proc `div`*(x, y: uint32): uint32 {.magic: "DivU", noSideEffect.}
+proc `div`*(x, y: uint64): uint64 {.magic: "DivU", noSideEffect.}
+
+proc `mod`*(x, y: uint): uint {.magic: "ModU", noSideEffect.}
+  ## Computes the integer modulo operation (remainder) for unsigned integers.
+  ## This is the same as `x - (x div y) * y`.
+proc `mod`*(x, y: uint8): uint8 {.magic: "ModU", noSideEffect.}
+proc `mod`*(x, y: uint16): uint16 {.magic: "ModU", noSideEffect.}
+proc `mod`*(x, y: uint32): uint32 {.magic: "ModU", noSideEffect.}
+proc `mod`*(x, y: uint64): uint64 {.magic: "ModU", noSideEffect.}
+
+proc `+=`*[T: SomeInteger](x: var T, y: T) {.
+  magic: "Inc", noSideEffect.}
+  ## Increments an integer.
+
+proc `-=`*[T: SomeInteger](x: var T, y: T) {.
+  magic: "Dec", noSideEffect.}
+  ## Decrements an integer.
+
+proc `*=`*[T: SomeInteger](x: var T, y: T) {.
+  inline, noSideEffect.} =
+  ## Binary `*=` operator for integers.
+  x = x * y
+
+# floating point operations:
+proc `+`*(x: float32): float32 {.magic: "UnaryPlusF64", noSideEffect.}
+proc `-`*(x: float32): float32 {.magic: "UnaryMinusF64", noSideEffect.}
+proc `+`*(x, y: float32): float32 {.magic: "AddF64", noSideEffect.}
+proc `-`*(x, y: float32): float32 {.magic: "SubF64", noSideEffect.}
+proc `*`*(x, y: float32): float32 {.magic: "MulF64", noSideEffect.}
+proc `/`*(x, y: float32): float32 {.magic: "DivF64", noSideEffect.}
+
+proc `+`*(x: float): float {.magic: "UnaryPlusF64", noSideEffect.}
+proc `-`*(x: float): float {.magic: "UnaryMinusF64", noSideEffect.}
+proc `+`*(x, y: float): float {.magic: "AddF64", noSideEffect.}
+proc `-`*(x, y: float): float {.magic: "SubF64", noSideEffect.}
+proc `*`*(x, y: float): float {.magic: "MulF64", noSideEffect.}
+proc `/`*(x, y: float): float {.magic: "DivF64", noSideEffect.}
+
+proc `+=`*[T: float|float32|float64] (x: var T, y: T) {.
+  inline, noSideEffect.} =
+  ## Increments in place a floating point number.
+  x = x + y
+
+proc `-=`*[T: float|float32|float64] (x: var T, y: T) {.
+  inline, noSideEffect.} =
+  ## Decrements in place a floating point number.
+  x = x - y
+
+proc `*=`*[T: float|float32|float64] (x: var T, y: T) {.
+  inline, noSideEffect.} =
+  ## Multiplies in place a floating point number.
+  x = x * y
+
+proc `/=`*(x: var float64, y: float64) {.inline, noSideEffect.} =
+  ## Divides in place a floating point number.
+  x = x / y
+
+proc `/=`*[T: float|float32](x: var T, y: T) {.inline, noSideEffect.} =
+  ## Divides in place a floating point number.
+  x = x / y
+
+# the following have to be included in system, not imported for some reason:
+
+proc `+%`*(x, y: int): int {.inline.} =
+  ## Treats `x` and `y` as unsigned and adds them.
+  ##
+  ## The sum is truncated to fit into the result type.
+  ## This implements modulo arithmetic. No overflow errors are possible.
+  cast[int](cast[uint](x) + cast[uint](y))
+proc `+%`*(x, y: int8): int8 {.inline.}   = cast[int8](cast[uint8](x) + cast[uint8](y))
+proc `+%`*(x, y: int16): int16 {.inline.} = cast[int16](cast[uint16](x) + cast[uint16](y))
+proc `+%`*(x, y: int32): int32 {.inline.} = cast[int32](cast[uint32](x) + cast[uint32](y))
+proc `+%`*(x, y: int64): int64 {.inline.} = cast[int64](cast[uint64](x) + cast[uint64](y))
+
+proc `-%`*(x, y: int): int {.inline.} =
+  ## Treats `x` and `y` as unsigned and subtracts them.
+  ##
+  ## The difference is truncated to fit into the result type.
+  ## This implements modulo arithmetic. No overflow errors are possible.
+  cast[int](cast[uint](x) - cast[uint](y))
+proc `-%`*(x, y: int8): int8 {.inline.}   = cast[int8](cast[uint8](x) - cast[uint8](y))
+proc `-%`*(x, y: int16): int16 {.inline.} = cast[int16](cast[uint16](x) - cast[uint16](y))
+proc `-%`*(x, y: int32): int32 {.inline.} = cast[int32](cast[uint32](x) - cast[uint32](y))
+proc `-%`*(x, y: int64): int64 {.inline.} = cast[int64](cast[uint64](x) - cast[uint64](y))
+
+proc `*%`*(x, y: int): int {.inline.} =
+  ## Treats `x` and `y` as unsigned and multiplies them.
+  ##
+  ## The product is truncated to fit into the result type.
+  ## This implements modulo arithmetic. No overflow errors are possible.
+  cast[int](cast[uint](x) * cast[uint](y))
+proc `*%`*(x, y: int8): int8 {.inline.}   = cast[int8](cast[uint8](x) * cast[uint8](y))
+proc `*%`*(x, y: int16): int16 {.inline.} = cast[int16](cast[uint16](x) * cast[uint16](y))
+proc `*%`*(x, y: int32): int32 {.inline.} = cast[int32](cast[uint32](x) * cast[uint32](y))
+proc `*%`*(x, y: int64): int64 {.inline.} = cast[int64](cast[uint64](x) * cast[uint64](y))
+
+proc `/%`*(x, y: int): int {.inline.} =
+  ## Treats `x` and `y` as unsigned and divides them.
+  ##
+  ## The division is performed on the operands cast to `uint`, and the
+  ## quotient is cast back to `int`. No overflow errors are possible.
+  cast[int](cast[uint](x) div cast[uint](y))
+proc `/%`*(x, y: int8): int8 {.inline.}   = cast[int8](cast[uint8](x) div cast[uint8](y))
+proc `/%`*(x, y: int16): int16 {.inline.} = cast[int16](cast[uint16](x) div cast[uint16](y))
+proc `/%`*(x, y: int32): int32 {.inline.} = cast[int32](cast[uint32](x) div cast[uint32](y))
+proc `/%`*(x, y: int64): int64 {.inline.} = cast[int64](cast[uint64](x) div cast[uint64](y))
+
+proc `%%`*(x, y: int): int {.inline.} =
+  ## Treats `x` and `y` as unsigned and computes the modulo of `x` and `y`.
+  ##
+  ## The operation is performed on the operands cast to `uint`, and the
+  ## remainder is cast back to `int`. No overflow errors are possible.
+  cast[int](cast[uint](x) mod cast[uint](y))
+proc `%%`*(x, y: int8): int8 {.inline.}   = cast[int8](cast[uint8](x) mod cast[uint8](y))
+proc `%%`*(x, y: int16): int16 {.inline.} = cast[int16](cast[uint16](x) mod cast[uint16](y))
+proc `%%`*(x, y: int32): int32 {.inline.} = cast[int32](cast[uint32](x) mod cast[uint32](y))
+proc `%%`*(x, y: int64): int64 {.inline.} = cast[int64](cast[uint64](x) mod cast[uint64](y))
diff --git a/lib/system/assign.nim b/lib/system/assign.nim
index 4b8d2033d..9f4cbc0fe 100755..100644
--- a/lib/system/assign.nim
+++ b/lib/system/assign.nim
@@ -1,22 +1,25 @@
 #
 #
-#            Nimrod's Runtime Library
+#            Nim's Runtime Library
 #        (c) Copyright 2012 Andreas Rumpf
 #
 #    See the file "copying.txt", included in this
 #    distribution, for details about the copyright.
 #
 
-proc genericResetAux(dest: Pointer, n: ptr TNimNode)
+include seqs_v2_reimpl
 
-proc genericAssignAux(dest, src: Pointer, mt: PNimType, shallow: bool)
-proc genericAssignAux(dest, src: Pointer, n: ptr TNimNode, shallow: bool) =
+proc genericResetAux(dest: pointer, n: ptr TNimNode) {.benign.}
+
+proc genericAssignAux(dest, src: pointer, mt: PNimType, shallow: bool) {.benign.}
+proc genericAssignAux(dest, src: pointer, n: ptr TNimNode,
+                      shallow: bool) {.benign.} =
   var
-    d = cast[TAddress](dest)
-    s = cast[TAddress](src)
+    d = cast[int](dest)
+    s = cast[int](src)
   case n.kind
   of nkSlot:
-    genericAssignAux(cast[pointer](d +% n.offset), 
+    genericAssignAux(cast[pointer](d +% n.offset),
                      cast[pointer](s +% n.offset), n.typ, shallow)
   of nkList:
     for i in 0..n.len-1:
@@ -26,7 +29,7 @@ proc genericAssignAux(dest, src: Pointer, n: ptr TNimNode, shallow: bool) =
     var m = selectBranch(src, n)
     # reset if different branches are in use; note different branches also
     # imply that's not self-assignment (``x = x``)!
-    if m != dd and dd != nil: 
+    if m != dd and dd != nil:
       genericResetAux(dest, dd)
     copyMem(cast[pointer](d +% n.offset), cast[pointer](s +% n.offset),
             n.typ.size)
@@ -37,67 +40,114 @@ proc genericAssignAux(dest, src: Pointer, n: ptr TNimNode, shallow: bool) =
   #  echo "ugh memory corruption! ", n.kind
   #  quit 1
 
-proc genericAssignAux(dest, src: Pointer, mt: PNimType, shallow: bool) =
+template deepSeqAssignImpl(operation, additionalArg) {.dirty.} = # dirty: `dest`, `src` and `mt` are resolved at the expansion site
+  var d = cast[ptr NimSeqV2Reimpl](dest)
+  var s = cast[ptr NimSeqV2Reimpl](src)
+  d.len = s.len
+  let elem = mt.base # type info of a single element
+  d.p = cast[ptr NimSeqPayloadReimpl](newSeqPayload(s.len, elem.size, elem.align)) # presumably a fresh payload for s.len elements -- verify in seqs_v2_reimpl
+
+  let bs = elem.size
+  let ba = elem.align
+  let headerSize = align(sizeof(NimSeqPayloadBase), ba) # offset of element 0: payload header rounded up to the element alignment
+
+  for i in 0..d.len-1:
+    operation(d.p +! (headerSize+i*bs), s.p +! (headerSize+i*bs), mt.base, additionalArg) # element-wise: operation(dstElem, srcElem, elemType, additionalArg)
+
+proc genericAssignAux(dest, src: pointer, mt: PNimType, shallow: bool) =
   var
-    d = cast[TAddress](dest)
-    s = cast[TAddress](src)
+    d = cast[int](dest)
+    s = cast[int](src)
   sysAssert(mt != nil, "genericAssignAux 2")
-  case mt.Kind
+  case mt.kind
   of tyString:
-    var x = cast[ppointer](dest)
-    var s2 = cast[ppointer](s)[]
-    if s2 == nil or shallow or (
-        cast[PGenericSeq](s2).reserved and seqShallowFlag) != 0:
-      unsureAsgnRef(x, s2)
+    when defined(nimSeqsV2):
+      var x = cast[ptr NimStringV2](dest)
+      var s2 = cast[ptr NimStringV2](s)[]
+      nimAsgnStrV2(x[], s2)
     else:
-      unsureAsgnRef(x, copyString(cast[NimString](s2)))
+      var x = cast[PPointer](dest)
+      var s2 = cast[PPointer](s)[]
+      if s2 == nil or shallow or (
+          cast[PGenericSeq](s2).reserved and seqShallowFlag) != 0:
+        unsureAsgnRef(x, s2)
+      else:
+        unsureAsgnRef(x, copyString(cast[NimString](s2)))
   of tySequence:
-    var s2 = cast[ppointer](src)[]
-    var seq = cast[PGenericSeq](s2)      
-    var x = cast[ppointer](dest)
-    if s2 == nil or shallow or (seq.reserved and seqShallowFlag) != 0:
-      # this can happen! nil sequences are allowed
-      unsureAsgnRef(x, s2)
-      return
-    sysAssert(dest != nil, "genericAssignAux 3")
-    unsureAsgnRef(x, newSeq(mt, seq.len))
-    var dst = cast[taddress](cast[ppointer](dest)[])
-    for i in 0..seq.len-1:
-      genericAssignAux(
-        cast[pointer](dst +% i*% mt.base.size +% GenericSeqSize),
-        cast[pointer](cast[taddress](s2) +% i *% mt.base.size +%
-                     GenericSeqSize),
-        mt.Base, shallow)
+    when defined(nimSeqsV2):
+      deepSeqAssignImpl(genericAssignAux, shallow)
+    else:
+      var s2 = cast[PPointer](src)[]
+      var seq = cast[PGenericSeq](s2)
+      var x = cast[PPointer](dest)
+      if s2 == nil or shallow or (seq.reserved and seqShallowFlag) != 0:
+        # this can happen! nil sequences are allowed
+        unsureAsgnRef(x, s2)
+        return
+      sysAssert(dest != nil, "genericAssignAux 3")
+      if ntfNoRefs in mt.base.flags:
+        var ss = nimNewSeqOfCap(mt, seq.len)
+        cast[PGenericSeq](ss).len = seq.len
+        unsureAsgnRef(x, ss)
+        var dst = cast[int](cast[PPointer](dest)[])
+        copyMem(cast[pointer](dst +% align(GenericSeqSize, mt.base.align)),
+                cast[pointer](cast[int](s2) +% align(GenericSeqSize, mt.base.align)),
+                seq.len *% mt.base.size)
+      else:
+        unsureAsgnRef(x, newSeq(mt, seq.len))
+        var dst = cast[int](cast[PPointer](dest)[])
+        for i in 0..seq.len-1:
+          genericAssignAux(
+            cast[pointer](dst +% align(GenericSeqSize, mt.base.align) +% i *% mt.base.size ),
+            cast[pointer](cast[int](s2) +% align(GenericSeqSize, mt.base.align) +% i *% mt.base.size ),
+            mt.base, shallow)
   of tyObject:
+    var it = mt.base
+    # don't use recursion here on the PNimType because the subtype
+    # check should only be done at the very end:
+    while it != nil:
+      genericAssignAux(dest, src, it.node, shallow)
+      it = it.base
+    genericAssignAux(dest, src, mt.node, shallow)
     # we need to copy m_type field for tyObject, as it could be empty for
     # sequence reallocations:
-    var pint = cast[ptr PNimType](dest)
-    pint[] = cast[ptr PNimType](src)[]
-    genericAssignAux(dest, src, mt.node, shallow)
+    when defined(nimSeqsV2):
+      var pint = cast[ptr PNimTypeV2](dest)
+      #chckObjAsgn(cast[ptr PNimTypeV2](src)[].typeInfoV2, mt)
+      pint[] = cast[PNimTypeV2](mt.typeInfoV2)
+    else:
+      var pint = cast[ptr PNimType](dest)
+      # We need to copy the *static* type not the dynamic type:
+      #   if p of TB:
+      #     var tbObj = TB(p)
+      #     tbObj of TC # needs to be false!
+      #c_fprintf(stdout, "%s %s\n", pint[].name, mt.name)
+      let srcType = cast[ptr PNimType](src)[]
+      if srcType != nil:
+        # `!= nil` needed because of cases where object is not initialized properly (see bug #16706)
+        # note that you can have `srcType == nil` yet `src != nil`
+        chckObjAsgn(srcType, mt)
+      pint[] = mt # cast[ptr PNimType](src)[]
   of tyTuple:
     genericAssignAux(dest, src, mt.node, shallow)
   of tyArray, tyArrayConstr:
     for i in 0..(mt.size div mt.base.size)-1:
-      genericAssignAux(cast[pointer](d +% i*% mt.base.size),
-                       cast[pointer](s +% i*% mt.base.size), mt.base, shallow)
+      genericAssignAux(cast[pointer](d +% i *% mt.base.size),
+                       cast[pointer](s +% i *% mt.base.size), mt.base, shallow)
   of tyRef:
-    unsureAsgnRef(cast[ppointer](dest), cast[ppointer](s)[])
+    unsureAsgnRef(cast[PPointer](dest), cast[PPointer](s)[])
   else:
     copyMem(dest, src, mt.size) # copy raw bits
 
-proc genericAssign(dest, src: Pointer, mt: PNimType) {.compilerProc.} =
-  GC_disable()
+proc genericAssign(dest, src: pointer, mt: PNimType) {.compilerproc.} =
   genericAssignAux(dest, src, mt, false)
-  GC_enable()
 
-proc genericShallowAssign(dest, src: Pointer, mt: PNimType) {.compilerProc.} =
-  GC_disable()
+proc genericShallowAssign(dest, src: pointer, mt: PNimType) {.compilerproc.} =
   genericAssignAux(dest, src, mt, true)
-  GC_enable()
 
 when false:
   proc debugNimType(t: PNimType) =
-    if t.isNil: 
+    if t.isNil:
       cprintf("nil!")
       return
     var k: cstring
@@ -117,32 +167,32 @@ when false:
     of tyPointer: k = "range"
     of tyOpenArray: k = "openarray"
     of tyString: k = "string"
-    of tyCString: k = "cstring"
+    of tyCstring: k = "cstring"
     of tyInt: k = "int"
     of tyInt32: k = "int32"
     else: k = "other"
     cprintf("%s %ld\n", k, t.size)
     debugNimType(t.base)
 
-proc genericSeqAssign(dest, src: Pointer, mt: PNimType) {.compilerProc.} =
+proc genericSeqAssign(dest, src: pointer, mt: PNimType) {.compilerproc.} =
   var src = src # ugly, but I like to stress the parser sometimes :-)
   genericAssign(dest, addr(src), mt)
 
 proc genericAssignOpenArray(dest, src: pointer, len: int,
                             mt: PNimType) {.compilerproc.} =
   var
-    d = cast[TAddress](dest)
-    s = cast[TAddress](src)
+    d = cast[int](dest)
+    s = cast[int](src)
   for i in 0..len-1:
-    genericAssign(cast[pointer](d +% i*% mt.base.size),
-                  cast[pointer](s +% i*% mt.base.size), mt.base)
+    genericAssign(cast[pointer](d +% i *% mt.base.size),
+                  cast[pointer](s +% i *% mt.base.size), mt.base)
 
-proc objectInit(dest: Pointer, typ: PNimType) {.compilerProc.}
-proc objectInitAux(dest: Pointer, n: ptr TNimNode) =
-  var d = cast[TAddress](dest)
+proc objectInit(dest: pointer, typ: PNimType) {.compilerproc, benign.}
+proc objectInitAux(dest: pointer, n: ptr TNimNode) {.benign.} =
+  var d = cast[int](dest)
   case n.kind
   of nkNone: sysAssert(false, "objectInitAux")
-  of nkSLot: objectInit(cast[pointer](d +% n.offset), n.typ)
+  of nkSlot: objectInit(cast[pointer](d +% n.offset), n.typ)
   of nkList:
     for i in 0..n.len-1:
       objectInitAux(dest, n.sons[i])
@@ -150,41 +200,33 @@ proc objectInitAux(dest: Pointer, n: ptr TNimNode) =
     var m = selectBranch(dest, n)
     if m != nil: objectInitAux(dest, m)
 
-proc objectInit(dest: Pointer, typ: PNimType) =
+proc objectInit(dest: pointer, typ: PNimType) =
   # the generic init proc that takes care of initialization of complex
   # objects on the stack or heap
-  var d = cast[TAddress](dest)
+  var d = cast[int](dest)
   case typ.kind
   of tyObject:
     # iterate over any structural type
     # here we have to init the type field:
-    var pint = cast[ptr PNimType](dest)
-    pint[] = typ
+    when defined(nimSeqsV2):
+      var pint = cast[ptr PNimTypeV2](dest)
+      pint[] = cast[PNimTypeV2](typ.typeInfoV2)
+    else:
+      var pint = cast[ptr PNimType](dest)
+      pint[] = typ
     objectInitAux(dest, typ.node)
   of tyTuple:
     objectInitAux(dest, typ.node)
   of tyArray, tyArrayConstr:
     for i in 0..(typ.size div typ.base.size)-1:
       objectInit(cast[pointer](d +% i * typ.base.size), typ.base)
-  else: nil # nothing to do
-  
+  else: discard # nothing to do
+
 # ---------------------- assign zero -----------------------------------------
 
-when not defined(nimmixin):
-  proc destroy(x: int) = nil
-  proc nimDestroyRange*[T](r: T) =
-    # internal proc used for destroying sequences and arrays
-    for i in countup(0, r.len - 1): destroy(r[i])
-else:
-  # XXX Why is this exported and no compilerproc?
-  proc nimDestroyRange*[T](r: T) =
-    # internal proc used for destroying sequences and arrays
-    mixin destroy
-    for i in countup(0, r.len - 1): destroy(r[i])
-
-proc genericReset(dest: Pointer, mt: PNimType) {.compilerProc.}
-proc genericResetAux(dest: Pointer, n: ptr TNimNode) =
-  var d = cast[TAddress](dest)
+proc genericReset(dest: pointer, mt: PNimType) {.compilerproc, benign.}
+proc genericResetAux(dest: pointer, n: ptr TNimNode) =
+  var d = cast[int](dest)
   case n.kind
   of nkNone: sysAssert(false, "genericResetAux")
   of nkSlot: genericReset(cast[pointer](d +% n.offset), n.typ)
@@ -194,35 +236,63 @@ proc genericResetAux(dest: Pointer, n: ptr TNimNode) =
     var m = selectBranch(dest, n)
     if m != nil: genericResetAux(dest, m)
     zeroMem(cast[pointer](d +% n.offset), n.typ.size)
-  
-proc genericReset(dest: Pointer, mt: PNimType) =
-  var d = cast[TAddress](dest)
+
+proc genericReset(dest: pointer, mt: PNimType) =
+  var d = cast[int](dest)
   sysAssert(mt != nil, "genericReset 2")
-  case mt.Kind
-  of tyString, tyRef, tySequence:
-    unsureAsgnRef(cast[ppointer](dest), nil)
-  of tyObject, tyTuple:
-    # we don't need to reset m_type field for tyObject
+  case mt.kind
+  of tyRef:
+    unsureAsgnRef(cast[PPointer](dest), nil)
+  of tyString:
+    when defined(nimSeqsV2):
+      var s = cast[ptr NimStringV2](dest)
+      frees(s[])
+      zeroMem(dest, mt.size)
+    else:
+      unsureAsgnRef(cast[PPointer](dest), nil)
+  of tySequence:
+    when defined(nimSeqsV2):
+      frees(cast[ptr NimSeqV2Reimpl](dest)[])
+      zeroMem(dest, mt.size)
+    else:
+      unsureAsgnRef(cast[PPointer](dest), nil)
+  of tyTuple:
     genericResetAux(dest, mt.node)
+  of tyObject:
+    genericResetAux(dest, mt.node)
+    # also reset the type field for tyObject, for correct branch switching!
+    when defined(nimSeqsV2):
+      var pint = cast[ptr PNimTypeV2](dest)
+      pint[] = nil
+    else:
+      var pint = cast[ptr PNimType](dest)
+      pint[] = nil
   of tyArray, tyArrayConstr:
     for i in 0..(mt.size div mt.base.size)-1:
-      genericReset(cast[pointer](d +% i*% mt.base.size), mt.base)
+      genericReset(cast[pointer](d +% i *% mt.base.size), mt.base)
   else:
     zeroMem(dest, mt.size) # set raw bits to zero
 
-proc selectBranch(discVal, L: int, 
-                  a: ptr array [0..0x7fff, ptr TNimNode]): ptr TNimNode =
-  result = a[L] # a[L] contains the ``else`` part (but may be nil)
+proc selectBranch(discVal, L: int,
+                  a: ptr array[0x7fff, ptr TNimNode]): ptr TNimNode =
   if discVal <% L:
-    var x = a[discVal]
-    if x != nil: result = x
-  
-proc FieldDiscriminantCheck(oldDiscVal, newDiscVal: int, 
-                            a: ptr array [0..0x7fff, ptr TNimNode], 
-                            L: int) {.compilerProc.} =
-  var oldBranch = selectBranch(oldDiscVal, L, a)
-  var newBranch = selectBranch(newDiscVal, L, a)
-  if newBranch != oldBranch and oldDiscVal != 0:
-    raise newException(EInvalidField, 
-                       "assignment to discriminant changes object branch")
+    result = a[discVal]
+    if result == nil:
+      result = a[L]
+  else:
+    result = a[L] # a[L] contains the ``else`` part (but may be nil)
 
+proc FieldDiscriminantCheck(oldDiscVal, newDiscVal: int,
+                            a: ptr array[0x7fff, ptr TNimNode],
+                            L: int) {.compilerproc.} =
+  let oldBranch = selectBranch(oldDiscVal, L, a)
+  let newBranch = selectBranch(newDiscVal, L, a)
+  when defined(nimOldCaseObjects):
+    if newBranch != oldBranch and oldDiscVal != 0:
+      sysFatal(FieldDefect, "assignment to discriminant changes object branch")
+  else:
+    if newBranch != oldBranch:
+      if oldDiscVal != 0:
+        sysFatal(FieldDefect, "assignment to discriminant changes object branch")
+      else:
+        sysFatal(FieldDefect, "assignment to discriminant changes object branch; compile with -d:nimOldCaseObjects for a transition period")
diff --git a/lib/system/atomics.nim b/lib/system/atomics.nim
deleted file mode 100755
index 623f8d0d2..000000000
--- a/lib/system/atomics.nim
+++ /dev/null
@@ -1,76 +0,0 @@
-#
-#
-#            Nimrod's Runtime Library
-#        (c) Copyright 2012 Andreas Rumpf
-#
-#    See the file "copying.txt", included in this
-#    distribution, for details about the copyright.
-#
-
-# Atomic operations for Nimrod.
-
-when (defined(gcc) or defined(llvm_gcc)) and hasThreadSupport and 
-    not defined(windows):
-  proc sync_add_and_fetch(p: var int, val: int): int {.
-    importc: "__sync_add_and_fetch", nodecl.}
-  proc sync_sub_and_fetch(p: var int, val: int): int {.
-    importc: "__sync_sub_and_fetch", nodecl.}
-elif defined(vcc) and hasThreadSupport:
-  proc sync_add_and_fetch(p: var int, val: int): int {.
-    importc: "NimXadd", nodecl.}
-else:
-  proc sync_add_and_fetch(p: var int, val: int): int {.inline.} =
-    inc(p, val)
-    result = p
-
-proc atomicInc(memLoc: var int, x: int = 1): int =
-  when hasThreadSupport:
-    result = sync_add_and_fetch(memLoc, x)
-  else:
-    inc(memLoc, x)
-    result = memLoc
-  
-proc atomicDec(memLoc: var int, x: int = 1): int =
-  when hasThreadSupport:
-    when defined(sync_sub_and_fetch):
-      result = sync_sub_and_fetch(memLoc, x)
-    else:
-      result = sync_add_and_fetch(memLoc, -x)
-  else:
-    dec(memLoc, x)
-    result = memLoc  
-
-
-# atomic compare and swap (CAS) funcitons to implement lock-free algorithms
-
-when (defined(gcc) or defined(llvm_gcc)) and hasThreadSupport:
-  proc compareAndSwap*[T: ptr|ref|pointer](mem: var T, expected: T, newValue: T): bool {.nodecl, 
-      importc: " __sync_bool_compare_and_swap".}
-    ## Returns true if successfully set value at mem to newValue when value
-    ## at mem == expected
-      
-elif defined(windows) and hasThreadSupport:
-    proc InterlockedCompareExchangePointer(mem: ptr pointer,
-      newValue: pointer, comparand: pointer) : pointer {.nodecl, 
-        importc: "InterlockedCompareExchangePointer", header:"windows.h".}
-
-
-    proc compareAndSwap*[T: ptr|ref|pointer](mem: var T, 
-      expected: T, newValue: T): bool {.inline.}=
-      ## Returns true if successfully set value at mem to newValue when value
-      ## at mem == expected
-      return InterlockedCompareExchangePointer(addr(mem), 
-        newValue, expected) == expected
-    
-elif not hasThreadSupport:
-  proc compareAndSwap*[T: ptr|ref|pointer](mem: var T, 
-    expected: T, newValue: T): bool {.inline.} =
-      ## Returns true if successfully set value at mem to newValue when value
-      ## at mem == expected
-      var oldval = mem
-      if oldval == expected:
-        mem = newValue
-        return true
-      return false
-
-
diff --git a/lib/system/avltree.nim b/lib/system/avltree.nim
index 6a268b453..8d4b7e897 100644
--- a/lib/system/avltree.nim
+++ b/lib/system/avltree.nim
@@ -1,6 +1,6 @@
 #
 #
-#            Nimrod's Runtime Library
+#            Nim's Runtime Library
 #        (c) Copyright 2012 Andreas Rumpf
 #
 #    See the file "copying.txt", included in this
@@ -9,30 +9,30 @@
 
 # not really an AVL tree anymore, but still balanced ...
 
-template IsBottom(n: PAvlNode): bool = n == bottom
+template isBottom(n: PAvlNode): bool = n.link[0] == n
 
 proc lowGauge(n: PAvlNode): int =
   var it = n
-  while not IsBottom(it):
+  while not isBottom(it):
     result = it.key
     it = it.link[0]
-  
+
 proc highGauge(n: PAvlNode): int =
   result = -1
   var it = n
-  while not IsBottom(it):
+  while not isBottom(it):
     result = it.upperBound
     it = it.link[1]
 
-proc find(root: PAvlNode, key: int): PAvlNode = 
+proc find(root: PAvlNode, key: int): PAvlNode =
   var it = root
-  while not IsBottom(it):
+  while not isBottom(it):
     if it.key == key: return it
     it = it.link[ord(it.key <% key)]
 
 proc inRange(root: PAvlNode, key: int): PAvlNode =
   var it = root
-  while not IsBottom(it):
+  while not isBottom(it):
     if it.key <=% key and key <% it.upperBound: return it
     it = it.link[ord(it.key <% key)]
 
@@ -51,31 +51,37 @@ proc split(t: var PAvlNode) =
     t.link[0] = temp
     inc t.level
 
-proc add(a: var TMemRegion, t: var PAvlNode, key, upperBound: int) =
-  if t == bottom:
+proc add(a: var MemRegion, t: var PAvlNode, key, upperBound: int) {.benign.} =
+  if t.isBottom:
     t = allocAvlNode(a, key, upperBound)
   else:
     if key <% t.key:
+      when defined(avlcorruption):
+        if t.link[0] == nil:
+          cprintf("bug here %p\n", t)
       add(a, t.link[0], key, upperBound)
     elif key >% t.key:
+      when defined(avlcorruption):
+        if t.link[1] == nil:
+          cprintf("bug here B %p\n", t)
       add(a, t.link[1], key, upperBound)
     else:
       sysAssert false, "key already exists"
     skew(t)
     split(t)
 
-proc del(a: var TMemRegion, t: var PAvlNode, x: int) =
-  if t == bottom: return
+proc del(a: var MemRegion, t: var PAvlNode, x: int) {.benign.} =
+  if isBottom(t): return
   a.last = t
   if x <% t.key:
     del(a, t.link[0], x)
   else:
     a.deleted = t
     del(a, t.link[1], x)
-  if t == a.last and a.deleted != bottom and x == a.deleted.key:
+  if t == a.last and not isBottom(a.deleted) and x == a.deleted.key:
     a.deleted.key = t.key
     a.deleted.upperBound = t.upperBound
-    a.deleted = bottom
+    a.deleted = getBottom(a)
     t = t.link[1]
     deallocAvlNode(a, a.last)
   elif t.link[0].level < t.level-1 or
diff --git a/lib/system/basic_types.nim b/lib/system/basic_types.nim
new file mode 100644
index 000000000..bf81b9b6a
--- /dev/null
+++ b/lib/system/basic_types.nim
@@ -0,0 +1,94 @@
+type
+  int* {.magic: Int.}         ## Default integer type; bitwidth depends on
+                              ## architecture, but is always the same as a pointer.
+  int8* {.magic: Int8.}       ## Signed 8 bit integer type.
+  int16* {.magic: Int16.}     ## Signed 16 bit integer type.
+  int32* {.magic: Int32.}     ## Signed 32 bit integer type.
+  int64* {.magic: Int64.}     ## Signed 64 bit integer type.
+  uint* {.magic: UInt.}       ## Unsigned default integer type.
+  uint8* {.magic: UInt8.}     ## Unsigned 8 bit integer type.
+  uint16* {.magic: UInt16.}   ## Unsigned 16 bit integer type.
+  uint32* {.magic: UInt32.}   ## Unsigned 32 bit integer type.
+  uint64* {.magic: UInt64.}   ## Unsigned 64 bit integer type.
+
+type
+  float* {.magic: Float.}     ## Default floating point type.
+  float32* {.magic: Float32.} ## 32 bit floating point type.
+  float64* {.magic: Float.}   ## 64 bit floating point type.
+
+# 'float64' is now an alias to 'float'; this solves many problems
+
+type
+  char* {.magic: Char.}         ## Built-in 8 bit character type (unsigned).
+  string* {.magic: String.}     ## Built-in string type.
+  cstring* {.magic: Cstring.}   ## Built-in cstring (*compatible string*) type.
+  pointer* {.magic: Pointer.}   ## Built-in pointer type, use the `addr`
+                                ## operator to get a pointer to a variable.
+
+  typedesc* {.magic: TypeDesc.} ## Meta type to denote a type description.
+
+type
+  `ptr`*[T] {.magic: Pointer.}   ## Built-in generic untraced pointer type.
+  `ref`*[T] {.magic: Pointer.}   ## Built-in generic traced pointer type.
+
+  `nil` {.magic: "Nil".}
+
+  void* {.magic: "VoidType".}    ## Meta type to denote the absence of any type.
+  auto* {.magic: Expr.}          ## Meta type for automatic type determination.
+  any* {.deprecated: "Deprecated since v1.5; Use auto instead.".} = distinct auto  ## Deprecated; Use `auto` instead. See https://github.com/nim-lang/RFCs/issues/281
+  untyped* {.magic: Expr.}       ## Meta type to denote an expression that
+                                 ## is not resolved (for templates).
+  typed* {.magic: Stmt.}         ## Meta type to denote an expression that
+                                 ## is resolved (for templates).
+
+type # we need to start a new type section here, so that ``0`` can have a type
+  bool* {.magic: "Bool".} = enum ## Built-in boolean type.
+    false = 0, true = 1
+
+const
+  on* = true    ## Alias for `true`.
+  off* = false  ## Alias for `false`.
+
+type
+  SomeSignedInt* = int|int8|int16|int32|int64
+    ## Type class matching all signed integer types.
+
+  SomeUnsignedInt* = uint|uint8|uint16|uint32|uint64
+    ## Type class matching all unsigned integer types.
+
+  SomeInteger* = SomeSignedInt|SomeUnsignedInt
+    ## Type class matching all integer types.
+
+  SomeFloat* = float|float32|float64
+    ## Type class matching all floating point number types.
+
+  SomeNumber* = SomeInteger|SomeFloat
+    ## Type class matching all number types.
+
+  SomeOrdinal* = int|int8|int16|int32|int64|bool|enum|uint|uint8|uint16|uint32|uint64
+    ## Type class matching all ordinal types; however this includes enums with
+    ## holes. See also `Ordinal`
+
+
+{.push warning[GcMem]: off, warning[Uninit]: off.}
+{.push hints: off.}
+
+proc `not`*(x: bool): bool {.magic: "Not", noSideEffect.}
+  ## Boolean not; returns true if `x == false`.
+
+proc `and`*(x, y: bool): bool {.magic: "And", noSideEffect.}
+  ## Boolean `and`; returns true if `x == y == true` (if both arguments
+  ## are true).
+  ##
+  ## Evaluation is lazy: if `x` is false, `y` will not even be evaluated.
+proc `or`*(x, y: bool): bool {.magic: "Or", noSideEffect.}
+  ## Boolean `or`; returns true if `not (not x and not y)` (if any of
+  ## the arguments is true).
+  ##
+  ## Evaluation is lazy: if `x` is true, `y` will not even be evaluated.
+proc `xor`*(x, y: bool): bool {.magic: "Xor", noSideEffect.}
+  ## Boolean `exclusive or`; returns true if `x != y` (if either argument
+  ## is true while the other is false).
+
+{.pop.}
+{.pop.}
diff --git a/lib/system/bitmasks.nim b/lib/system/bitmasks.nim
new file mode 100644
index 000000000..0663247c2
--- /dev/null
+++ b/lib/system/bitmasks.nim
@@ -0,0 +1,39 @@
+#
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2015 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+# Page size of the system; in most cases 4096 bytes. For exotic OS or
+# CPU this needs to be changed:
+const
+  PageShift = when defined(nimPage256) or defined(cpu16): 8 # 256 == 1 shl 8
+              elif defined(nimPage512): 9
+              elif defined(nimPage1k): 10
+              else: 12 # \ # my tests showed no improvements for using larger page sizes.
+
+  PageSize = 1 shl PageShift
+  PageMask = PageSize-1
+
+
+  MemAlign = # also minimal allocatable memory block
+    when defined(nimMemAlignTiny): 4
+    elif defined(useMalloc):
+      when defined(amd64): 16 
+      else: 8
+    else: 16
+
+  BitsPerPage = PageSize div MemAlign
+  UnitsPerPage = BitsPerPage div (sizeof(int)*8)
+    # how many ints do we need to describe a page:
+    # on 32 bit systems this is only 16 (!)
+
+  TrunkShift = 9
+  BitsPerTrunk = 1 shl TrunkShift # needs to be power of 2 and divisible by 64
+  TrunkMask = BitsPerTrunk - 1
+  IntsPerTrunk = BitsPerTrunk div (sizeof(int)*8)
+  IntShift = 5 + ord(sizeof(int) == 8) # 5 or 6, depending on int width
+  IntMask = 1 shl IntShift - 1
diff --git a/lib/system/cellseqs_v1.nim b/lib/system/cellseqs_v1.nim
new file mode 100644
index 000000000..1a305aa42
--- /dev/null
+++ b/lib/system/cellseqs_v1.nim
@@ -0,0 +1,46 @@
+#
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2019 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+# ------------------- cell seq handling ---------------------------------------
+
+type
+  PCellArray = ptr UncheckedArray[PCell]
+  CellSeq {.final, pure.} = object
+    len, cap: int
+    d: PCellArray
+
+proc contains(s: CellSeq, c: PCell): bool {.inline.} =
+  # linear scan over the `len` stored cells
+  result = false
+  for idx in 0 ..< s.len:
+    if s.d[idx] == c: return true
+
+proc resize(s: var CellSeq) =
+  s.cap = max(s.cap * 3 div 2, s.cap + 1) # grow ~1.5x, but by at least one slot: `cap * 3 div 2` stalls for cap < 2
+  let d = cast[PCellArray](alloc(s.cap * sizeof(PCell))) # new, larger buffer
+  copyMem(d, s.d, s.len * sizeof(PCell)) # carry over the `len` live cells
+  dealloc(s.d)
+  s.d = d
+
+proc add(s: var CellSeq, c: PCell) {.inline.} =
+  # make room first when the backing array is full, then append
+  if s.len >= s.cap: s.resize()
+  s.d[s.len] = c
+  inc s.len
+
+proc init(s: var CellSeq, cap: int = 1024) =
+  # start empty, backed by zero-filled storage for `cap` cells
+  s.d = cast[PCellArray](alloc0(cap * sizeof(PCell)))
+  s.len = 0; s.cap = cap
+
+proc deinit(s: var CellSeq) =
+  # release the storage, then reset to the empty, unallocated state
+  dealloc(s.d)
+  s.d = nil
+  s.len = 0; s.cap = 0
diff --git a/lib/system/cellseqs_v2.nim b/lib/system/cellseqs_v2.nim
new file mode 100644
index 000000000..c6c7b1a8e
--- /dev/null
+++ b/lib/system/cellseqs_v2.nim
@@ -0,0 +1,53 @@
+#
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2019 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+# Cell seqs for cyclebreaker and cyclicrefs_v2.
+
+type
+  CellTuple[T] = (T, PNimTypeV2)
+  CellArray[T] = ptr UncheckedArray[CellTuple[T]]
+  CellSeq[T] = object
+    len, cap: int
+    d: CellArray[T]
+
+proc resize[T](s: var CellSeq[T]) =
+  s.cap = max(s.cap * 3 div 2, s.cap + 1) # grow ~1.5x, but by at least one slot: `cap * 3 div 2` stalls for cap < 2
+  var newSize = s.cap * sizeof(CellTuple[T]) # byte size of the enlarged buffer
+  when compileOption("threads"):
+    s.d = cast[CellArray[T]](reallocShared(s.d, newSize))
+  else:
+    s.d = cast[CellArray[T]](realloc(s.d, newSize))
+
+proc add[T](s: var CellSeq[T], c: T, t: PNimTypeV2) {.inline.} =
+  # make room first when the backing array is full, then append the pair
+  if s.len >= s.cap: resize(s)
+  s.d[s.len] = (c, t)
+  inc s.len
+
+proc init[T](s: var CellSeq[T], cap: int = 1024) =
+  let bytes = cap * sizeof(CellTuple[T]) # room for `cap` (cell, type) tuples
+  when compileOption("threads"):
+    s.d = cast[CellArray[T]](allocShared(uint(bytes)))
+  else:
+    s.d = cast[CellArray[T]](alloc(bytes))
+  s.len = 0; s.cap = cap
+
+proc deinit[T](s: var CellSeq[T]) =
+  s.len = 0 # the sequence is empty afterwards, whether or not a buffer existed
+  s.cap = 0
+  if s.d == nil: return
+  when compileOption("threads"):
+    deallocShared(s.d)
+  else:
+    dealloc(s.d)
+  s.d = nil
+
+proc pop[T](s: var CellSeq[T]): (T, PNimTypeV2) =
+  dec s.len # no emptiness check here: `s.len` must be > 0 on entry
+  result = s.d[s.len]
diff --git a/lib/system/cellsets.nim b/lib/system/cellsets.nim
index 7ad814da4..92036c226 100755..100644
--- a/lib/system/cellsets.nim
+++ b/lib/system/cellsets.nim
@@ -1,92 +1,107 @@
 #
 #
-#            Nimrod's Runtime Library
+#            Nim's Runtime Library
 #        (c) Copyright 2013 Andreas Rumpf
 #
 #    See the file "copying.txt", included in this
 #    distribution, for details about the copyright.
 #
 
-# Efficient set of pointers for the GC (and repr)
 
-type
-  TRefCount = int
+#[
+
+Efficient set of pointers for the GC (and repr)
+-----------------------------------------------
+
+The GC depends on an extremely efficient datastructure for storing a
+set of pointers - this is called a `CellSet` in the source code.
+Inserting, deleting and searching are done in constant time. However,
+modifying a `CellSet` during traversal leads to undefined behaviour.
+
+All operations on a CellSet have to perform efficiently. Because a CellSet can
+become huge, a hash table alone is not suitable for this.
+
+We use a mixture of bitset and hash table for this. The hash table maps *pages*
+to a page descriptor. The page descriptor contains a bit for any possible cell
+address within this page. So including a cell is done as follows:
+
+- Find the page descriptor for the page the cell belongs to.
+- Set the appropriate bit in the page descriptor indicating that the
+  cell points to the start of a memory block.
+
+Removing a cell is analogous - the bit has to be set to zero.
+Single page descriptors are never deleted from the hash table. This is not
+needed as the data structure needs to be rebuilt periodically anyway.
+
+Complete traversal is done in this way::
+
+  for each page descriptor d:
+    for each bit in d:
+      if bit == 1:
+        traverse the pointer belonging to this bit
+
+]#
 
-  TCell {.pure.} = object
-    refcount: TRefCount  # the refcount and some flags
-    typ: PNimType
-    when trackAllocationSource:
-      filename: cstring
-      line: int
+when defined(gcOrc) or defined(gcArc) or defined(gcAtomicArc):
+  type
+    PCell = Cell
 
-  PCell = ptr TCell
+  when not declaredInScope(PageShift):
+    include bitmasks
 
-  PPageDesc = ptr TPageDesc
-  TBitIndex = range[0..UnitsPerPage-1]
-  TPageDesc {.final, pure.} = object
+else:
+  type
+    RefCount = int
+
+    Cell {.pure.} = object
+      refcount: RefCount  # the refcount and some flags
+      typ: PNimType
+      when trackAllocationSource:
+        filename: cstring
+        line: int
+      when useCellIds:
+        id: int
+
+    PCell = ptr Cell
+
+type
+  PPageDesc = ptr PageDesc
+  BitIndex = range[0..UnitsPerPage-1]
+  PageDesc {.final, pure.} = object
     next: PPageDesc # all nodes are connected with this pointer
-    key: TAddress   # start address at bit 0
-    bits: array[TBitIndex, int] # a bit vector
+    key: uint   # start address at bit 0
+    bits: array[BitIndex, int] # a bit vector
 
-  PPageDescArray = ptr array[0..1000_000, PPageDesc]
-  TCellSet {.final, pure.} = object
+  PPageDescArray = ptr UncheckedArray[PPageDesc]
+  CellSet {.final, pure.} = object
     counter, max: int
     head: PPageDesc
     data: PPageDescArray
 
-  PCellArray = ptr array[0..100_000_000, PCell]
-  TCellSeq {.final, pure.} = object
-    len, cap: int
-    d: PCellArray
-
-# ------------------- cell seq handling ---------------------------------------
-
-proc contains(s: TCellSeq, c: PCell): bool {.inline.} =
-  for i in 0 .. s.len-1:
-    if s.d[i] == c: return True
-  return False
-
-proc add(s: var TCellSeq, c: PCell) {.inline.} =
-  if s.len >= s.cap:
-    s.cap = s.cap * 3 div 2
-    var d = cast[PCellArray](Alloc(s.cap * sizeof(PCell)))
-    copyMem(d, s.d, s.len * sizeof(PCell))
-    Dealloc(s.d)
-    s.d = d
-    # XXX: realloc?
-  s.d[s.len] = c
-  inc(s.len)
-
-proc init(s: var TCellSeq, cap: int = 1024) =
-  s.len = 0
-  s.cap = cap
-  s.d = cast[PCellArray](Alloc0(cap * sizeof(PCell)))
-
-proc deinit(s: var TCellSeq) = 
-  Dealloc(s.d)
-  s.d = nil
-  s.len = 0
-  s.cap = 0
+when defined(gcOrc) or defined(gcArc) or defined(gcAtomicArc):
+  discard
+else:
+  include cellseqs_v1
 
 # ------------------- cell set handling ---------------------------------------
 
 const
   InitCellSetSize = 1024 # must be a power of two!
 
-proc Init(s: var TCellSet) =
-  s.data = cast[PPageDescArray](Alloc0(InitCellSetSize * sizeof(PPageDesc)))
+proc init(s: var CellSet) =
+  s.data = cast[PPageDescArray](alloc0(InitCellSetSize * sizeof(PPageDesc)))
   s.max = InitCellSetSize-1
   s.counter = 0
   s.head = nil
 
-proc Deinit(s: var TCellSet) =
+proc deinit(s: var CellSet) =
   var it = s.head
   while it != nil:
     var n = it.next
-    Dealloc(it)
+    dealloc(it)
     it = n
   s.head = nil # play it safe here
-  Dealloc(s.data)
+  dealloc(s.data)
   s.data = nil
   s.counter = 0
 
@@ -95,15 +110,15 @@ proc nextTry(h, maxHash: int): int {.inline.} =
   # For any initial h in range(maxHash), repeating that maxHash times
   # generates each int in range(maxHash) exactly once (see any text on
   # random-number generation for proof).
-  
-proc CellSetGet(t: TCellSet, key: TAddress): PPageDesc =
+
+proc cellSetGet(t: CellSet, key: uint): PPageDesc =
   var h = cast[int](key) and t.max
   while t.data[h] != nil:
     if t.data[h].key == key: return t.data[h]
     h = nextTry(h, t.max)
   return nil
 
-proc CellSetRawInsert(t: TCellSet, data: PPageDescArray, desc: PPageDesc) =
+proc cellSetRawInsert(t: CellSet, data: PPageDescArray, desc: PPageDesc) =
   var h = cast[int](desc.key) and t.max
   while data[h] != nil:
     sysAssert(data[h] != desc, "CellSetRawInsert 1")
@@ -111,17 +126,17 @@ proc CellSetRawInsert(t: TCellSet, data: PPageDescArray, desc: PPageDesc) =
   sysAssert(data[h] == nil, "CellSetRawInsert 2")
   data[h] = desc
 
-proc CellSetEnlarge(t: var TCellSet) =
+proc cellSetEnlarge(t: var CellSet) =
   var oldMax = t.max
   t.max = ((t.max+1)*2)-1
-  var n = cast[PPageDescArray](Alloc0((t.max + 1) * sizeof(PPageDesc)))
-  for i in 0 .. oldmax:
+  var n = cast[PPageDescArray](alloc0((t.max + 1) * sizeof(PPageDesc)))
+  for i in 0 .. oldMax:
     if t.data[i] != nil:
-      CellSetRawInsert(t, n, t.data[i])
-  Dealloc(t.data)
+      cellSetRawInsert(t, n, t.data[i])
+  dealloc(t.data)
   t.data = n
 
-proc CellSetPut(t: var TCellSet, key: TAddress): PPageDesc =
+proc cellSetPut(t: var CellSet, key: uint): PPageDesc =
   var h = cast[int](key) and t.max
   while true:
     var x = t.data[h]
@@ -130,13 +145,13 @@ proc CellSetPut(t: var TCellSet, key: TAddress): PPageDesc =
     h = nextTry(h, t.max)
 
   if ((t.max+1)*2 < t.counter*3) or ((t.max+1)-t.counter < 4):
-    CellSetEnlarge(t)
+    cellSetEnlarge(t)
   inc(t.counter)
   h = cast[int](key) and t.max
   while t.data[h] != nil: h = nextTry(h, t.max)
   sysAssert(t.data[h] == nil, "CellSetPut")
   # the new page descriptor goes into result
-  result = cast[PPageDesc](Alloc0(sizeof(TPageDesc)))
+  result = cast[PPageDesc](alloc0(sizeof(PageDesc)))
   result.next = t.head
   result.key = key
   t.head = result
@@ -144,74 +159,109 @@ proc CellSetPut(t: var TCellSet, key: TAddress): PPageDesc =
 
 # ---------- slightly higher level procs --------------------------------------
 
-proc contains(s: TCellSet, cell: PCell): bool =
-  var u = cast[TAddress](cell)
-  var t = CellSetGet(s, u shr PageShift)
+proc contains(s: CellSet, cell: PCell): bool =
+  var u = cast[uint](cell)
+  var t = cellSetGet(s, u shr PageShift)
   if t != nil:
-    u = (u %% PageSize) /% MemAlign
+    u = (u mod PageSize) div MemAlign
     result = (t.bits[u shr IntShift] and (1 shl (u and IntMask))) != 0
   else:
     result = false
 
-proc incl(s: var TCellSet, cell: PCell) {.noinline.} =
-  var u = cast[TAddress](cell)
-  var t = CellSetPut(s, u shr PageShift)
-  u = (u %% PageSize) /% MemAlign
+proc incl(s: var CellSet, cell: PCell) =
+  var u = cast[uint](cell)
+  var t = cellSetPut(s, u shr PageShift)
+  u = (u mod PageSize) div MemAlign
   t.bits[u shr IntShift] = t.bits[u shr IntShift] or (1 shl (u and IntMask))
 
-proc excl(s: var TCellSet, cell: PCell) =
-  var u = cast[TAddress](cell)
-  var t = CellSetGet(s, u shr PageShift)
+proc excl(s: var CellSet, cell: PCell) =
+  var u = cast[uint](cell)
+  var t = cellSetGet(s, u shr PageShift)
   if t != nil:
-    u = (u %% PageSize) /% MemAlign
+    u = (u mod PageSize) div MemAlign
     t.bits[u shr IntShift] = (t.bits[u shr IntShift] and
                               not (1 shl (u and IntMask)))
 
-proc containsOrIncl(s: var TCellSet, cell: PCell): bool = 
-  var u = cast[TAddress](cell)
-  var t = CellSetGet(s, u shr PageShift)
+proc containsOrIncl(s: var CellSet, cell: PCell): bool =
+  var u = cast[uint](cell)
+  var t = cellSetGet(s, u shr PageShift)
   if t != nil:
-    u = (u %% PageSize) /% MemAlign
+    u = (u mod PageSize) div MemAlign
     result = (t.bits[u shr IntShift] and (1 shl (u and IntMask))) != 0
-    if not result: 
+    if not result:
       t.bits[u shr IntShift] = t.bits[u shr IntShift] or
           (1 shl (u and IntMask))
-  else: 
-    Incl(s, cell)
+  else:
+    incl(s, cell)
     result = false
 
-iterator elements(t: TCellSet): PCell {.inline.} =
+iterator elements(t: CellSet): PCell {.inline.} =
   # while traversing it is forbidden to add pointers to the tree!
   var r = t.head
   while r != nil:
-    var i = 0
-    while i <= high(r.bits):
+    var i: uint = 0
+    while int(i) <= high(r.bits):
       var w = r.bits[i] # taking a copy of r.bits[i] here is correct, because
       # modifying operations are not allowed during traversation
-      var j = 0
+      var j: uint = 0
       while w != 0:         # test all remaining bits for zero
         if (w and 1) != 0:  # the bit is set!
           yield cast[PCell]((r.key shl PageShift) or
-                              (i shl IntShift +% j) *% MemAlign)
+                              (i shl IntShift + j) * MemAlign)
         inc(j)
         w = w shr 1
       inc(i)
     r = r.next
 
-iterator elementsExcept(t, s: TCellSet): PCell {.inline.} =
+when false:
+  type
+    CellSetIter = object
+      p: PPageDesc
+      i, w, j: int
+
+  proc next(it: var CellSetIter): PCell =
+    ## Returns the next cell of the set that `it` walks, or nil once the page
+    ## list is exhausted. Expects `it` to have been set up by `init`.
+    ## NOTE(review): lives under `when false`, so it is currently not compiled.
+    while true:
+      while it.w != 0:         # test all remaining bits for zero
+        let isSet = (it.w and 1) != 0
+        if isSet:              # the bit is set!
+          result = cast[PCell]((it.p.key shl PageShift) or
+                               (it.i shl IntShift +% it.j) *% MemAlign)
+        inc(it.j)
+        it.w = it.w shr 1
+        if isSet: return
+      # load next w:
+      it.j = 0                 # bit index restarts with every loaded word
+      if it.i >= high(it.p.bits):
+        it.i = 0
+        it.p = it.p.next
+        if it.p == nil: return nil
+      else:
+        inc it.i
+      it.w = it.p.bits[it.i]   # index with `it.i`; no bare `i` exists here
+
+  proc init(it: var CellSetIter; t: CellSet): PCell =
+    it.p = t.head
+    it.i = -1
+    it.w = 0
+    result = it.next
+
+iterator elementsExcept(t, s: CellSet): PCell {.inline.} =
   var r = t.head
   while r != nil:
-    let ss = CellSetGet(s, r.key)
-    var i = 0
-    while i <= high(r.bits):
+    let ss = cellSetGet(s, r.key)
+    var i:uint = 0
+    while int(i) <= high(r.bits):
       var w = r.bits[i]
       if ss != nil:
         w = w and not ss.bits[i]
-      var j = 0
+      var j:uint = 0
       while w != 0:
         if (w and 1) != 0:
           yield cast[PCell]((r.key shl PageShift) or
-                              (i shl IntShift +% j) *% MemAlign)
+                              (i shl IntShift + j) * MemAlign)
         inc(j)
         w = w shr 1
       inc(i)
diff --git a/lib/system/cgprocs.nim b/lib/system/cgprocs.nim
index e30cfa469..9a7645f9b 100755..100644
--- a/lib/system/cgprocs.nim
+++ b/lib/system/cgprocs.nim
@@ -1,6 +1,6 @@
 #
 #
-#            Nimrod's Runtime Library
+#            Nim's Runtime Library
 #        (c) Copyright 2012 Andreas Rumpf
 #
 #    See the file "copying.txt", included in this
@@ -8,18 +8,3 @@
 #
 
 # Headers for procs that the code generator depends on ("compilerprocs")
-
-proc addChar(s: NimString, c: char): NimString {.compilerProc.}
-
-type
-  TLibHandle = pointer       # private type
-  TProcAddr = pointer        # libary loading and loading of procs:
-
-proc nimLoadLibrary(path: string): TLibHandle {.compilerproc.}
-proc nimUnloadLibrary(lib: TLibHandle) {.compilerproc.}
-proc nimGetProcAddr(lib: TLibHandle, name: cstring): TProcAddr {.compilerproc.}
-
-proc nimLoadLibraryError(path: string) {.compilerproc, noinline.}
-
-proc setStackBottom(theStackBottom: pointer) {.compilerRtl, noinline.}
-
diff --git a/lib/system/channels.nim b/lib/system/channels.nim
deleted file mode 100755
index 13d751d80..000000000
--- a/lib/system/channels.nim
+++ /dev/null
@@ -1,247 +0,0 @@
-#
-#
-#            Nimrod's Runtime Library
-#        (c) Copyright 2012 Andreas Rumpf
-#
-#    See the file "copying.txt", included in this
-#    distribution, for details about the copyright.
-#
-
-## Channel support for threads. **Note**: This is part of the system module.
-## Do not import it directly. To activate thread support you need to compile
-## with the ``--threads:on`` command line switch.
-##
-## **Note:** The current implementation of message passing is slow and does
-## not work with cyclic data structures.
-
-type
-  pbytes = ptr array[0.. 0xffff, byte]
-  TRawChannel {.pure, final.} = object ## msg queue for a thread
-    rd, wr, count, mask: int
-    data: pbytes
-    lock: TSysLock
-    cond: TSysCond
-    elemType: PNimType
-    ready: bool
-    region: TMemRegion
-  PRawChannel = ptr TRawChannel
-  TLoadStoreMode = enum mStore, mLoad
-  TChannel*[TMsg] = TRawChannel ## a channel for thread communication
-
-const ChannelDeadMask = -2
-
-proc initRawChannel(p: pointer) =
-  var c = cast[PRawChannel](p)
-  initSysLock(c.lock)
-  initSysCond(c.cond)
-  c.mask = -1
-
-proc deinitRawChannel(p: pointer) =
-  var c = cast[PRawChannel](p)
-  # we need to grab the lock to be safe against sending threads!
-  acquireSys(c.lock)
-  c.mask = ChannelDeadMask
-  deallocOsPages(c.region)
-  deinitSys(c.lock)
-  deinitSysCond(c.cond)
-
-proc storeAux(dest, src: Pointer, mt: PNimType, t: PRawChannel, 
-              mode: TLoadStoreMode)
-proc storeAux(dest, src: Pointer, n: ptr TNimNode, t: PRawChannel,
-              mode: TLoadStoreMode) =
-  var
-    d = cast[TAddress](dest)
-    s = cast[TAddress](src)
-  case n.kind
-  of nkSlot: storeAux(cast[pointer](d +% n.offset), 
-                      cast[pointer](s +% n.offset), n.typ, t, mode)
-  of nkList:
-    for i in 0..n.len-1: storeAux(dest, src, n.sons[i], t, mode)
-  of nkCase:
-    copyMem(cast[pointer](d +% n.offset), cast[pointer](s +% n.offset),
-            n.typ.size)
-    var m = selectBranch(src, n)
-    if m != nil: storeAux(dest, src, m, t, mode)
-  of nkNone: sysAssert(false, "storeAux")
-
-proc storeAux(dest, src: Pointer, mt: PNimType, t: PRawChannel, 
-              mode: TLoadStoreMode) =
-  var
-    d = cast[TAddress](dest)
-    s = cast[TAddress](src)
-  sysAssert(mt != nil, "mt == nil")
-  case mt.Kind
-  of tyString:
-    if mode == mStore:
-      var x = cast[ppointer](dest)
-      var s2 = cast[ppointer](s)[]
-      if s2 == nil: 
-        x[] = nil
-      else:
-        var ss = cast[NimString](s2)
-        var ns = cast[NimString](Alloc(t.region, ss.len+1 + GenericSeqSize))
-        copyMem(ns, ss, ss.len+1 + GenericSeqSize)
-        x[] = ns
-    else:
-      var x = cast[ppointer](dest)
-      var s2 = cast[ppointer](s)[]
-      if s2 == nil:
-        unsureAsgnRef(x, s2)
-      else:
-        unsureAsgnRef(x, copyString(cast[NimString](s2)))
-        Dealloc(t.region, s2)
-  of tySequence:
-    var s2 = cast[ppointer](src)[]
-    var seq = cast[PGenericSeq](s2)
-    var x = cast[ppointer](dest)
-    if s2 == nil:
-      if mode == mStore:
-        x[] = nil
-      else:
-        unsureAsgnRef(x, nil)
-    else:
-      sysAssert(dest != nil, "dest == nil")
-      if mode == mStore:
-        x[] = Alloc(t.region, seq.len *% mt.base.size +% GenericSeqSize)
-      else:
-        unsureAsgnRef(x, newObj(mt, seq.len * mt.base.size + GenericSeqSize))
-      var dst = cast[taddress](cast[ppointer](dest)[])
-      for i in 0..seq.len-1:
-        storeAux(
-          cast[pointer](dst +% i*% mt.base.size +% GenericSeqSize),
-          cast[pointer](cast[TAddress](s2) +% i *% mt.base.size +%
-                        GenericSeqSize),
-          mt.Base, t, mode)
-      var dstseq = cast[PGenericSeq](dst)
-      dstseq.len = seq.len
-      dstseq.reserved = seq.len
-      if mode != mStore: Dealloc(t.region, s2)
-  of tyObject:
-    # copy type field:
-    var pint = cast[ptr PNimType](dest)
-    # XXX use dynamic type here!
-    pint[] = mt
-    storeAux(dest, src, mt.node, t, mode)
-  of tyTuple:
-    storeAux(dest, src, mt.node, t, mode)
-  of tyArray, tyArrayConstr:
-    for i in 0..(mt.size div mt.base.size)-1:
-      storeAux(cast[pointer](d +% i*% mt.base.size),
-               cast[pointer](s +% i*% mt.base.size), mt.base, t, mode)
-  of tyRef:
-    var s = cast[ppointer](src)[]
-    var x = cast[ppointer](dest)
-    if s == nil:
-      if mode == mStore:
-        x[] = nil
-      else:
-        unsureAsgnRef(x, nil)
-    else:
-      if mode == mStore:
-        x[] = Alloc(t.region, mt.base.size)
-      else:
-        # XXX we should use the dynamic type here too, but that is not stored
-        # in the inbox at all --> use source[]'s object type? but how? we need
-        # a tyRef to the object!
-        var obj = newObj(mt.base, mt.base.size)
-        unsureAsgnRef(x, obj)
-      storeAux(x[], s, mt.base, t, mode)
-      if mode != mStore: Dealloc(t.region, s)
-  else:
-    copyMem(dest, src, mt.size) # copy raw bits
-
-proc rawSend(q: PRawChannel, data: pointer, typ: PNimType) =
-  ## adds an `item` to the end of the queue `q`.
-  var cap = q.mask+1
-  if q.count >= cap:
-    # start with capacity for 2 entries in the queue:
-    if cap == 0: cap = 1
-    var n = cast[pbytes](Alloc0(q.region, cap*2*typ.size))
-    var z = 0
-    var i = q.rd
-    var c = q.count
-    while c > 0:
-      dec c
-      copyMem(addr(n[z*typ.size]), addr(q.data[i*typ.size]), typ.size)
-      i = (i + 1) and q.mask
-      inc z
-    if q.data != nil: Dealloc(q.region, q.data)
-    q.data = n
-    q.mask = cap*2 - 1
-    q.wr = q.count
-    q.rd = 0
-  storeAux(addr(q.data[q.wr * typ.size]), data, typ, q, mStore)
-  inc q.count
-  q.wr = (q.wr + 1) and q.mask
-
-proc rawRecv(q: PRawChannel, data: pointer, typ: PNimType) =
-  sysAssert q.count > 0, "rawRecv"
-  dec q.count
-  storeAux(data, addr(q.data[q.rd * typ.size]), typ, q, mLoad)
-  q.rd = (q.rd + 1) and q.mask
-
-template lockChannel(q: expr, action: stmt) {.immediate.} =
-  acquireSys(q.lock)
-  action
-  releaseSys(q.lock)
-
-template sendImpl(q: expr) {.immediate.} =  
-  if q.mask == ChannelDeadMask:
-    raise newException(EDeadThread, "cannot send message; thread died")
-  acquireSys(q.lock)
-  var m: TMsg
-  shallowCopy(m, msg)
-  var typ = cast[PNimType](getTypeInfo(msg))
-  rawSend(q, addr(m), typ)
-  q.elemType = typ
-  releaseSys(q.lock)
-  SignalSysCond(q.cond)
-
-proc send*[TMsg](c: var TChannel[TMsg], msg: TMsg) =
-  ## sends a message to a thread. `msg` is deeply copied.
-  var q = cast[PRawChannel](addr(c))
-  sendImpl(q)
-
-proc llRecv(q: PRawChannel, res: pointer, typ: PNimType) =
-  # to save space, the generic is as small as possible
-  acquireSys(q.lock)
-  q.ready = true
-  while q.count <= 0:
-    WaitSysCond(q.cond, q.lock)
-  q.ready = false
-  if typ != q.elemType:
-    releaseSys(q.lock)
-    raise newException(EInvalidValue, "cannot receive message of wrong type")
-  rawRecv(q, res, typ)
-  releaseSys(q.lock)
-
-proc recv*[TMsg](c: var TChannel[TMsg]): TMsg =
-  ## receives a message from the channel `c`. This blocks until
-  ## a message has arrived! You may use ``peek`` to avoid the blocking.
-  var q = cast[PRawChannel](addr(c))
-  llRecv(q, addr(result), cast[PNimType](getTypeInfo(result)))
-
-proc peek*[TMsg](c: var TChannel[TMsg]): int =
-  ## returns the current number of messages in the channel `c`. Returns -1
-  ## if the channel has been closed.
-  var q = cast[PRawChannel](addr(c))
-  if q.mask != ChannelDeadMask:
-    lockChannel(q):
-      result = q.count
-  else:
-    result = -1
-
-proc open*[TMsg](c: var TChannel[TMsg]) =
-  ## opens a channel `c` for inter thread communication.
-  initRawChannel(addr(c))
-
-proc close*[TMsg](c: var TChannel[TMsg]) =
-  ## closes a channel `c` and frees its associated resources.
-  deinitRawChannel(addr(c))
-
-proc ready*[TMsg](c: var TChannel[TMsg]): bool =
-  ## returns true iff some thread is waiting on the channel `c` for
-  ## new messages.
-  var q = cast[PRawChannel](addr(c))
-  result = q.ready
-
diff --git a/lib/system/channels_builtin.nim b/lib/system/channels_builtin.nim
new file mode 100644
index 000000000..02b4d8cbf
--- /dev/null
+++ b/lib/system/channels_builtin.nim
@@ -0,0 +1,459 @@
+#
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2015 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+## Channel support for threads.
+##
+## **Note**: This is part of the system module. Do not import it directly.
+## To activate thread support compile with the `--threads:on` command line switch.
+##
+## **Note:** Channels are designed for the `Thread` type. They are unstable when
+## used with `spawn`.
+##
+## **Note:** The current implementation of message passing does
+## not work with cyclic data structures.
+##
+## **Note:** Channels cannot be passed between threads. Use globals or pass
+## them by `ptr`.
+##
+## Example
+## =======
+## The following is a simple example of two different ways to use channels:
+## blocking and non-blocking.
+##
+##   ```Nim
+##   # Be sure to compile with --threads:on.
+##   # The channels and threads modules are part of system and should not be
+##   # imported.
+##   import std/os
+##
+##   # Channels can either be:
+##   #  - declared at the module level, or
+##   #  - passed to procedures by ptr (raw pointer) -- see note on safety.
+##   #
+##   # For simplicity, in this example a channel is declared at module scope.
+##   # Channels are generic, and they include support for passing objects between
+##   # threads.
+##   # Note that objects passed through channels will be deeply copied.
+##   var chan: Channel[string]
+##
+##   # This proc will be run in another thread using the threads module.
+##   proc firstWorker() =
+##     chan.send("Hello World!")
+##
+##   # This is another proc to run in a background thread. This proc takes a while
+##   # to send the message since it sleeps for 2 seconds (or 2000 milliseconds).
+##   proc secondWorker() =
+##     sleep(2000)
+##     chan.send("Another message")
+##
+##   # Initialize the channel.
+##   chan.open()
+##
+##   # Launch the worker.
+##   var worker1: Thread[void]
+##   createThread(worker1, firstWorker)
+##
+##   # Block until the message arrives, then print it out.
+##   echo chan.recv() # "Hello World!"
+##
+##   # Wait for the thread to exit before moving on to the next example.
+##   worker1.joinThread()
+##
+##   # Launch the other worker.
+##   var worker2: Thread[void]
+##   createThread(worker2, secondWorker)
+##   # This time, use a non-blocking approach with tryRecv.
+##   # Since the main thread is not blocked, it could be used to perform other
+##   # useful work while it waits for data to arrive on the channel.
+##   while true:
+##     let tried = chan.tryRecv()
+##     if tried.dataAvailable:
+##       echo tried.msg # "Another message"
+##       break
+##
+##     echo "Pretend I'm doing useful work..."
+##     # For this example, sleep in order not to flood stdout with the above
+##     # message.
+##     sleep(400)
+##
+##   # Wait for the second thread to exit before cleaning up the channel.
+##   worker2.joinThread()
+##
+##   # Clean up the channel.
+##   chan.close()
+##   ```
+##
+## Sample output
+## -------------
+## The program should output something similar to this, but keep in mind that
+## exact results may vary in the real world:
+##
+##     Hello World!
+##     Pretend I'm doing useful work...
+##     Pretend I'm doing useful work...
+##     Pretend I'm doing useful work...
+##     Pretend I'm doing useful work...
+##     Pretend I'm doing useful work...
+##     Another message
+##
+## Passing Channels Safely
+## -----------------------
+## Note that when passing objects to procedures on another thread by pointer
+## (for example through a thread's argument), objects created using the default
+## allocator will use thread-local, GC-managed memory. Thus it is generally
+## safer to store channel objects in global variables (as in the above example),
+## in which case they will use a process-wide (thread-safe) shared heap.
+##
+## However, it is possible to manually allocate shared memory for channels
+## using e.g. `system.allocShared0` and pass these pointers through thread
+## arguments:
+##
+##   ```Nim
+##   proc worker(channel: ptr Channel[string]) =
+##     let greeting = channel[].recv()
+##     echo greeting
+##
+##   proc localChannelExample() =
+##     # Use allocShared0 to allocate some shared-heap memory and zero it.
+##     # The usual warnings about dealing with raw pointers apply. Exercise caution.
+##     var channel = cast[ptr Channel[string]](
+##       allocShared0(sizeof(Channel[string]))
+##     )
+##     channel[].open()
+##     # Create a thread which will receive the channel as an argument.
+##     var thread: Thread[ptr Channel[string]]
+##     createThread(thread, worker, channel)
+##     channel[].send("Hello from the main thread!")
+##     # Clean up resources.
+##     thread.joinThread()
+##     channel[].close()
+##     deallocShared(channel)
+##
+##   localChannelExample() # "Hello from the main thread!"
+##   ```
+
+when not declared(ThisIsSystem):
+  {.error: "You must not import this module explicitly".}
+
+import std/private/syslocks
+
+type
+  pbytes = ptr UncheckedArray[byte]
+  RawChannel {.pure, final.} = object ## msg queue for a thread
+    rd, wr, count, mask, maxItems: int
+    data: pbytes
+    lock: SysLock
+    cond: SysCond
+    elemType: PNimType
+    ready: bool
+    when not usesDestructors:
+      region: MemRegion
+  PRawChannel = ptr RawChannel
+  LoadStoreMode = enum mStore, mLoad
+  Channel*[TMsg] {.gcsafe.} = RawChannel ## a channel for thread communication
+
+const ChannelDeadMask = -2
+
+proc initRawChannel(p: pointer, maxItems: int) =
+  var c = cast[PRawChannel](p)
+  initSysLock(c.lock)
+  initSysCond(c.cond)
+  c.mask = -1
+  c.maxItems = maxItems
+
+proc deinitRawChannel(p: pointer) =
+  var c = cast[PRawChannel](p)
+  # we need to grab the lock to be safe against sending threads!
+  acquireSys(c.lock)
+  c.mask = ChannelDeadMask
+  when not usesDestructors:
+    deallocOsPages(c.region)
+  else:
+    if c.data != nil: deallocShared(c.data)
+  deinitSys(c.lock)
+  deinitSysCond(c.cond)
+
+when not usesDestructors:
+
+  proc storeAux(dest, src: pointer, mt: PNimType, t: PRawChannel,
+                mode: LoadStoreMode) {.benign.}
+
+  proc storeAux(dest, src: pointer, n: ptr TNimNode, t: PRawChannel,
+                mode: LoadStoreMode) {.benign.} =
+    var
+      d = cast[int](dest)
+      s = cast[int](src)
+    case n.kind
+    of nkSlot: storeAux(cast[pointer](d +% n.offset),
+                        cast[pointer](s +% n.offset), n.typ, t, mode)
+    of nkList:
+      for i in 0..n.len-1: storeAux(dest, src, n.sons[i], t, mode)
+    of nkCase:
+      copyMem(cast[pointer](d +% n.offset), cast[pointer](s +% n.offset),
+              n.typ.size)
+      var m = selectBranch(src, n)
+      if m != nil: storeAux(dest, src, m, t, mode)
+    of nkNone: sysAssert(false, "storeAux")
+
+  proc storeAux(dest, src: pointer, mt: PNimType, t: PRawChannel,
+                mode: LoadStoreMode) =
+    template `+!`(p: pointer; x: int): pointer =
+      cast[pointer](cast[int](p) +% x)
+
+    var
+      d = cast[int](dest)
+      s = cast[int](src)
+    sysAssert(mt != nil, "mt == nil")
+    case mt.kind
+    of tyString:
+      if mode == mStore:
+        var x = cast[PPointer](dest)
+        var s2 = cast[PPointer](s)[]
+        if s2 == nil:
+          x[] = nil
+        else:
+          var ss = cast[NimString](s2)
+          var ns = cast[NimString](alloc(t.region, GenericSeqSize + ss.len+1))
+          copyMem(ns, ss, ss.len+1 + GenericSeqSize)
+          x[] = ns
+      else:
+        var x = cast[PPointer](dest)
+        var s2 = cast[PPointer](s)[]
+        if s2 == nil:
+          unsureAsgnRef(x, s2)
+        else:
+          let y = copyDeepString(cast[NimString](s2))
+          #echo "loaded ", cast[int](y), " ", cast[string](y)
+          unsureAsgnRef(x, y)
+          dealloc(t.region, s2)
+    of tySequence:
+      var s2 = cast[PPointer](src)[]
+      var seq = cast[PGenericSeq](s2)
+      var x = cast[PPointer](dest)
+      if s2 == nil:
+        if mode == mStore:
+          x[] = nil
+        else:
+          unsureAsgnRef(x, nil)
+      else:
+        sysAssert(dest != nil, "dest == nil")
+        if mode == mStore:
+          x[] = alloc0(t.region, align(GenericSeqSize, mt.base.align) +% seq.len *% mt.base.size)
+        else:
+          unsureAsgnRef(x, newSeq(mt, seq.len))
+        var dst = cast[int](cast[PPointer](dest)[])
+        var dstseq = cast[PGenericSeq](dst)
+        dstseq.len = seq.len
+        dstseq.reserved = seq.len
+        for i in 0..seq.len-1:
+          storeAux(
+            cast[pointer](dst +% align(GenericSeqSize, mt.base.align) +% i *% mt.base.size),
+            cast[pointer](cast[int](s2) +% align(GenericSeqSize, mt.base.align) +%
+                          i *% mt.base.size),
+            mt.base, t, mode)
+        if mode != mStore: dealloc(t.region, s2)
+    of tyObject:
+      if mt.base != nil:
+        storeAux(dest, src, mt.base, t, mode)
+      else:
+        # copy type field:
+        var pint = cast[ptr PNimType](dest)
+        pint[] = cast[ptr PNimType](src)[]
+      storeAux(dest, src, mt.node, t, mode)
+    of tyTuple:
+      storeAux(dest, src, mt.node, t, mode)
+    of tyArray, tyArrayConstr:
+      for i in 0..(mt.size div mt.base.size)-1:
+        storeAux(cast[pointer](d +% i *% mt.base.size),
+                cast[pointer](s +% i *% mt.base.size), mt.base, t, mode)
+    of tyRef:
+      var s = cast[PPointer](src)[]
+      var x = cast[PPointer](dest)
+      if s == nil:
+        if mode == mStore:
+          x[] = nil
+        else:
+          unsureAsgnRef(x, nil)
+      else:
+        #let size = if mt.base.kind == tyObject: cast[ptr PNimType](s)[].size
+        #           else: mt.base.size
+        if mode == mStore:
+          let dyntype = when declared(usrToCell): usrToCell(s).typ
+                        else: mt
+          let size = dyntype.base.size
+          # we store the real dynamic 'ref type' at offset 0, so that
+          # no information is lost
+          let a = alloc0(t.region, size+sizeof(pointer))
+          x[] = a
+          cast[PPointer](a)[] = dyntype
+          storeAux(a +! sizeof(pointer), s, dyntype.base, t, mode)
+        else:
+          let dyntype = cast[ptr PNimType](s)[]
+          var obj = newObj(dyntype, dyntype.base.size)
+          unsureAsgnRef(x, obj)
+          storeAux(x[], s +! sizeof(pointer), dyntype.base, t, mode)
+          dealloc(t.region, s)
+    else:
+      copyMem(dest, src, mt.size) # copy raw bits
+
+proc rawSend(q: PRawChannel, data: pointer, typ: PNimType) =
+  ## Appends the message `data` (described by `typ`) to the end of queue `q`.
+  var cap = q.mask+1 # mask is capacity-1; it is -1 right after initRawChannel
+  if q.count >= cap:
+    # queue is full: double it, starting with capacity for 2 entries:
+    if cap == 0: cap = 1
+    when not usesDestructors:
+      var n = cast[pbytes](alloc0(q.region, cap*2*typ.size))
+    else:
+      var n = cast[pbytes](allocShared0(cap*2*typ.size))
+    var z = 0 # write index into the new buffer `n`
+    var i = q.rd # read index into the old ring buffer
+    var c = q.count # number of entries still to move
+    while c > 0:
+      dec c
+      copyMem(addr(n[z*typ.size]), addr(q.data[i*typ.size]), typ.size)
+      i = (i + 1) and q.mask
+      inc z
+    if q.data != nil:
+      when not usesDestructors:
+        dealloc(q.region, q.data)
+      else:
+        deallocShared(q.data)
+    q.data = n
+    q.mask = cap*2 - 1
+    q.wr = q.count # moved entries now occupy slots 0 ..< count
+    q.rd = 0
+  when not usesDestructors:
+    storeAux(addr(q.data[q.wr * typ.size]), data, typ, q, mStore)
+  else:
+    copyMem(addr(q.data[q.wr * typ.size]), data, typ.size)
+  inc q.count
+  q.wr = (q.wr + 1) and q.mask
+
+proc rawRecv(q: PRawChannel, data: pointer, typ: PNimType) =
+  sysAssert q.count > 0, "rawRecv"
+  dec q.count
+  when not usesDestructors:
+    storeAux(data, addr(q.data[q.rd * typ.size]), typ, q, mLoad)
+  else:
+    copyMem(data, addr(q.data[q.rd * typ.size]), typ.size)
+  q.rd = (q.rd + 1) and q.mask
+
+template lockChannel(q, action): untyped =
+  acquireSys(q.lock)
+  action
+  releaseSys(q.lock)
+
+proc sendImpl(q: PRawChannel, typ: PNimType, msg: pointer, noBlock: bool): bool =
+  if q.mask == ChannelDeadMask: # set by deinitRawChannel; read without the lock
+    sysFatal(DeadThreadDefect, "cannot send message; thread died")
+  acquireSys(q.lock)
+  if q.maxItems > 0:
+    # Bounded queue: wait until count is less than maxItems
+    if noBlock and q.count >= q.maxItems:
+      releaseSys(q.lock)
+      return # queue is full: `result` keeps its default `false`
+
+    while q.count >= q.maxItems:
+      waitSysCond(q.cond, q.lock)
+
+  rawSend(q, msg, typ)
+  q.elemType = typ # llRecv compares this against the receiver's type
+  signalSysCond(q.cond)
+  releaseSys(q.lock)
+  result = true
+
+proc send*[TMsg](c: var Channel[TMsg], msg: sink TMsg) {.inline.} =
+  ## Sends a message to a thread. `msg` is deeply copied.
+  discard sendImpl(cast[PRawChannel](addr c), cast[PNimType](getTypeInfo(msg)), unsafeAddr(msg), false)
+  when defined(gcDestructors):
+    wasMoved(msg)
+
+proc trySend*[TMsg](c: var Channel[TMsg], msg: sink TMsg): bool {.inline.} =
+  ## Tries to send a message to a thread.
+  ##
+  ## `msg` is deeply copied. Doesn't block.
+  ##
+  ## Returns `false` if the message was not sent because the number of pending
+  ## items in the channel has already reached `maxItems`.
+  result = sendImpl(cast[PRawChannel](addr c), cast[PNimType](getTypeInfo(msg)), unsafeAddr(msg), true)
+  when defined(gcDestructors):
+    if result:
+      wasMoved(msg)
+
+proc llRecv(q: PRawChannel, res: pointer, typ: PNimType) =
+  q.ready = true # reported by `ready`: a receiver is waiting for a message
+  while q.count <= 0:
+    waitSysCond(q.cond, q.lock) # callers (`recv`, `tryRecv`) hold q.lock here
+  q.ready = false
+  if typ != q.elemType:
+    releaseSys(q.lock) # the raise below skips the caller's own releaseSys
+    raise newException(ValueError, "cannot receive message of wrong type")
+  rawRecv(q, res, typ)
+  if q.maxItems > 0 and q.count == q.maxItems - 1:
+    # A sender may be blocked in sendImpl on the full queue. Wake it up.
+    signalSysCond(q.cond)
+
+proc recv*[TMsg](c: var Channel[TMsg]): TMsg =
+  ## Receives a message from the channel `c`.
+  ##
+  ## This blocks until a message has arrived!
+  ## You may use `peek proc <#peek,Channel[TMsg]>`_ to avoid the blocking.
+  var q = cast[PRawChannel](addr(c))
+  acquireSys(q.lock)
+  llRecv(q, addr(result), cast[PNimType](getTypeInfo(result)))
+  releaseSys(q.lock)
+
+proc tryRecv*[TMsg](c: var Channel[TMsg]): tuple[dataAvailable: bool,
+                                                  msg: TMsg] =
+  ## Tries to receive a message from the channel `c` without blocking. This
+  ## fails if the channel is closed, its lock is contended, or it is empty.
+  ##
+  ## If it fails, it returns `(false, default(msg))`; otherwise it
+  ## returns `(true, msg)`.
+  var q = cast[PRawChannel](addr(c))
+  if q.mask != ChannelDeadMask:
+    if tryAcquireSys(q.lock):
+      if q.count > 0:
+        llRecv(q, addr(result.msg), cast[PNimType](getTypeInfo(result.msg)))
+        result.dataAvailable = true
+      releaseSys(q.lock)
+
+proc peek*[TMsg](c: var Channel[TMsg]): int =
+  ## Returns the current number of messages in the channel `c`.
+  ##
+  ## Returns -1 if the channel has been closed.
+  ##
+  ## **Note**: This is dangerous to use as it encourages races.
+  ## It's much better to use `tryRecv proc <#tryRecv,Channel[TMsg]>`_ instead.
+  var q = cast[PRawChannel](addr(c))
+  if q.mask != ChannelDeadMask:
+    lockChannel(q):
+      result = q.count
+  else:
+    result = -1
+
+proc open*[TMsg](c: var Channel[TMsg], maxItems: int = 0) =
+  ## Opens a channel `c` for inter thread communication.
+  ##
+  ## If `maxItems` is positive, the `send` operation will block until the
+  ## number of unprocessed items is less than `maxItems`.
+  ##
+  ## For an unlimited queue, set `maxItems` to 0.
+  initRawChannel(addr(c), maxItems)
+
+proc close*[TMsg](c: var Channel[TMsg]) =
+  ## Closes a channel `c` and frees its associated resources.
+  deinitRawChannel(addr(c))
+
+proc ready*[TMsg](c: var Channel[TMsg]): bool =
+  ## Returns true if some thread is waiting on the channel `c` for
+  ## new messages.
+  var q = cast[PRawChannel](addr(c))
+  result = q.ready
diff --git a/lib/system/chcks.nim b/lib/system/chcks.nim
new file mode 100644
index 000000000..b48855964
--- /dev/null
+++ b/lib/system/chcks.nim
@@ -0,0 +1,161 @@
+#
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2013 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+# Implementation of some runtime checks.
+include system/indexerrors
+when defined(nimPreviewSlimSystem):
+  import std/formatfloat
+
+proc raiseRangeError(val: BiggestInt) {.compilerproc, noinline.} =
+  when hostOS == "standalone": # standalone target: fixed message, `val` is not formatted
+    sysFatal(RangeDefect, "value out of range")
+  else:
+    sysFatal(RangeDefect, "value out of range: ", $val) # message plus the offending value
+
+proc raiseIndexError4(l1, h1, h2: int) {.compilerproc, noinline.} =
+  sysFatal(IndexDefect, "index out of bounds: " & $l1 & ".." & $h1 & " notin 0.." & $(h2 - 1)) # reports range l1..h1 against valid 0..h2-1
+
+proc raiseIndexError3(i, a, b: int) {.compilerproc, noinline.} =
+  sysFatal(IndexDefect, formatErrorIndexBound(i, a, b)) # index `i` with bounds `a`, `b`; text built by the formatter
+
+proc raiseIndexError2(i, n: int) {.compilerproc, noinline.} =
+  sysFatal(IndexDefect, formatErrorIndexBound(i, n)) # two-argument form of the same formatter
+
+proc raiseIndexError() {.compilerproc, noinline.} =
+  sysFatal(IndexDefect, "index out of bounds") # variant without any details
+
+proc raiseFieldError(f: string) {.compilerproc, noinline.} =
+  ## Raises `FieldDefect` with `f` as the message; remove after bootstrap > 1.5.1.
+  sysFatal(FieldDefect, f)
+
+when defined(nimV2):
+  proc raiseFieldError2(f: string, discVal: int) {.compilerproc, noinline.} =
+    ## Raised when a field is inaccessible given the runtime value (`discVal`) of the discriminant.
+    sysFatal(FieldDefect, f & $discVal & "'") # appends value and closing quote; presumably `f` ends with the opening quote
+
+  proc raiseFieldErrorStr(f: string, discVal: string) {.compilerproc, noinline.} =
+    ## Raised when a field is inaccessible; `discVal` is the discriminant value already rendered as text.
+    sysFatal(FieldDefect, formatFieldDefect(f, discVal))
+else: # without nimV2 the discriminant value always arrives as a string
+  proc raiseFieldError2(f: string, discVal: string) {.compilerproc, noinline.} =
+    ## Raised when a field is inaccessible given the runtime value (`discVal`) of the discriminant.
+    sysFatal(FieldDefect, formatFieldDefect(f, discVal))
+
+proc raiseRangeErrorI(i, a, b: BiggestInt) {.compilerproc, noinline.} =
+  when defined(standalone): # standalone: fixed message, no value formatting
+    sysFatal(RangeDefect, "value out of range")
+  else:
+    sysFatal(RangeDefect, "value out of range: " & $i & " notin " & $a & " .. " & $b) # value `i` and the range a .. b
+
+proc raiseRangeErrorF(i, a, b: float) {.compilerproc, noinline.} =
+  when defined(standalone): # standalone: fixed message, no value formatting
+    sysFatal(RangeDefect, "value out of range")
+  else:
+    sysFatal(RangeDefect, "value out of range: " & $i & " notin " & $a & " .. " & $b) # float counterpart of raiseRangeErrorI
+
+proc raiseRangeErrorU(i, a, b: uint64) {.compilerproc, noinline.} =
+  # TODO: better error reporting; `i`, `a` and `b` are currently not part of the message
+  sysFatal(RangeDefect, "value out of range")
+
+proc raiseRangeErrorNoArgs() {.compilerproc, noinline.} =
+  sysFatal(RangeDefect, "value out of range") # same fixed message, for callers without operands
+
+proc raiseObjectConversionError() {.compilerproc, noinline.} =
+  sysFatal(ObjectConversionDefect, "invalid object conversion") # same message as the failure in chckObj
+
+proc chckIndx(i, a, b: int): int =
+  # Passes `i` through when a <= i <= b; otherwise reports an index error.
+  if i < a or i > b:
+    raiseIndexError3(i, a, b)
+  result = i
+
+proc chckRange(i, a, b: int): int =
+  # Passes `i` through when a <= i <= b; otherwise reports a range error.
+  if i < a or i > b:
+    raiseRangeError(i)
+  result = i
+
+proc chckRange64(i, a, b: int64): int64 {.compilerproc.} =
+  # 64-bit variant: passes `i` through when a <= i <= b, else range error.
+  if i < a or i > b:
+    raiseRangeError(i)
+  result = i
+
+proc chckRangeU(i, a, b: uint64): uint64 {.compilerproc.} =
+  # Unsigned variant: passes `i` through when a <= i <= b, else range error.
+  if i < a or i > b:
+    sysFatal(RangeDefect, "value out of range")
+  result = i
+
+proc chckRangeF(x, a, b: float): float =
+  if x >= a and x <= b: # a NaN `x` fails both comparisons and is reported below
+    return x
+  else:
+    when hostOS == "standalone": # standalone: fixed message, `x` is not formatted
+      sysFatal(RangeDefect, "value out of range")
+    else:
+      sysFatal(RangeDefect, "value out of range: ", $x)
+
+proc chckNil(p: pointer) = # fails with NilAccessDefect when `p` is nil, otherwise does nothing
+  if p == nil:
+    sysFatal(NilAccessDefect, "attempt to write to a nil address")
+
+proc chckNilDisp(p: pointer) {.compilerproc.} = # same check, with the message used for dispatch
+  if p == nil:
+    sysFatal(NilAccessDefect, "cannot dispatch; dispatcher is nil")
+
+when not defined(nimV2):
+
+  proc chckObj(obj, subclass: PNimType) {.compilerproc.} =
+    # Walks the `base` chain from `obj`; fails unless `subclass` is reached:
+    var x = obj
+    if x == subclass: return # optimized fast path
+    while x != subclass:
+      if x == nil: # end of the chain without meeting `subclass`
+        sysFatal(ObjectConversionDefect, "invalid object conversion")
+      x = x.base
+
+  proc chckObjAsgn(a, b: PNimType) {.compilerproc, inline.} =
+    if a != b: # the two type descriptors must be identical, not merely related
+      sysFatal(ObjectAssignmentDefect, "invalid object assignment")
+
+  type ObjCheckCache = array[0..1, PNimType] # slot 0: last type that failed, slot 1: last type that matched
+
+  proc isObjSlowPath(obj, subclass: PNimType;
+                    cache: var ObjCheckCache): bool {.noinline.} =
+    # Walks the chain starting at `obj.base` and records the outcome in `cache`:
+    var x = obj.base
+    while x != subclass:
+      if x == nil:
+        cache[0] = obj # remember the negative answer for `obj`
+        return false
+      x = x.base
+    cache[1] = obj # remember the positive answer for `obj`
+    return true
+
+  proc isObjWithCache(obj, subclass: PNimType;
+                      cache: var ObjCheckCache): bool {.compilerproc, inline.} =
+    if obj == subclass: return true # exact type
+    if obj.base == subclass: return true # direct parent
+    if cache[0] == obj: return false # cached negative result
+    if cache[1] == obj: return true # cached positive result
+    return isObjSlowPath(obj, subclass, cache)
+
+  proc isObj(obj, subclass: PNimType): bool {.compilerproc.} =
+    # Uncached check: true when `subclass` occurs on the `base` chain of `obj`:
+    var x = obj
+    if x == subclass: return true # optimized fast path
+    while x != subclass:
+      if x == nil: return false # chain exhausted
+      x = x.base
+    return true
+
+when defined(nimV2): # only declared for nimV2 builds
+  proc raiseObjectCaseTransition() {.compilerproc.} =
+    sysFatal(FieldDefect, "assignment to discriminant changes object branch")
diff --git a/lib/system/comparisons.nim b/lib/system/comparisons.nim
new file mode 100644
index 000000000..a8d78bb93
--- /dev/null
+++ b/lib/system/comparisons.nim
@@ -0,0 +1,337 @@
+# comparison operators:
+proc `==`*[Enum: enum](x, y: Enum): bool {.magic: "EqEnum", noSideEffect.} =
+  ## Checks whether values within the *same enum* have the same underlying value.
+  runnableExamples:
+    type
+      Enum1 = enum
+        field1 = 3, field2
+      Enum2 = enum
+        place1, place2 = 3
+    var
+      e1 = field1
+      e2 = place2.ord.Enum1
+    assert e1 == e2
+    assert not compiles(e1 == place2) # values of different enum types cannot be compared
+proc `==`*(x, y: pointer): bool {.magic: "EqRef", noSideEffect.} =
+  ## Checks for equality between two `pointer` variables.
+  runnableExamples:
+    var # this is a wildly dangerous example
+      a = cast[pointer](0)
+      b = cast[pointer](nil)
+    assert a == b # true due to the special meaning of `nil`/0 as a pointer
+proc `==`*(x, y: string): bool {.magic: "EqStr", noSideEffect.}
+  ## Checks for equality between two `string` variables.
+
+proc `==`*(x, y: char): bool {.magic: "EqCh", noSideEffect.}
+  ## Checks for equality between two `char` variables.
+proc `==`*(x, y: bool): bool {.magic: "EqB", noSideEffect.}
+  ## Checks for equality between two `bool` variables.
+proc `==`*[T](x, y: set[T]): bool {.magic: "EqSet", noSideEffect.} =
+  ## Checks for equality between two variables of type `set`.
+  runnableExamples:
+    assert {1, 2, 2, 3} == {1, 2, 3} # duplication in sets is ignored
+
+proc `==`*[T](x, y: ref T): bool {.magic: "EqRef", noSideEffect.}
+  ## Checks that two `ref` variables refer to the same item.
+proc `==`*[T](x, y: ptr T): bool {.magic: "EqRef", noSideEffect.}
+  ## Checks that two `ptr` variables refer to the same item.
+proc `==`*[T: proc | iterator](x, y: T): bool {.magic: "EqProc", noSideEffect.}
+  ## Checks that two `proc` or `iterator` variables refer to the same routine.
+
+proc `<=`*[Enum: enum](x, y: Enum): bool {.magic: "LeEnum", noSideEffect.} # `<=` for two values of the same enum type
+proc `<=`*(x, y: string): bool {.magic: "LeStr", noSideEffect.} =
+  ## Compares two strings and returns true if `x` is lexicographically
+  ## before or equal to `y` (uppercase letters come before lowercase letters).
+  runnableExamples:
+    let
+      a = "abc"
+      b = "abd"
+      c = "ZZZ"
+    assert a <= b
+    assert a <= a
+    assert not (a <= c)
+
+proc `<=`*(x, y: char): bool {.magic: "LeCh", noSideEffect.} =
+  ## Compares two chars and returns true if `x` is lexicographically
+  ## before or equal to `y` (uppercase letters come before lowercase letters).
+  runnableExamples:
+    let
+      a = 'a'
+      b = 'b'
+      c = 'Z'
+    assert a <= b
+    assert a <= a
+    assert not (a <= c)
+
+proc `<=`*[T](x, y: set[T]): bool {.magic: "LeSet", noSideEffect.} =
+  ## Returns true if `x` is a subset of `y`.
+  ##
+  ## A subset `x` has all of its members in `y`, and `y` doesn't necessarily
+  ## have more members than `x`. That is, `x` can be equal to `y`.
+  runnableExamples:
+    let
+      a = {3, 5}
+      b = {1, 3, 5, 7}
+      c = {2}
+    assert a <= b
+    assert a <= a
+    assert not (a <= c)
+
+proc `<=`*(x, y: bool): bool {.magic: "LeB", noSideEffect.}
+proc `<=`*[T](x, y: ref T): bool {.magic: "LePtr", noSideEffect.} # NOTE(review): presumably compares addresses (magic "LePtr") - confirm
+proc `<=`*(x, y: pointer): bool {.magic: "LePtr", noSideEffect.} # shares the "LePtr" magic with the `ref T` overload
+
+proc `<`*[Enum: enum](x, y: Enum): bool {.magic: "LtEnum", noSideEffect.} # `<` for two values of the same enum type
+proc `<`*(x, y: string): bool {.magic: "LtStr", noSideEffect.} =
+  ## Compares two strings and returns true if `x` is lexicographically
+  ## strictly before `y` (uppercase letters come before lowercase letters).
+  runnableExamples:
+    let
+      a = "abc"
+      b = "abd"
+      c = "ZZZ"
+    assert a < b
+    assert not (a < a)
+    assert not (a < c)
+
+proc `<`*(x, y: char): bool {.magic: "LtCh", noSideEffect.} =
+  ## Compares two chars and returns true if `x` is lexicographically
+  ## strictly before `y` (uppercase letters come before lowercase letters).
+  runnableExamples:
+    let
+      a = 'a'
+      b = 'b'
+      c = 'Z'
+    assert a < b
+    assert not (a < a)
+    assert not (a < c)
+
+proc `<`*[T](x, y: set[T]): bool {.magic: "LtSet", noSideEffect.} =
+  ## Returns true if `x` is a strict or proper subset of `y`.
+  ##
+  ## A strict or proper subset `x` has all of its members in `y` but `y` has
+  ## more elements than `x`.
+  runnableExamples:
+    let
+      a = {3, 5}
+      b = {1, 3, 5, 7}
+      c = {2}
+    assert a < b
+    assert not (a < a)
+    assert not (a < c)
+
+proc `<`*(x, y: bool): bool {.magic: "LtB", noSideEffect.}
+proc `<`*[T](x, y: ref T): bool {.magic: "LtPtr", noSideEffect.} # NOTE(review): presumably compares addresses (magic "LtPtr") - confirm
+proc `<`*[T](x, y: ptr T): bool {.magic: "LtPtr", noSideEffect.}
+proc `<`*(x, y: pointer): bool {.magic: "LtPtr", noSideEffect.}
+
+when not defined(nimHasCallsitePragma): # declare `callsite` as an empty user pragma so the templates below still compile
+  {.pragma: callsite.}
+
+template `!=`*(x, y: untyped): untyped {.callsite.} =
+  ## Unequals operator. This is a shorthand for `not (x == y)`.
+  not (x == y)
+
+template `>=`*(x, y: untyped): untyped {.callsite.} =
+  ## "is greater than or equal to" operator. This is the same as `y <= x`.
+  y <= x
+
+template `>`*(x, y: untyped): untyped {.callsite.} =
+  ## "is greater than" operator. This is the same as `y < x`.
+  y < x
+
+
+proc `==`*(x, y: int): bool {.magic: "EqI", noSideEffect.}
+  ## Compares two integers for equality (overloads for each signed size follow).
+proc `==`*(x, y: int8): bool {.magic: "EqI", noSideEffect.}
+proc `==`*(x, y: int16): bool {.magic: "EqI", noSideEffect.}
+proc `==`*(x, y: int32): bool {.magic: "EqI", noSideEffect.}
+proc `==`*(x, y: int64): bool {.magic: "EqI", noSideEffect.}
+
+proc `<=`*(x, y: int): bool {.magic: "LeI", noSideEffect.}
+  ## Returns true if `x` is less than or equal to `y` (signed comparison).
+proc `<=`*(x, y: int8): bool {.magic: "LeI", noSideEffect.}
+proc `<=`*(x, y: int16): bool {.magic: "LeI", noSideEffect.}
+proc `<=`*(x, y: int32): bool {.magic: "LeI", noSideEffect.}
+proc `<=`*(x, y: int64): bool {.magic: "LeI", noSideEffect.}
+
+proc `<`*(x, y: int): bool {.magic: "LtI", noSideEffect.}
+  ## Returns true if `x` is less than `y` (signed comparison).
+proc `<`*(x, y: int8): bool {.magic: "LtI", noSideEffect.}
+proc `<`*(x, y: int16): bool {.magic: "LtI", noSideEffect.}
+proc `<`*(x, y: int32): bool {.magic: "LtI", noSideEffect.}
+proc `<`*(x, y: int64): bool {.magic: "LtI", noSideEffect.}
+
+proc `<=`*(x, y: uint): bool {.magic: "LeU", noSideEffect.}
+  ## Returns true if `x <= y` for unsigned integers.
+proc `<=`*(x, y: uint8): bool {.magic: "LeU", noSideEffect.}
+proc `<=`*(x, y: uint16): bool {.magic: "LeU", noSideEffect.}
+proc `<=`*(x, y: uint32): bool {.magic: "LeU", noSideEffect.}
+proc `<=`*(x, y: uint64): bool {.magic: "LeU", noSideEffect.}
+
+proc `<`*(x, y: uint): bool {.magic: "LtU", noSideEffect.}
+  ## Returns true if `x < y` for unsigned integers.
+proc `<`*(x, y: uint8): bool {.magic: "LtU", noSideEffect.}
+proc `<`*(x, y: uint16): bool {.magic: "LtU", noSideEffect.}
+proc `<`*(x, y: uint32): bool {.magic: "LtU", noSideEffect.}
+proc `<`*(x, y: uint64): bool {.magic: "LtU", noSideEffect.}
+
+proc `<=%`*(x, y: int): bool {.inline.} =
+  ## Treats `x` and `y` as unsigned and compares them.
+  ## Returns true if `unsigned(x) <= unsigned(y)`; e.g. `-1 <=% 1` is false.
+  cast[uint](x) <= cast[uint](y)
+proc `<=%`*(x, y: int8): bool {.inline.} = cast[uint8](x) <= cast[uint8](y)
+proc `<=%`*(x, y: int16): bool {.inline.} = cast[uint16](x) <= cast[uint16](y)
+proc `<=%`*(x, y: int32): bool {.inline.} = cast[uint32](x) <= cast[uint32](y)
+proc `<=%`*(x, y: int64): bool {.inline.} = cast[uint64](x) <= cast[uint64](y)
+
+proc `<%`*(x, y: int): bool {.inline.} =
+  ## Treats `x` and `y` as unsigned and compares them.
+  ## Returns true if `unsigned(x) < unsigned(y)`; e.g. `1 <% -1` is true.
+  cast[uint](x) < cast[uint](y)
+proc `<%`*(x, y: int8): bool {.inline.} = cast[uint8](x) < cast[uint8](y)
+proc `<%`*(x, y: int16): bool {.inline.} = cast[uint16](x) < cast[uint16](y)
+proc `<%`*(x, y: int32): bool {.inline.} = cast[uint32](x) < cast[uint32](y)
+proc `<%`*(x, y: int64): bool {.inline.} = cast[uint64](x) < cast[uint64](y)
+
+template `>=%`*(x, y: untyped): untyped = y <=% x
+  ## Treats `x` and `y` as unsigned and compares them.
+  ## Returns true if `unsigned(x) >= unsigned(y)`; implemented as `y <=% x`.
+
+template `>%`*(x, y: untyped): untyped = y <% x
+  ## Treats `x` and `y` as unsigned and compares them.
+  ## Returns true if `unsigned(x) > unsigned(y)`; implemented as `y <% x`.
+
+proc `==`*(x, y: uint): bool {.magic: "EqI", noSideEffect.}
+  ## Compares two unsigned integers for equality (same "EqI" magic as the signed overloads).
+proc `==`*(x, y: uint8): bool {.magic: "EqI", noSideEffect.}
+proc `==`*(x, y: uint16): bool {.magic: "EqI", noSideEffect.}
+proc `==`*(x, y: uint32): bool {.magic: "EqI", noSideEffect.}
+proc `==`*(x, y: uint64): bool {.magic: "EqI", noSideEffect.}
+
+proc `<=`*(x, y: float32): bool {.magic: "LeF64", noSideEffect.} # float32 uses the same F64 magic as float
+proc `<=`*(x, y: float): bool {.magic: "LeF64", noSideEffect.}
+
+proc `<`*(x, y: float32): bool {.magic: "LtF64", noSideEffect.} # float32 uses the same F64 magic as float
+proc `<`*(x, y: float): bool {.magic: "LtF64", noSideEffect.}
+
+proc `==`*(x, y: float32): bool {.magic: "EqF64", noSideEffect.} # float32 uses the same F64 magic as float
+proc `==`*(x, y: float): bool {.magic: "EqF64", noSideEffect.}
+
+{.push stackTrace: off.}
+
+proc min*(x, y: int): int {.magic: "MinI", noSideEffect.} =
+  if x <= y: x else: y
+proc min*(x, y: int8): int8 {.magic: "MinI", noSideEffect.} =
+  if x <= y: x else: y
+proc min*(x, y: int16): int16 {.magic: "MinI", noSideEffect.} =
+  if x <= y: x else: y
+proc min*(x, y: int32): int32 {.magic: "MinI", noSideEffect.} =
+  if x <= y: x else: y
+proc min*(x, y: int64): int64 {.magic: "MinI", noSideEffect.} =
+  ## The minimum value of two integers.
+  if x <= y: x else: y
+proc min*(x, y: float32): float32 {.noSideEffect, inline.} =
+  if x <= y or y != y: x else: y # `y != y` holds only for NaN, so a NaN `y` yields `x`
+proc min*(x, y: float64): float64 {.noSideEffect, inline.} =
+  if x <= y or y != y: x else: y # a NaN `x` with a numeric `y` yields `y` (both tests fail)
+proc min*[T: not SomeFloat](x, y: T): T {.inline.} =
+  ## Generic minimum operator of 2 values based on `<=`; returns `x` when both compare equal.
+  if x <= y: x else: y
+
+proc max*(x, y: int): int {.magic: "MaxI", noSideEffect.} =
+  if y <= x: x else: y
+proc max*(x, y: int8): int8 {.magic: "MaxI", noSideEffect.} =
+  if y <= x: x else: y
+proc max*(x, y: int16): int16 {.magic: "MaxI", noSideEffect.} =
+  if y <= x: x else: y
+proc max*(x, y: int32): int32 {.magic: "MaxI", noSideEffect.} =
+  if y <= x: x else: y
+proc max*(x, y: int64): int64 {.magic: "MaxI", noSideEffect.} =
+  ## The maximum value of two integers.
+  if y <= x: x else: y
+proc max*(x, y: float32): float32 {.noSideEffect, inline.} =
+  if y <= x or y != y: x else: y # `y != y` holds only for NaN, so a NaN `y` yields `x`
+proc max*(x, y: float64): float64 {.noSideEffect, inline.} =
+  if y <= x or y != y: x else: y # a NaN `x` with a numeric `y` yields `y` (both tests fail)
+proc max*[T: not SomeFloat](x, y: T): T {.inline.} =
+  ## Generic maximum operator of 2 values based on `<=`; returns `x` when both compare equal.
+  if y <= x: x else: y
+
+
+proc min*[T](x: openArray[T]): T =
+  ## The minimum value of `x`. `T` needs to have a `<` operator; `x` must not be empty.
+  result = x[0] # read unconditionally, hence the non-empty requirement
+  for i in 1..high(x):
+    if x[i] < result: result = x[i] # strict `<`: the first of several equal minima is kept
+
+proc max*[T](x: openArray[T]): T =
+  ## The maximum value of `x`. `T` needs to have a `<` operator; `x` must not be empty.
+  result = x[0] # read unconditionally, hence the non-empty requirement
+  for i in 1..high(x):
+    if result < x[i]: result = x[i] # strict `<`: the first of several equal maxima is kept
+
+{.pop.} # stackTrace: off
+
+
+proc clamp*[T](x, a, b: T): T =
+  ## Restricts `x` to the closed interval \[a, b]: values below `a` become
+  ## `a`, values above `b` become `b`, everything else is returned as is.
+  ## Equivalent to, but faster than, `max(a, min(b, x))`.
+  ##
+  ## .. warning:: `a <= b` is assumed and will not be checked (currently).
+  ##
+  ## **See also:** `math.clamp` for a version that takes a `Slice[T]`.
+  runnableExamples:
+    assert (1.4).clamp(0.0, 1.0) == 1.0
+    assert (0.5).clamp(0.0, 1.0) == 0.5
+    assert 4.clamp(1, 3) == max(1, min(3, 4))
+  if x < a: a
+  elif x > b: b
+  else: x
+
+
+proc `==`*[I, T](x, y: array[I, T]): bool =
+  # Element-wise equality of two arrays of the same type; uses `!=` of `T`.
+  result = true
+  for idx in low(x)..high(x):
+    if x[idx] != y[idx]: return false
+
+proc `==`*[T](x, y: openArray[T]): bool =
+  # Equal when both views have the same length and pairwise equal elements.
+  result = x.len == y.len
+  if result:
+    for idx in low(x)..high(x):
+      if x[idx] != y[idx]:
+        return false
+
+
+proc `==`*[T](x, y: seq[T]): bool {.noSideEffect.} =
+  ## Generic equals operator for sequences: relies on an equals operator for
+  ## the element type `T`.
+  when nimvm: # in the VM only the "both empty" shortcut is taken
+    if x.len == 0 and y.len == 0:
+      return true
+  else:
+    when not defined(js):
+      proc seqToPtr[T](x: seq[T]): pointer {.inline, noSideEffect.} = # extracts the raw pointer used for the identity test below
+        when defined(nimSeqsV2):
+          result = cast[NimSeqV2[T]](x).p
+        else:
+          result = cast[pointer](x)
+
+      if seqToPtr(x) == seqToPtr(y): # identical pointers: equal without comparing elements
+        return true
+    else:
+      var sameObject = false # assigned by the emitted JS `===` test on the next line
+      {.emit: """`sameObject` = `x` === `y`;""".}
+      if sameObject: return true
+
+  if x.len != y.len: # general path: lengths first, then element by element
+    return false
+
+  for i in 0..x.len-1:
+    if x[i] != y[i]:
+      return false
+
+  return true
diff --git a/lib/system/compilation.nim b/lib/system/compilation.nim
new file mode 100644
index 000000000..cdb976ed5
--- /dev/null
+++ b/lib/system/compilation.nim
@@ -0,0 +1,209 @@
+const
+  NimMajor* {.intdefine.}: int = 2
+    ## is the major number of Nim's version. Example:
+    ##   ```nim
+    ##   when (NimMajor, NimMinor, NimPatch) >= (1, 3, 1): discard
+    ##   ```
+    # see also std/private/since; `intdefine` lets `-d:NimMajor=N` override the default
+
+  NimMinor* {.intdefine.}: int = 2
+    ## is the minor number of Nim's version.
+    ## Odd for devel, even for releases.
+
+  NimPatch* {.intdefine.}: int = 1
+    ## is the patch number of Nim's version.
+    ## Odd for devel, even for releases.
+
+{.push profiler: off.}
+let nimvm* {.magic: "Nimvm", compileTime.}: bool = false
+  ## May be used only in a `when` expression.
+  ## It is true in Nim VM context and false otherwise.
+{.pop.}
+
+const
+  isMainModule* {.magic: "IsMainModule".}: bool = false
+    ## True only when accessed in the main module. This works thanks to
+    ## compiler magic. It is useful for embedding testing code in a module.
+
+  CompileDate* {.magic: "CompileDate".}: string = "0000-00-00"
+    ## The date (in UTC) of compilation as a string of the form
+    ## `YYYY-MM-DD`. This works thanks to compiler magic.
+
+  CompileTime* {.magic: "CompileTime".}: string = "00:00:00"
+    ## The time (in UTC) of compilation as a string of the form
+    ## `HH:MM:SS`. This works thanks to compiler magic.
+
+proc defined*(x: untyped): bool {.magic: "Defined", noSideEffect, compileTime.}
+  ## Special compile-time procedure that checks whether `x` is
+  ## defined.
+  ##
+  ## `x` is an external symbol introduced through the compiler's
+  ## `-d:x switch <nimc.html#compiler-usage-compileminustime-symbols>`_ to enable
+  ## build time conditionals:
+  ##   ```nim
+  ##   when not defined(release):
+  ##     # Do programmer-friendly but expensive sanity checks here.
+  ##   # Put the normal code here
+  ##   ```
+  ##
+  ## See also:
+  ## * `compileOption <#compileOption,string>`_ for `on|off` options
+  ## * `compileOption <#compileOption,string,string>`_ for enum options
+  ## * `define pragmas <manual.html#implementation-specific-pragmas-compileminustime-define-pragmas>`_
+
+proc declared*(x: untyped): bool {.magic: "Declared", noSideEffect, compileTime.}
+  ## Special compile-time procedure that checks whether `x` is
+  ## declared. `x` has to be an identifier or a qualified identifier.
+  ##
+  ## This can be used to check whether a library provides a certain
+  ## feature or not:
+  ##   ```nim
+  ##   when not declared(strutils.toUpper):
+  ##     # provide our own toUpper proc here, because strutils is
+  ##     # missing it.
+  ##   ```
+  ##
+  ## See also:
+  ## * `declaredInScope <#declaredInScope,untyped>`_
+
+proc declaredInScope*(x: untyped): bool {.magic: "DeclaredInScope", noSideEffect, compileTime.}
+  ## Special compile-time procedure that checks whether `x` is
+  ## declared in the current scope. `x` has to be an identifier.
+
+proc compiles*(x: untyped): bool {.magic: "Compiles", noSideEffect, compileTime.} =
+  ## Special compile-time procedure that checks whether `x` can be compiled
+  ## without any semantic error.
+  ## This can be used to check whether a type supports some operation:
+  ##   ```nim
+  ##   when compiles(3 + 4):
+  ##     echo "'+' for integers is available"
+  ##   ```
+  discard # placeholder body; the declaration is bound to the "Compiles" magic
+
+proc astToStr*[T](x: T): string {.magic: "AstToStr", noSideEffect.}
+  ## Converts the AST of `x` into a string representation. This is very useful
+  ## for debugging.
+
+proc runnableExamples*(rdoccmd = "", body: untyped) {.magic: "RunnableExamples".} =
+  ## Use this section to mark `runnable example`:idx: code.
+  ##
+  ## - In normal debug and release builds code within
+  ##   a `runnableExamples` section is ignored.
+  ## - The documentation generator is aware of these examples and considers them
+  ##   part of the `##` doc comment. As the last step of documentation
+  ##   generation each runnableExample is put in its own file `$file_examples$i.nim`,
+  ##   compiled and tested. The collected examples are
+  ##   put into their own module to ensure the examples do not refer to
+  ##   non-exported symbols.
+  runnableExamples:
+    proc timesTwo*(x: int): int =
+      ## This proc doubles a number.
+      runnableExamples:
+        # at module scope
+        const exported* = 123
+        assert timesTwo(5) == 10
+        block: # at block scope
+          defer: echo "done"
+      runnableExamples "-d:foo -b:cpp":
+        import std/compilesettings
+        assert querySetting(backend) == "cpp"
+        assert defined(foo)
+      runnableExamples "-r:off": ## this one is only compiled, not run
+         import std/browsers
+         openDefaultBrowser "https://forum.nim-lang.org/"
+      2 * x
+
+proc compileOption*(option: string): bool {.
+  magic: "CompileOption", noSideEffect.} =
+  ## Returns whether the `on|off` compile-time option `option` is enabled.
+  ##
+  ## See also:
+  ## * `compileOption <#compileOption,string,string>`_ for enum options
+  ## * `defined <#defined,untyped>`_
+  ## * `std/compilesettings module <compilesettings.html>`_
+  runnableExamples("--floatChecks:off"):
+    static: doAssert not compileOption("floatchecks")
+    {.push floatChecks: on.}
+    static: doAssert compileOption("floatchecks")
+    # floating point NaN and Inf checks enabled in this scope
+    {.pop.}
+
+proc compileOption*(option, arg: string): bool {.
+  magic: "CompileOptionArg", noSideEffect.} =
+  ## Returns whether the enum compile-time option `option` is set to `arg`.
+  ##
+  ## See also:
+  ## * `compileOption <#compileOption,string>`_ for `on|off` options
+  ## * `defined <#defined,untyped>`_
+  ## * `std/compilesettings module <compilesettings.html>`_
+  runnableExamples:
+    when compileOption("opt", "size") and compileOption("gc", "boehm"):
+      discard "compiled with optimization for size and uses Boehm's GC"
+
+template currentSourcePath*: string = instantiationInfo(-1, true).filename
+  ## Returns the full file-system path of the current source file.
+  ##
+  ## To get the directory containing the current source, use it with
+  ## `ospaths2.parentDir() <ospaths2.html#parentDir%2Cstring>`_ as
+  ## `currentSourcePath.parentDir()`.
+  ##
+  ## The path returned by this template is set at compile time.
+  ##
+  ## See the docstring of `macros.getProjectPath() <macros.html#getProjectPath>`_
+  ## for an example showing the distinction between `currentSourcePath()`
+  ## and `getProjectPath()`.
+  ##
+  ## See also:
+  ## * `ospaths2.getCurrentDir() proc <ospaths2.html#getCurrentDir>`_
+
+proc slurp*(filename: string): string {.magic: "Slurp".}
+  ## This is an alias for `staticRead <#staticRead,string>`_.
+
+proc staticRead*(filename: string): string {.magic: "Slurp".}
+  ## Compile-time `readFile <syncio.html#readFile,string>`_ proc for easy
+  ## `resource`:idx: embedding:
+  ##   ```nim
+  ##   const myResource = staticRead"mydatafile.bin"
+  ##   ```
+  ##
+  ## The maximum file size limit that `staticRead` and `slurp` can read is
+  ## near or equal to the *free* memory of the device you are using to compile.
+  ##
+  ## `slurp <#slurp,string>`_ is an alias for `staticRead`.
+
+proc gorge*(command: string, input = "", cache = ""): string {.
+  magic: "StaticExec".} = discard
+  ## This is an alias for `staticExec <#staticExec,string,string,string>`_.
+
+proc staticExec*(command: string, input = "", cache = ""): string {.
+  magic: "StaticExec".} = discard
+  ## Executes an external process at compile-time and returns its text output
+  ## (stdout + stderr).
+  ##
+  ## If `input` is not an empty string, it will be passed as standard input
+  ## to the executed program.
+  ##   ```nim
+  ##   const buildInfo = "Revision " & staticExec("git rev-parse HEAD") &
+  ##                     "\nCompiled on " & staticExec("uname -v")
+  ##   ```
+  ##
+  ## `gorge <#gorge,string,string,string>`_ is an alias for `staticExec`.
+  ##
+  ## Note that you can use this proc inside a pragma like
+  ## `passc <manual.html#implementation-specific-pragmas-passc-pragma>`_ or
+  ## `passl <manual.html#implementation-specific-pragmas-passl-pragma>`_.
+  ##
+  ## If `cache` is not empty, the results of `staticExec` are cached within
+  ## the `nimcache` directory. Use `--forceBuild` to bypass this caching
+  ## behaviour. `command & input & cache` (the concatenated string) is
+  ## used to determine whether the entry in the cache is still valid. You can
+  ## use versioning information for `cache`:
+  ##   ```nim
+  ##   const stateMachine = staticExec("dfaoptimizer", "input", "0.8.0")
+  ##   ```
+
+proc gorgeEx*(command: string, input = "", cache = ""): tuple[output: string,
+                                                              exitCode: int] =
+  ## Similar to `gorge <#gorge,string,string,string>`_ but also returns the
+  ## process exit code, as the `exitCode` field of the result tuple.
+  discard # no implementation in this declaration's body
diff --git a/lib/system/coro_detection.nim b/lib/system/coro_detection.nim
new file mode 100644
index 000000000..f6c1b5c15
--- /dev/null
+++ b/lib/system/coro_detection.nim
@@ -0,0 +1,20 @@
+## Coroutine detection logic
+
+template coroutinesSupportedPlatform(): bool =
+  # Compile-time predicate: false as soon as one of the listed defines
+  # (sparc, ELATE, boehmgc, gogc, nogc, gcRegions, gcMarkAndSweep) is set,
+  # true for every other configuration.
+  not (defined(sparc) or defined(ELATE) or defined(boehmgc) or defined(gogc) or
+       defined(nogc) or defined(gcRegions) or defined(gcMarkAndSweep))
+
+when defined(nimCoroutines):
+  # Explicit opt-in; rejected at compile time on unsupported configurations.
+  when not coroutinesSupportedPlatform():
+    {.error: "Coroutines are not supported on this architecture and/or garbage collector.".}
+  const nimCoroutines* = true
+elif defined(noNimCoroutines):
+  # Explicit opt-out.
+  const nimCoroutines* = false
+else:
+  # No explicit choice: there is no actual autodetection, coroutines stay off.
+  const nimCoroutines* = false
diff --git a/lib/system/countbits_impl.nim b/lib/system/countbits_impl.nim
new file mode 100644
index 000000000..34969cb32
--- /dev/null
+++ b/lib/system/countbits_impl.nim
@@ -0,0 +1,93 @@
+#
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2012 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+## Contains the used algorithms for counting bits.
+
+from std/private/bitops_utils import forwardImpl, castToUnsigned
+
+const useBuiltins* = not defined(noIntrinsicsBitOpts) # intrinsics are used unless -d:noIntrinsicsBitOpts is given
+const noUndefined* = defined(noUndefinedBitOpts) # true when -d:noUndefinedBitOpts is given
+const useGCC_builtins* = (defined(gcc) or defined(llvm_gcc) or
+                         defined(clang)) and useBuiltins # GCC-style `__builtin_*` functions
+const useICC_builtins* = defined(icc) and useBuiltins # Intel compiler intrinsics
+const useVCC_builtins* = defined(vcc) and useBuiltins # MSVC intrinsics
+const arch64* = sizeof(int) == 8 # true when `int` is 8 bytes wide
+
+template countBitsImpl(n: uint32): int =
+  # generic formula is from: https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
+  var v = uint32(n)
+  v = v - ((v shr 1'u32) and 0x55555555'u32) # every 2-bit field now holds the count of its two bits
+  v = (v and 0x33333333'u32) + ((v shr 2'u32) and 0x33333333'u32) # every 4-bit field holds its count
+  (((v + (v shr 4'u32) and 0xF0F0F0F'u32) * 0x1010101'u32) shr 24'u32).int # `+` binds tighter than `and`: per-byte counts, summed into the top byte by the multiply
+
+template countBitsImpl(n: uint64): int =
+  # generic formula is from: https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
+  var v = uint64(n)
+  v = v - ((v shr 1'u64) and 0x5555555555555555'u64) # every 2-bit field now holds the count of its two bits
+  v = (v and 0x3333333333333333'u64) + ((v shr 2'u64) and 0x3333333333333333'u64) # every 4-bit field holds its count
+  v = (v + (v shr 4'u64) and 0x0F0F0F0F0F0F0F0F'u64) # `+` binds tighter than `and`: one count per byte
+  ((v * 0x0101010101010101'u64) shr 56'u64).int # the multiply sums all byte counts into the top byte
+
+
+when useGCC_builtins:
+  # Returns the number of set 1-bits in value.
+  proc builtin_popcount(x: cuint): cint {.importc: "__builtin_popcount", cdecl.}
+  proc builtin_popcountll(x: culonglong): cint {.
+      importc: "__builtin_popcountll", cdecl.}
+
+elif useVCC_builtins:
+  # Counts the number of one bits (population count) in a 16-, 32-, or 64-bit unsigned integer.
+  func builtin_popcnt16(a2: uint16): uint16 {.
+      importc: "__popcnt16", header: "<intrin.h>".}
+  func builtin_popcnt32(a2: uint32): uint32 {.
+      importc: "__popcnt", header: "<intrin.h>".}
+  func builtin_popcnt64(a2: uint64): uint64 {.
+      importc: "__popcnt64", header: "<intrin.h>".}
+
+elif useICC_builtins:
+  # Intel compiler intrinsics: http://fulla.fnal.gov/intel/compiler_c/main_cls/intref_cls/common/intref_allia_misc.htm
+  # see also: https://software.intel.com/en-us/node/523362
+  # Count the number of bits set to 1 in an integer a, and return that count (note: the 32-bit variant takes a signed `cint`).
+  func builtin_popcnt32(a: cint): cint {.
+      importc: "_popcnt", header: "<immintrin.h>".}
+  func builtin_popcnt64(a: uint64): cint {.
+      importc: "_popcnt64", header: "<immintrin.h>".}
+
+
+func countSetBitsImpl*(x: SomeInteger): int {.inline.} =
+  ## Counts the set bits in an integer (also called `Hamming weight`:idx:).
+  # TODO: figure out if ICC supports _popcnt32/_popcnt64 on platforms without POPCNT,
+  # like GCC and MSVC do.
+  let x = x.castToUnsigned # work on the unsigned bit pattern from here on
+  when nimvm: # the VM uses the portable implementation
+    result = forwardImpl(countBitsImpl, x)
+  else:
+    when useGCC_builtins:
+      when sizeof(x) <= 4: result = builtin_popcount(x.cuint).int
+      else: result = builtin_popcountll(x.culonglong).int
+    elif useVCC_builtins:
+      when sizeof(x) <= 2: result = builtin_popcnt16(x.uint16).int
+      elif sizeof(x) <= 4: result = builtin_popcnt32(x.uint32).int
+      elif arch64: result = builtin_popcnt64(x.uint64).int
+      else: result = builtin_popcnt32((x.uint64 and 0xFFFFFFFF'u64).uint32).int +
+                     builtin_popcnt32((x.uint64 shr 32'u64).uint32).int
+    elif useICC_builtins: # `cast[cint]`, not `.cint`: a checked conversion raises RangeDefect once bit 31 is set
+      when sizeof(x) <= 4: result = builtin_popcnt32(cast[cint](x.uint32)).int
+      elif arch64: result = builtin_popcnt64(x.uint64).int
+      else: result = builtin_popcnt32(cast[cint]((x.uint64 and 0xFFFFFFFF'u64).uint32)).int +
+                     builtin_popcnt32(cast[cint]((x.uint64 shr 32'u64).uint32)).int
+    else: # no intrinsics available: portable bit-twiddling fallback
+      when sizeof(x) <= 4: result = countBitsImpl(x.uint32)
+      else: result = countBitsImpl(x.uint64)
+
+proc countBits32*(n: uint32): int {.compilerproc, inline.} =
+  result = countSetBitsImpl(n) # 32-bit compilerproc entry point; delegates to the generic implementation
+
+proc countBits64*(n: uint64): int {.compilerproc, inline.} =
+  result = countSetBitsImpl(n) # 64-bit compilerproc entry point; delegates to the generic implementation
diff --git a/lib/system/ctypes.nim b/lib/system/ctypes.nim
new file mode 100644
index 000000000..b788274bd
--- /dev/null
+++ b/lib/system/ctypes.nim
@@ -0,0 +1,84 @@
+## Some type definitions for compatibility between different
+## backends and platforms.
+
+type
+  BiggestInt* = int64
+    ## An alias for the biggest signed integer type the Nim compiler
+    ## supports. Currently this is `int64`, but it is platform-dependent
+    ## in general.
+
+  BiggestFloat* = float64
+    ## An alias for the biggest floating point type the Nim
+    ## compiler supports. Currently this is `float64`, but it is
+    ## platform-dependent in general.
+
+  BiggestUInt* = uint64
+    ## An alias for the biggest unsigned integer type the Nim compiler
+    ## supports. Currently this is `uint64`, but it is platform-dependent
+    ## in general.
+
+when defined(windows):
+  type
+    clong* {.importc: "long", nodecl.} = int32
+      ## This is the same as the type `long` in *C* (mapped to `int32` on Windows).
+    culong* {.importc: "unsigned long", nodecl.} = uint32
+      ## This is the same as the type `unsigned long` in *C* (mapped to `uint32` on Windows).
+else:
+  type
+    clong* {.importc: "long", nodecl.} = int
+      ## This is the same as the type `long` in *C* (mapped to `int` outside Windows).
+    culong* {.importc: "unsigned long", nodecl.} = uint
+      ## This is the same as the type `unsigned long` in *C* (mapped to `uint` outside Windows).
+
+type # these work for most platforms:
+  cchar* {.importc: "char", nodecl.} = char
+    ## This is the same as the type `char` in *C*.
+  cschar* {.importc: "signed char", nodecl.} = int8
+    ## This is the same as the type `signed char` in *C*.
+  cshort* {.importc: "short", nodecl.} = int16
+    ## This is the same as the type `short` in *C*.
+  cint* {.importc: "int", nodecl.} = int32
+    ## This is the same as the type `int` in *C*.
+  csize_t* {.importc: "size_t", nodecl.} = uint
+    ## This is the same as the type `size_t` in *C*.
+  clonglong* {.importc: "long long", nodecl.} = int64
+    ## This is the same as the type `long long` in *C*.
+  cfloat* {.importc: "float", nodecl.} = float32
+    ## This is the same as the type `float` in *C*.
+  cdouble* {.importc: "double", nodecl.} = float64
+    ## This is the same as the type `double` in *C*.
+  clongdouble* {.importc: "long double", nodecl.} = BiggestFloat
+    ## This is the same as the type `long double` in *C*.
+    ## This C type is not supported by Nim's code generator.
+
+  cuchar* {.importc: "unsigned char", nodecl, deprecated: "use `char` or `uint8` instead".} = char
+    ## Deprecated: Use `char` or `uint8` instead.
+  cushort* {.importc: "unsigned short", nodecl.} = uint16
+    ## This is the same as the type `unsigned short` in *C*.
+  cuint* {.importc: "unsigned int", nodecl.} = uint32
+    ## This is the same as the type `unsigned int` in *C*.
+  culonglong* {.importc: "unsigned long long", nodecl.} = uint64
+    ## This is the same as the type `unsigned long long` in *C*.
+
+type
+  ByteAddress* {.deprecated: "use `uint`".} = int
+    ## Deprecated (use `uint`): the signed integer type that was used for
+    ## converting pointers to integer addresses for readability.
+
+  cstringArray* {.importc: "char**", nodecl.} = ptr UncheckedArray[cstring]
+    ## This is binary compatible with the type `char**` in *C*. The array's
+    ## high value is large enough to disable bounds checking in practice.
+    ## Use `cstringArrayToSeq proc <#cstringArrayToSeq,cstringArray,Natural>`_
+    ## to convert it into a `seq[string]`.
+
+when not defined(nimPreviewSlimSystem):
+  # these aliases pollute the namespace, so `nimPreviewSlimSystem` leaves them out
+  type
+    PFloat32* {.deprecated: "use `ptr float32`".} = ptr float32
+      ## A deprecated alias for `ptr float32`.
+    PFloat64* {.deprecated: "use `ptr float64`".} = ptr float64
+      ## A deprecated alias for `ptr float64`.
+    PInt64* {.deprecated: "use `ptr int64`".} = ptr int64
+      ## A deprecated alias for `ptr int64`.
+    PInt32* {.deprecated: "use `ptr int32`".} = ptr int32
+      ## A deprecated alias for `ptr int32`.
diff --git a/lib/system/cyclebreaker.nim b/lib/system/cyclebreaker.nim
new file mode 100644
index 000000000..45b0a5a65
--- /dev/null
+++ b/lib/system/cyclebreaker.nim
@@ -0,0 +1,184 @@
+#
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2020 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+#[
+A Cycle breaker for Nim
+-----------------------
+
+Instead of "collecting" cycles with all of its pitfalls we will break cycles.
+We exploit that every 'ref' can be 'nil' for this and so get away without
+a distinction between weak and strong pointers. The required runtime
+mechanisms are the same though: We need to be able to traverse the graph.
+This design has the tremendous benefit that it doesn't require a dedicated
+'rawDispose' operation and that it plays well with Nim's cost model.
+The cost of freeing a subgraph with cycles is 2 * N rather than N, that's all.
+
+Cycles do not have to be prepared via .acyclic and there are no multiple
+pointless traversals; only a single proc, `breakCycles`, is exposed as a
+separate module.
+
+Algorithm
+---------
+
+We traverse the graph and notice the nodes we've already traversed. If we
+marked the node already, we set the pointer that leads to this node to 'nil'
+and decrement the reference count of the cell we pointed at.
+
+We notice that multiple paths to the same object do not mean
+we found a cycle, it only means the node is shared.
+
+
+   a -------> b <----- c
+   |          ^        ^
+   +----------+        |
+   |                   |
+   +-------------------+
+
+If we simply remove all links to already processed nodes we end up with:
+
+   a -------> b        c
+   |                   ^
+   +                   |
+   |                   |
+   +-------------------+
+
+That seems acceptable, no leak is produced. This implies that the standard
+depth-first traversal suffices.
+
+]#
+
+include cellseqs_v2
+
+const # color values that `setColor` stores in the bits of `rc` selected by `colorMask`
+  colGreen = 0b000
+  colYellow = 0b001 # one of the two marker colors `breakCycles` alternates between
+  colRed = 0b010 # the other marker color used by `breakCycles`
+  colorMask = 0b011 # the two lowest bits
+
+type
+  TraceProc = proc (p, env: pointer) {.nimcall, benign.} # signature `trace` casts `desc.traceImpl` to
+  DisposeProc = proc (p: pointer) {.nimcall, benign.} # NOTE(review): appears unused in this module - confirm
+
+template color(c): untyped = (c.rc and colorMask) # the color bits of `c.rc`
+template setColor(c, col) =
+  c.rc = (c.rc and not colorMask) or col # clear the color bits, then store `col`
+
+proc nimIncRefCyclic(p: pointer; cyclic: bool) {.compilerRtl, inl.} =
+  # adds one reference to the header of `p`; `cyclic` is not consulted here
+  inc head(p).rc, rcIncrement
+
+proc nimMarkCyclic(p: pointer) {.compilerRtl, inl.} = discard # a no-op in this module
+
+type
+  GcEnv = object # traversal state handed to the trace procs through their `env` pointer
+    traceStack: CellSeq[ptr pointer] # (slot, type) pairs pushed by nimTraceRef/nimTraceRefDyn
+
+proc trace(p: pointer; desc: PNimTypeV2; j: var GcEnv) {.inline.} =
+  when false:
+    cprintf("[Trace] desc: %p %p\n", desc, p)
+    cprintf("[Trace] trace: %p\n", desc.traceImpl)
+  let tracer = desc.traceImpl # nothing happens for types without a trace proc
+  if tracer != nil: cast[TraceProc](tracer)(p, addr(j))
+
+proc nimTraceRef(q: pointer; desc: PNimTypeV2; env: pointer) {.compilerRtl.} =
+  let slot = cast[ptr pointer](q) # `q` is the address of a ref field
+  when traceCollector:
+    cprintf("[Trace] raw: %p\n", slot)
+    cprintf("[Trace] deref: %p\n", slot[])
+  if slot[] != nil:
+    let gcEnv = cast[ptr GcEnv](env)
+    gcEnv.traceStack.add(slot, desc) # queue the non-nil slot with its static type
+
+proc nimTraceRefDyn(q: pointer; env: pointer) {.compilerRtl.} =
+  let slot = cast[ptr pointer](q) # `q` is the address of a ref field
+  when traceCollector:
+    cprintf("[TraceDyn] raw: %p\n", slot)
+    cprintf("[TraceDyn] deref: %p\n", slot[])
+  if slot[] != nil:
+    let gcEnv = cast[ptr GcEnv](env)
+    gcEnv.traceStack.add(slot, cast[ptr PNimTypeV2](slot[])[]) # type read from the object's first word
+
+var markerGeneration: int # its parity selects the marker color of the next `breakCycles` run
+
+proc breakCycles(s: Cell; desc: PNimTypeV2) =
+  let markerColor = if (markerGeneration and 1) == 0: colRed
+                    else: colYellow
+  atomicInc markerGeneration # the next run uses the other marker color
+  when traceCollector:
+    cprintf("[BreakCycles] starting: %p %s RC %ld trace proc %p\n",
+      s, desc.name, s.rc shr rcShift, desc.traceImpl)
+
+  var j: GcEnv
+  init j.traceStack
+  s.setColor markerColor # the root cell counts as visited
+  trace(s +! sizeof(RefHeader), desc, j) # run the root's trace proc on the payload after the header
+
+  while j.traceStack.len > 0:
+    let (u, desc) = j.traceStack.pop() # u: slot holding a ref, desc: the type pushed with it
+    let p = u[]
+    let t = head(p)
+    if t.color != markerColor: # first visit: mark the cell and trace through it
+      t.setColor markerColor
+      trace(p, desc, j)
+      when traceCollector:
+        cprintf("[BreakCycles] followed: %p RC %ld\n", t, t.rc shr rcShift)
+    else: # already visited in this run: drop this link
+      if (t.rc shr rcShift) > 0:
+        dec t.rc, rcIncrement
+        # mark as a link that the produced destructor does not have to follow:
+        u[] = nil
+        when traceCollector:
+          cprintf("[BreakCycles] niled out: %p RC %ld\n", t, t.rc shr rcShift)
+      else:
+        # count is already zero: still nil the link so the destructor does not follow it:
+        u[] = nil
+        when traceCollector:
+          cprintf("[Bug] %p %s RC %ld\n", t, desc.name, t.rc shr rcShift)
+  deinit j.traceStack
+
+proc thinout*[T](x: ref T) {.inline.} =
+  ## Turns the subgraph starting with `x` into its spanning tree by
+  ## `nil`'ing out any pointers that would harm the spanning tree
+  ## structure. Any back pointers that introduced cycles
+  ## and thus would keep the graph from being freed are `nil`'ed.
+  ## This is a form of cycle collection that works well with Nim's ARC
+  ## and its associated cost model. `x` is dereferenced, so it must not be nil.
+  proc getDynamicTypeInfo[T](x: T): PNimTypeV2 {.magic: "GetTypeInfoV2", noSideEffect.}
+
+  breakCycles(head(cast[pointer](x)), getDynamicTypeInfo(x[])) # start at the header of `x`
+
+proc thinout*[T: proc](x: T) {.inline.} =
+  proc rawEnv[T: proc](x: T): pointer {.noSideEffect, inline.} =
+    {.emit: """
+    `result` = `x`.ClE_0;
+    """.}
+
+  let p = rawEnv(x) # the `ClE_0` member of the closure; NOTE(review): presumably must not be nil
+  breakCycles(head(p), cast[ptr PNimTypeV2](p)[]) # type info is read from the first word at `p`
+
+proc nimDecRefIsLastCyclicDyn(p: pointer): bool {.compilerRtl, inl.} =
+  if p == nil: return false # nil is never reported as the last reference
+  let header = head(p)
+  if (header.rc and not rcMask) != 0:
+    dec header.rc, rcIncrement
+    # According to Lins it's correct to do nothing else here.
+    #cprintf("[DeCREF] %p\n", p)
+  else:
+    result = true
+    #cprintf("[DESTROY] %p\n", p)
+
+proc nimDecRefIsLastCyclicStatic(p: pointer; desc: PNimTypeV2): bool {.compilerRtl, inl.} =
+  if p == nil: return false # nil is never reported as the last reference
+  let header = head(p)
+  if (header.rc and not rcMask) != 0:
+    dec header.rc, rcIncrement
+    #cprintf("[DeCREF] %p %s %ld\n", p, desc.name, cell.rc)
+  else:
+    result = true
+    #cprintf("[DESTROY] %p %s\n", p, desc.name)
diff --git a/lib/system/debugger.nim b/lib/system/debugger.nim
deleted file mode 100755
index eade1707f..000000000
--- a/lib/system/debugger.nim
+++ /dev/null
@@ -1,303 +0,0 @@
-#
-#
-#            Nimrod's Runtime Library
-#        (c) Copyright 2013 Andreas Rumpf
-#
-#    See the file "copying.txt", included in this
-#    distribution, for details about the copyright.
-#
-
-## This file implements basic features for any debugger.
-
-type
-  TVarSlot* {.compilerproc, final.} = object ## a slot in a frame
-    address*: pointer ## the variable's address
-    typ*: PNimType    ## the variable's type
-    name*: cstring    ## the variable's name; for globals this is "module.name"
-
-  PExtendedFrame = ptr TExtendedFrame
-  TExtendedFrame = object  # If the debugger is enabled the compiler
-                           # provides an extended frame. Of course
-                           # only slots that are
-                           # needed are allocated and not 10_000,
-                           # except for the global data description.
-    f: TFrame
-    slots: array[0..10_000, TVarSlot]
-
-var
-  dbgGlobalData: TExtendedFrame # this reserves much space, but
-                                # for now it is the most practical way
-
-proc dbgRegisterGlobal(name: cstring, address: pointer,
-                       typ: PNimType) {.compilerproc.} =
-  let i = dbgGlobalData.f.len
-  if i >= high(dbgGlobalData.slots):
-    #debugOut("[Warning] cannot register global ")
-    return
-  dbgGlobalData.slots[i].name = name
-  dbgGlobalData.slots[i].typ = typ
-  dbgGlobalData.slots[i].address = address
-  inc(dbgGlobalData.f.len)
-
-proc getLocal*(frame: PFrame; slot: int): TVarSlot {.inline.} =
-  ## retrieves the meta data for the local variable at `slot`. CAUTION: An
-  ## invalid `slot` value causes a corruption!
-  result = cast[PExtendedFrame](frame).slots[slot]
-
-proc getGlobalLen*(): int {.inline.} =
-  ## gets the number of registered globals.
-  result = dbgGlobalData.f.len
-
-proc getGlobal*(slot: int): TVarSlot {.inline.} =
-  ## retrieves the meta data for the global variable at `slot`. CAUTION: An
-  ## invalid `slot` value causes a corruption!
-  result = dbgGlobalData.slots[slot]
-
-# ------------------- breakpoint support ------------------------------------
-
-type
-  TBreakpoint* = object  ## represents a break point
-    low*, high*: int     ## range from low to high; if disabled
-                         ## both low and high are set to their negative values
-    filename*: cstring   ## the filename of the breakpoint
-
-var
-  dbgBP: array[0..127, TBreakpoint] # breakpoints
-  dbgBPlen: int
-  dbgBPbloom: int64  # we use a bloom filter to speed up breakpoint checking
-  
-  dbgFilenames*: array[0..300, cstring] ## registered filenames;
-                                        ## 'nil' terminated
-  dbgFilenameLen: int
-
-proc dbgRegisterFilename(filename: cstring) {.compilerproc.} =
-  # XXX we could check for duplicates here for DLL support
-  dbgFilenames[dbgFilenameLen] = filename
-  inc dbgFilenameLen
-
-proc dbgRegisterBreakpoint(line: int,
-                           filename, name: cstring) {.compilerproc.} =
-  let x = dbgBPlen
-  if x >= high(dbgBP):
-    #debugOut("[Warning] cannot register breakpoint")
-    return
-  inc(dbgBPlen)
-  dbgBP[x].filename = filename
-  dbgBP[x].low = line
-  dbgBP[x].high = line
-  dbgBPbloom = dbgBPbloom or line
-
-proc addBreakpoint*(filename: cstring, lo, hi: int): bool =
-  let x = dbgBPlen
-  if x >= high(dbgBP): return false
-  inc(dbgBPlen)
-  result = true
-  dbgBP[x].filename = filename
-  dbgBP[x].low = lo
-  dbgBP[x].high = hi
-  for line in lo..hi: dbgBPbloom = dbgBPbloom or line
-
-const
-  FileSystemCaseInsensitive = defined(windows) or defined(dos) or defined(os2)
-
-proc fileMatches(c, bp: cstring): bool =
-  # bp = breakpoint filename
-  # c = current filename
-  # we consider it a match if bp is a suffix of c
-  # and the character for the suffix does not exist or
-  # is one of: \  /  :
-  # depending on the OS case does not matter!
-  var blen: int = c_strlen(bp)
-  var clen: int = c_strlen(c)
-  if blen > clen: return false
-  # check for \ /  :
-  if clen-blen-1 >= 0 and c[clen-blen-1] notin {'\\', '/', ':'}:
-    return false
-  var i = 0
-  while i < blen:
-    var x = bp[i]
-    var y = c[i+clen-blen]
-    when FileSystemCaseInsensitive:
-      if x >= 'A' and x <= 'Z': x = chr(ord(x) - ord('A') + ord('a'))
-      if y >= 'A' and y <= 'Z': y = chr(ord(y) - ord('A') + ord('a'))
-    if x != y: return false
-    inc(i)
-  return true
-
-proc canonFilename*(filename: cstring): cstring =
-  ## returns 'nil' if the filename cannot be found.
-  for i in 0 .. <dbgFilenameLen:
-    result = dbgFilenames[i]
-    if fileMatches(result, filename): return result
-  result = nil
-
-iterator listBreakpoints*(): ptr TBreakpoint =
-  ## lists all breakpoints.
-  for i in 0..dbgBPlen-1: yield addr(dbgBP[i])
-
-proc isActive*(b: ptr TBreakpoint): bool = b.low > 0
-proc flip*(b: ptr TBreakpoint) =
-  ## enables or disables 'b' depending on its current state.
-  b.low = -b.low; b.high = -b.high
-
-proc checkBreakpoints*(filename: cstring, line: int): ptr TBreakpoint =
-  ## in which breakpoint (if any) we are.
-  if (dbgBPbloom and line) != line: return nil
-  for b in listBreakpoints():
-    if line >= b.low and line <= b.high and filename == b.filename: return b
-
-# ------------------- watchpoint support ------------------------------------
-
-type
-  THash = int
-  TWatchpoint {.pure, final.} = object
-    name: cstring
-    address: pointer
-    typ: PNimType
-    oldValue: THash
-
-var
-  Watchpoints: array [0..99, TWatchpoint]
-  WatchpointsLen: int
-
-proc `!&`(h: THash, val: int): THash {.inline.} =
-  result = h +% val
-  result = result +% result shl 10
-  result = result xor (result shr 6)
-
-proc `!$`(h: THash): THash {.inline.} =
-  result = h +% h shl 3
-  result = result xor (result shr 11)
-  result = result +% result shl 15
-
-proc hash(Data: Pointer, Size: int): THash =
-  var h: THash = 0
-  var p = cast[cstring](Data)
-  var i = 0
-  var s = size
-  while s > 0:
-    h = h !& ord(p[i])
-    Inc(i)
-    Dec(s)
-  result = !$h
-
-proc hashGcHeader(data: pointer): THash =
-  const headerSize = sizeof(int)*2
-  result = hash(cast[pointer](cast[int](data) -% headerSize), headerSize)
-
-proc genericHashAux(dest: Pointer, mt: PNimType, shallow: bool,
-                    h: THash): THash
-proc genericHashAux(dest: Pointer, n: ptr TNimNode, shallow: bool,
-                    h: THash): THash =
-  var d = cast[TAddress](dest)
-  case n.kind
-  of nkSlot:
-    result = genericHashAux(cast[pointer](d +% n.offset), n.typ, shallow, h)
-  of nkList:
-    result = h
-    for i in 0..n.len-1: 
-      result = result !& genericHashAux(dest, n.sons[i], shallow, result)
-  of nkCase:
-    result = h !& hash(cast[pointer](d +% n.offset), n.typ.size)
-    var m = selectBranch(dest, n)
-    if m != nil: result = genericHashAux(dest, m, shallow, result)
-  of nkNone: sysAssert(false, "genericHashAux")
-
-proc genericHashAux(dest: Pointer, mt: PNimType, shallow: bool, 
-                    h: THash): THash =
-  sysAssert(mt != nil, "genericHashAux 2")
-  case mt.Kind
-  of tyString:
-    var x = cast[ppointer](dest)[]
-    result = h
-    if x != nil:
-      let s = cast[NimString](x)
-      when defined(trackGcHeaders):
-        result = result !& hashGcHeader(x)
-      else:
-        result = result !& hash(x, s.len)
-  of tySequence:
-    var x = cast[ppointer](dest)
-    var dst = cast[taddress](cast[ppointer](dest)[])
-    result = h
-    if dst != 0:
-      when defined(trackGcHeaders):
-        result = result !& hashGcHeader(cast[ppointer](dest)[])
-      else:
-        for i in 0..cast[pgenericseq](dst).len-1:
-          result = result !& genericHashAux(
-            cast[pointer](dst +% i*% mt.base.size +% GenericSeqSize),
-            mt.Base, shallow, result)
-  of tyObject, tyTuple:
-    # we don't need to copy m_type field for tyObject, as they are equal anyway
-    result = genericHashAux(dest, mt.node, shallow, h)
-  of tyArray, tyArrayConstr:
-    let d = cast[TAddress](dest)
-    result = h
-    for i in 0..(mt.size div mt.base.size)-1:
-      result = result !& genericHashAux(cast[pointer](d +% i*% mt.base.size),
-                                        mt.base, shallow, result)
-  of tyRef:
-    when defined(trackGcHeaders):
-      var s = cast[ppointer](dest)[]
-      if s != nil:
-        result = result !& hashGcHeader(s)
-    else:
-      if shallow:
-        result = h !& hash(dest, mt.size)
-      else:
-        result = h
-        var s = cast[ppointer](dest)[]
-        if s != nil:
-          result = result !& genericHashAux(s, mt.base, shallow, result)
-  else:
-    result = h !& hash(dest, mt.size) # hash raw bits
-
-proc genericHash(dest: Pointer, mt: PNimType): int =
-  result = genericHashAux(dest, mt, false, 0)
-  
-proc dbgRegisterWatchpoint(address: pointer, name: cstring,
-                           typ: PNimType) {.compilerproc.} =
-  let L = WatchpointsLen
-  for i in 0.. <L:
-    if Watchpoints[i].name == name:
-      # address may have changed:
-      Watchpoints[i].address = address
-      return
-  if L >= watchPoints.high:
-    #debugOut("[Warning] cannot register watchpoint")
-    return
-  Watchpoints[L].name = name
-  Watchpoints[L].address = address
-  Watchpoints[L].typ = typ
-  Watchpoints[L].oldValue = genericHash(address, typ)
-  inc WatchpointsLen
-
-proc dbgUnregisterWatchpoints*() =
-  WatchpointsLen = 0
-
-var
-  dbgLineHook*: proc () {.nimcall.}
-    ## set this variable to provide a procedure that should be called before
-    ## each executed instruction. This should only be used by debuggers!
-    ## Only code compiled with the ``debugger:on`` switch calls this hook.
-
-  dbgWatchpointHook*: proc (watchpointName: cstring) {.nimcall.}
-  
-proc checkWatchpoints =
-  let L = WatchpointsLen
-  for i in 0.. <L:
-    let newHash = genericHash(Watchpoints[i].address, Watchpoints[i].typ)
-    if newHash != Watchpoints[i].oldValue:
-      dbgWatchpointHook(Watchpoints[i].name)
-      Watchpoints[i].oldValue = newHash
-
-proc endb(line: int, file: cstring) {.compilerproc, noinline.} =
-  # This proc is called before every Nimrod code line!
-  if framePtr == nil: return
-  if dbgWatchpointHook != nil: checkWatchpoints()
-  framePtr.line = line # this is done here for smaller code size!
-  framePtr.filename = file
-  if dbgLineHook != nil: dbgLineHook()
-
-include "system/endb"
diff --git a/lib/system/deepcopy.nim b/lib/system/deepcopy.nim
new file mode 100644
index 000000000..72d35f518
--- /dev/null
+++ b/lib/system/deepcopy.nim
@@ -0,0 +1,206 @@
+#
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2015 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+const # declared bound of `PtrTable.data`; `allocPtrTable` sizes the real allocation from `cap`
+  TableSize = when sizeof(int) <= 2: 0xff else: 0xff_ffff
+
+type
+  PtrTable = ptr object # open-addressing table from pointer to pointer, see `get`/`put`
+    counter, max: int # number of stored entries; slot count minus one (used as index mask)
+    data: array[TableSize, (pointer, pointer)] # (key, value) slots; a nil key marks an empty slot
+
+template hashPtr(key: pointer): int = (cast[int](key) shr 8) # address with the low 8 bits dropped
+template allocPtrTable: untyped = # reads `cap` from the scope it is expanded in
+  cast[PtrTable](alloc0(2*sizeof(int) + cap*2*sizeof(pointer)))
+
+proc rehash(t: PtrTable): PtrTable =
+  let cap = (t.max+1) * 2 # doubled slot count; the name is read by `allocPtrTable`
+  result = allocPtrTable() # fresh zeroed table
+  result.counter = t.counter
+  result.max = cap-1
+  for slot in 0..t.max:
+    let entry = t.data[slot]
+    if entry[0] == nil: continue # empty slot, nothing to move
+    var probe = hashPtr(entry[0])
+    while result.data[probe and result.max][0] != nil: inc probe
+    result.data[probe and result.max] = entry
+  dealloc t # the old table is released; callers must use the result
+
+proc initPtrTable(): PtrTable =
+  const cap = 32 # initial slot count; the name is read by `allocPtrTable`
+  result = allocPtrTable()
+  result.max = cap - 1
+  result.counter = 0
+
+template deinit(t: PtrTable) = t.dealloc() # frees the table allocation itself
+
+proc get(t: PtrTable; key: pointer): pointer =
+  # Linear probing; yields nil when an empty slot is reached before `key`.
+  var probe = hashPtr(key)
+  while t.data[probe and t.max][0] != nil:
+    let entry = t.data[probe and t.max]
+    if entry[0] == key: return entry[1]
+    inc probe
+  result = nil
+
+proc put(t: var PtrTable; key, val: pointer) =
+  if t.counter * 3 > (t.max+1) * 2: t = rehash(t) # grow once more than 2/3 of the slots are used
+  var probe = hashPtr(key)
+  while t.data[probe and t.max][0] != nil: inc probe
+  t.data[probe and t.max] = (key, val) # first empty slot along the probe sequence
+  inc t.counter
+
+proc genericDeepCopyAux(dest, src: pointer, mt: PNimType;
+                        tab: var PtrTable) {.benign.} # forward declaration: the two overloads call each other
+proc genericDeepCopyAux(dest, src: pointer, n: ptr TNimNode;
+                        tab: var PtrTable) {.benign.} =
+  var
+    d = cast[int](dest)
+    s = cast[int](src)
+  case n.kind
+  of nkSlot: # one field: recurse at the field's offset with the field's type
+    genericDeepCopyAux(cast[pointer](d +% n.offset),
+                       cast[pointer](s +% n.offset), n.typ, tab)
+  of nkList: # several sub-nodes: copy each of them in turn
+    for i in 0..n.len-1:
+      genericDeepCopyAux(dest, src, n.sons[i], tab)
+  of nkCase:
+    var dd = selectBranch(dest, n)
+    var m = selectBranch(src, n)
+    # reset if different branches are in use; note different branches also
+    # imply that's not self-assignment (``x = x``)!
+    if m != dd and dd != nil:
+      genericResetAux(dest, dd)
+    copyMem(cast[pointer](d +% n.offset), cast[pointer](s +% n.offset),
+            n.typ.size) # raw copy of the field at `n.offset` (presumably the discriminator)
+    if m != nil:
+      genericDeepCopyAux(dest, src, m, tab)
+  of nkNone: sysAssert(false, "genericDeepCopyAux")
+
+proc genericDeepCopyAux(dest, src: pointer, mt: PNimType; tab: var PtrTable) =
+  var
+    d = cast[int](dest)
+    s = cast[int](src)
+  sysAssert(mt != nil, "genericDeepCopyAux 2")
+  case mt.kind
+  of tyString:
+    when defined(nimSeqsV2):
+      var x = cast[ptr NimStringV2](dest)
+      var s2 = cast[ptr NimStringV2](s)[]
+      nimAsgnStrV2(x[], s2)
+    else:
+      var x = cast[PPointer](dest)
+      var s2 = cast[PPointer](s)[]
+      if s2 == nil:
+        unsureAsgnRef(x, s2)
+      else:
+        unsureAsgnRef(x, copyDeepString(cast[NimString](s2)))
+  of tySequence:
+    when defined(nimSeqsV2):
+      deepSeqAssignImpl(genericDeepCopyAux, tab)
+    else:
+      var s2 = cast[PPointer](src)[]
+      var seq = cast[PGenericSeq](s2)
+      var x = cast[PPointer](dest)
+      if s2 == nil:
+        unsureAsgnRef(x, s2)
+        return
+      sysAssert(dest != nil, "genericDeepCopyAux 3")
+      unsureAsgnRef(x, newSeq(mt, seq.len))
+      var dst = cast[int](cast[PPointer](dest)[])
+      for i in 0..seq.len-1: # copy element by element, past the aligned seq header
+        genericDeepCopyAux(
+          cast[pointer](dst +% align(GenericSeqSize, mt.base.align) +% i *% mt.base.size),
+          cast[pointer](cast[int](s2) +% align(GenericSeqSize, mt.base.align) +% i *% mt.base.size),
+          mt.base, tab)
+  of tyObject:
+    # we need to copy m_type field for tyObject, as it could be empty for
+    # sequence reallocations:
+    if mt.base != nil:
+      genericDeepCopyAux(dest, src, mt.base, tab)
+    else:
+      var pint = cast[ptr PNimType](dest)
+      pint[] = cast[ptr PNimType](src)[]
+    genericDeepCopyAux(dest, src, mt.node, tab)
+  of tyTuple:
+    genericDeepCopyAux(dest, src, mt.node, tab)
+  of tyArray, tyArrayConstr:
+    for i in 0..(mt.size div mt.base.size)-1:
+      genericDeepCopyAux(cast[pointer](d +% i *% mt.base.size),
+                         cast[pointer](s +% i *% mt.base.size), mt.base, tab)
+  of tyRef:
+    let s2 = cast[PPointer](src)[]
+    if s2 == nil:
+      unsureAsgnRef(cast[PPointer](dest), s2)
+    elif mt.base.deepcopy != nil: # the type's own `deepcopy` hook takes precedence
+      let z = mt.base.deepcopy(s2)
+      when defined(nimSeqsV2):
+        cast[PPointer](dest)[] = z
+      else:
+        unsureAsgnRef(cast[PPointer](dest), z)
+    else:
+      let z = tab.get(s2) # non-nil when `s2` was already copied during this deep copy
+      if z == nil:
+        when declared(usrToCell):
+          let x = usrToCell(s2)
+          let realType = x.typ
+          let z = newObj(realType, realType.base.size)
+          unsureAsgnRef(cast[PPointer](dest), z)
+          tab.put(s2, z) # registered before recursing, so cycles resolve to `z`
+          genericDeepCopyAux(z, s2, realType.base, tab)
+        else:
+          when false:
+            # additional check disabled
+            let x = usrToCell(s2)
+            let realType = x.typ
+            sysAssert realType == mt, " types do differ"
+          when defined(nimSeqsV2):
+            let typ = if mt.base.kind == tyObject: cast[PNimType](cast[ptr PNimTypeV2](s2)[].typeInfoV1)
+                      else: mt.base
+            let z = nimNewObj(typ.size, typ.align)
+            cast[PPointer](dest)[] = z
+          else:
+            # this version should work for any other GC:
+            let typ = if mt.base.kind == tyObject: cast[ptr PNimType](s2)[] else: mt.base
+            let z = newObj(mt, typ.size)
+            unsureAsgnRef(cast[PPointer](dest), z)
+          tab.put(s2, z) # registered before recursing, so cycles resolve to `z`
+          genericDeepCopyAux(z, s2, typ, tab)
+      else:
+        unsureAsgnRef(cast[PPointer](dest), z) # reuse the copy made earlier
+  of tyPtr:
+    # no cycle check here, but also not really required
+    let s2 = cast[PPointer](src)[]
+    if s2 != nil and mt.base.deepcopy != nil:
+      cast[PPointer](dest)[] = mt.base.deepcopy(s2)
+    else:
+      cast[PPointer](dest)[] = s2
+  else:
+    copyMem(dest, src, mt.size) # every other kind is copied bitwise
+
+proc genericDeepCopy(dest, src: pointer, mt: PNimType) {.compilerproc.} =
+  when not defined(nimSeqsV2): GC_disable() # the GC stays off for the whole copy
+  var tab = initPtrTable() # source ref -> copy, shared by the whole traversal
+  genericDeepCopyAux(dest, src, mt, tab)
+  deinit tab
+  when not defined(nimSeqsV2): GC_enable() # NOTE(review): skipped if the copy raises - confirm hooks cannot raise
+
+proc genericSeqDeepCopy(dest, src: pointer, mt: PNimType) {.compilerproc.} =
+  # also invoked for 'string'; `genericDeepCopy` gets the address of a local holding `src`
+  var srcSlot = src
+  genericDeepCopy(dest, addr(srcSlot), mt)
+
+proc genericDeepCopyOpenArray(dest, src: pointer, len: int,
+                            mt: PNimType) {.compilerproc.} =
+  let
+    dstBase = cast[int](dest)
+    srcBase = cast[int](src)
+  for i in 0..len-1:
+    let offset = i *% mt.base.size # byte offset of element `i` in both buffers
+    genericDeepCopy(cast[pointer](dstBase +% offset), cast[pointer](srcBase +% offset), mt.base)
diff --git a/lib/system/dollars.nim b/lib/system/dollars.nim
new file mode 100644
index 000000000..89a739d5a
--- /dev/null
+++ b/lib/system/dollars.nim
@@ -0,0 +1,147 @@
+## `$` is Nim's general way of spelling `toString`:idx:.
+runnableExamples:
+  assert $0.1 == "0.1"
+  assert $(-2*3) == "-6"
+
+import std/private/[digitsutils, miscdollars]
+
+when not defined(nimPreviewSlimSystem):
+  import std/formatfloat
+  export addFloat
+
+  func `$`*(x: float | float32): string =
+    ## Stringifies `x` by appending it to the empty `result` with `addFloat`.
+    addFloat(result, x)
+
+proc `$`*(x: int): string {.raises: [].} =
+  ## Stringifies `x` by appending it to the empty `result` with `addInt`.
+  addInt(result, x)
+
+proc `$`*(x: int64): string {.raises: [].} =
+  ## Stringifies `x` by appending it to the empty `result` with `addInt`.
+  addInt(result, x)
+
+proc `$`*(x: uint64): string {.raises: [].} =
+  ## Stringifies `x` by appending it to the empty `result` with `addInt`.
+  result.addInt(x)
+
+# compile-time `$` for integer literals; same as old `ctfeWhitelist` behavior, whether or not this is a good idea.
+template gen(T) =
+  # xxx simplify this by supporting this in compiler: int{lit} | uint64{lit} | int64{lit}
+  func `$`*(x: T{lit}): string {.compileTime.} = result.addInt(x)
+gen(int)
+gen(uint64)
+gen(int64)
+
+
+proc `$`*(x: bool): string {.magic: "BoolToStr", noSideEffect.}
+  ## The stringify operator for a boolean argument. Returns `x`
+  ## converted to the string `"false"` or `"true"`.
+
+proc `$`*(x: char): string {.magic: "CharToStr", noSideEffect.}
+  ## The stringify operator for a character argument. Returns `x`
+  ## converted to a string holding that character.
+  ##   ```Nim
+  ##   assert $'c' == "c"
+  ##   ```
+
+proc `$`*(x: cstring): string {.magic: "CStrToStr", noSideEffect.}
+  ## The stringify operator for a `cstring` argument. Returns `x`
+  ## converted to a string.
+
+proc `$`*(x: string): string {.magic: "StrToStr", noSideEffect.}
+  ## The stringify operator for a string argument. Returns `x`
+  ## unchanged. This operator is useful for generic code, so
+  ## that `$expr` also works if `expr` is already a string.
+
+proc `$`*[Enum: enum](x: Enum): string {.magic: "EnumToStr", noSideEffect.}
+  ## The stringify operator for an enumeration argument. This works for
+  ## any enumeration type thanks to compiler magic.
+  ##
+  ## If a `$` operator for a concrete enumeration is provided, that one is
+  ## used instead. (In other words: *Overwriting* is possible.)
+
+proc `$`*(t: typedesc): string {.magic: "TypeTrait".}
+  ## Returns the name of the given type as a string.
+  ##
+  ## For more procedures dealing with `typedesc`, see
+  ## `typetraits module <typetraits.html>`_.
+  ##
+  ##   ```Nim
+  ##   doAssert $(typeof(42)) == "int"
+  ##   doAssert $(typeof("Foo")) == "string"
+  ##   static: doAssert $(typeof(@['A', 'B'])) == "seq[char]"
+  ##   ```
+
+proc `$`*[T: tuple](x: T): string =
+  ## Generic `$` operator for tuples that is lifted from the components
+  ## of `x`. Examples (each expression evaluates to `true`):
+  ##   ```Nim
+  ##   $(23, 45) == "(23, 45)"
+  ##   $(a: 23, b: 45) == "(a: 23, b: 45)"
+  ##   $() == "()"
+  ##   ```
+  tupleObjectDollar(result, x) # the helper writes the text into `result`
+
+when not defined(nimPreviewSlimSystem):
+  import std/objectdollar
+  export objectdollar
+
+proc collectionToString[T](x: T, prefix, separator, suffix: string): string =
+  # Renders the items of `x` as `prefix item separator item ... suffix`,
+  # writing each item with `addQuoted` (nil-able items that are nil print as "nil").
+  result = prefix
+  var pending = false # true once the first item has been written
+  for item in items(x):
+    if pending: result.add(separator)
+    pending = true
+
+    when item isnot string and item isnot seq and compiles(item.isNil):
+      # this branch should not be necessary
+      if item.isNil:
+        result.add "nil"
+      else:
+        result.addQuoted(item)
+    else:
+      result.addQuoted(item)
+  result.add(suffix)
+
+proc `$`*[T](x: set[T]): string =
+  ## Stringifies a set: its elements between braces, separated
+  ## by `", "`. Example:
+  ##   ```Nim
+  ##   ${23, 45} == "{23, 45}"
+  ##   ```
+  result = collectionToString(x, "{", ", ", "}")
+
+proc `$`*[T](x: seq[T]): string =
+  ## Stringifies a seq: its elements between `@[` and `]`, separated
+  ## by `", "`. Example:
+  ##   ```Nim
+  ##   $(@[23, 45]) == "@[23, 45]"
+  ##   ```
+  result = collectionToString(x, "@[", ", ", "]")
+
+proc `$`*[T, U](x: HSlice[T, U]): string =
+  ## Generic `$` operator for slices: renders both bounds of `x`
+  ## joined by `" .. "`. Example:
+  ##   ```Nim
+  ##   $(1 .. 5) == "1 .. 5"
+  ##   ```
+  result = $x.a
+  result.add " .. "
+  result.add $x.b
+
+
+when not defined(nimNoArrayToString):
+  proc `$`*[T, IDX](x: array[IDX, T]): string =
+    ## Stringifies an array: its elements between `[` and `]`, separated by `", "`.
+    result = collectionToString(x, "[", ", ", "]")
+
+proc `$`*[T](x: openArray[T]): string =
+  ## Stringifies an openArray: its elements between `[` and `]`, separated
+  ## by `", "`. Example:
+  ##   ```Nim
+  ##   $(@[23, 45].toOpenArray(0, 1)) == "[23, 45]"
+  ##   ```
+  result = collectionToString(x, "[", ", ", "]")
diff --git a/lib/system/dyncalls.nim b/lib/system/dyncalls.nim
index 6a80369b9..2162b234f 100755..100644
--- a/lib/system/dyncalls.nim
+++ b/lib/system/dyncalls.nim
@@ -1,6 +1,6 @@
 #
 #
-#            Nimrod's Runtime Library
+#            Nim's Runtime Library
 #        (c) Copyright 2012 Andreas Rumpf
 #
 #    See the file "copying.txt", included in this
@@ -8,32 +8,53 @@
 #
 
 # This file implements the ability to call native procs from libraries.
-# It is not possible to do this in a platform independant way, unfortunately.
+# It is not possible to do this in a platform independent way, unfortunately.
 # However, the interface has been designed to take platform differences into
 # account and been ported to all major platforms.
 
 {.push stack_trace: off.}
 
 const
-  NilLibHandle: TLibHandle = nil
-
-proc rawWrite(f: TFile, s: string) = 
-  # we cannot throw an exception here!
-  discard writeBuffer(f, cstring(s), s.len)
+  NilLibHandle: LibHandle = nil
 
 proc nimLoadLibraryError(path: string) =
   # carefully written to avoid memory allocation:
-  stdout.rawWrite("could not load: ")
-  stdout.rawWrite(path)
-  stdout.rawWrite("\n")
-  quit(1)
-
-proc ProcAddrError(name: cstring) {.noinline.} =
+  const prefix = "could not load: "
+  cstderr.rawWrite(prefix)
+  cstderr.rawWrite(path)
+  when not defined(nimDebugDlOpen) and not defined(windows):
+    cstderr.rawWrite("\n(compile with -d:nimDebugDlOpen for more information)")
+  when defined(windows):
+    const badExe = "\n(bad format; library may be wrong architecture)"
+    let loadError = GetLastError()
+    if loadError == ERROR_BAD_EXE_FORMAT:
+      cstderr.rawWrite(badExe)
+    when defined(guiapp):
+      # Because console output is not shown in GUI apps, display the error as a
+      # message box instead:
+      var
+        msg: array[1000, char]
+        msgLeft = msg.len - 1 # leave (at least) one for nullchar
+        msgIdx = 0
+      copyMem(msg[msgIdx].addr, prefix.cstring, prefix.len)
+      msgLeft -= prefix.len
+      msgIdx += prefix.len
+      let pathLen = min(path.len, msgLeft)
+      copyMem(msg[msgIdx].addr, path.cstring, pathLen)
+      msgLeft -= pathLen
+      msgIdx += pathLen
+      if loadError == ERROR_BAD_EXE_FORMAT and msgLeft >= badExe.len:
+        copyMem(msg[msgIdx].addr, badExe.cstring, badExe.len)
+      discard MessageBoxA(nil, msg[0].addr, nil, 0)
+  cstderr.rawWrite("\n")
+  rawQuit(1)
+
+proc procAddrError(name: cstring) {.compilerproc, nonReloadable, hcrInline.} =
   # carefully written to avoid memory allocation:
-  stdout.rawWrite("could not import: ")
-  stdout.write(name)
-  stdout.rawWrite("\n")
-  quit(1)
+  cstderr.rawWrite("could not import: ")
+  cstderr.rawWrite(name)
+  cstderr.rawWrite("\n")
+  rawQuit(1)
 
 # this code was inspired from Lua's source code:
 # Lua - An Extensible Extension Language
@@ -52,27 +73,37 @@ when defined(posix):
   #
 
   # c stuff:
-  var
-    RTLD_NOW {.importc: "RTLD_NOW", header: "<dlfcn.h>".}: int
+  when defined(linux) or defined(macosx):
+    const RTLD_NOW = cint(2)
+  else:
+    var
+      RTLD_NOW {.importc: "RTLD_NOW", header: "<dlfcn.h>".}: cint
 
-  proc dlclose(lib: TLibHandle) {.importc, header: "<dlfcn.h>".}
-  proc dlopen(path: CString, mode: int): TLibHandle {.
+  proc dlclose(lib: LibHandle) {.importc, header: "<dlfcn.h>".}
+  proc dlopen(path: cstring, mode: cint): LibHandle {.
       importc, header: "<dlfcn.h>".}
-  proc dlsym(lib: TLibHandle, name: cstring): TProcAddr {.
+  proc dlsym(lib: LibHandle, name: cstring): ProcAddr {.
       importc, header: "<dlfcn.h>".}
 
   proc dlerror(): cstring {.importc, header: "<dlfcn.h>".}
 
-  proc nimUnloadLibrary(lib: TLibHandle) =
+  proc nimUnloadLibrary(lib: LibHandle) =
     dlclose(lib)
 
-  proc nimLoadLibrary(path: string): TLibHandle =
-    result = dlopen(path, RTLD_NOW)
-    #c_fprintf(c_stdout, "%s\n", dlerror())
-
-  proc nimGetProcAddr(lib: TLibHandle, name: cstring): TProcAddr =
+  proc nimLoadLibrary(path: string): LibHandle =
+    let flags = # RTLD_GLOBAL is OR-ed in only when built with -d:globalSymbols
+      when defined(globalSymbols): RTLD_NOW or RTLD_GLOBAL
+      else: RTLD_NOW
+    result = dlopen(path, flags) # the handle is returned as-is; no error check here
+    when defined(nimDebugDlOpen):
+      let error = dlerror() # queried after every dlopen call in this debug mode
+      if error != nil:
+        cstderr.rawWrite(error)
+        cstderr.rawWrite("\n")
+
+  proc nimGetProcAddr(lib: LibHandle, name: cstring): ProcAddr =
     result = dlsym(lib, name)
-    if result == nil: ProcAddrError(name)
+    if result == nil: procAddrError(name)
 
 elif defined(windows) or defined(dos):
   #
@@ -80,63 +111,90 @@ elif defined(windows) or defined(dos):
   # Native Windows Implementation
   # =======================================================================
   #
-  type
-    THINSTANCE {.importc: "HINSTANCE".} = pointer
-
-  proc FreeLibrary(lib: THINSTANCE) {.importc, header: "<windows.h>", stdcall.}
+  when defined(cpp):
+    type
+      THINSTANCE {.importc: "HINSTANCE".} = object
+        x: pointer
+    proc getProcAddress(lib: THINSTANCE, name: cstring): ProcAddr {.
+        importcpp: "(void*)GetProcAddress(@)", header: "<windows.h>", stdcall.}
+  else:
+    type
+      THINSTANCE {.importc: "HINSTANCE".} = pointer
+    proc getProcAddress(lib: THINSTANCE, name: cstring): ProcAddr {.
+        importc: "GetProcAddress", header: "<windows.h>", stdcall.}
+
+  proc freeLibrary(lib: THINSTANCE) {.
+      importc: "FreeLibrary", header: "<windows.h>", stdcall.}
   proc winLoadLibrary(path: cstring): THINSTANCE {.
       importc: "LoadLibraryA", header: "<windows.h>", stdcall.}
-  proc GetProcAddress(lib: THINSTANCE, name: cstring): TProcAddr {.
-      importc: "GetProcAddress", header: "<windows.h>", stdcall.}
 
-  proc nimUnloadLibrary(lib: TLibHandle) =
-    FreeLibrary(cast[THINSTANCE](lib))
-
-  proc nimLoadLibrary(path: string): TLibHandle =
-    result = cast[TLibHandle](winLoadLibrary(path))
-
-  proc nimGetProcAddr(lib: TLibHandle, name: cstring): TProcAddr =
-    result = GetProcAddress(cast[THINSTANCE](lib), name)
-    if result == nil: ProcAddrError(name)
-
-elif defined(mac):
-  #
-  # =======================================================================
-  # Native Mac OS X / Darwin Implementation
-  # =======================================================================
-  #
-  {.error: "no implementation for dyncalls yet".}
-
-  proc nimUnloadLibrary(lib: TLibHandle) =
-    NSUnLinkModule(NSModule(lib), NSUNLINKMODULE_OPTION_RESET_LAZY_REFERENCES)
-
-  var
-    dyld_present {.importc: "_dyld_present", header: "<dyld.h>".}: int
-
-  proc nimLoadLibrary(path: string): TLibHandle =
-    var
-      img: NSObjectFileImage
-      ret: NSObjectFileImageReturnCode
-      modul: NSModule
-    # this would be a rare case, but prevents crashing if it happens
-    result = nil
-    if dyld_present != 0:
-      ret = NSCreateObjectFileImageFromFile(path, addr(img))
-      if ret == NSObjectFileImageSuccess:
-        modul = NSLinkModule(img, path, NSLINKMODULE_OPTION_PRIVATE or
-                                        NSLINKMODULE_OPTION_RETURN_ON_ERROR)
-        NSDestroyObjectFileImage(img)
-        result = TLibHandle(modul)
-
-  proc nimGetProcAddr(lib: TLibHandle, name: cstring): TProcAddr =
-    var
-      nss: NSSymbol
-    nss = NSLookupSymbolInModule(NSModule(lib), name)
-    result = TProcAddr(NSAddressOfSymbol(nss))
-    if result == nil: ProcAddrError(name)
+  proc nimUnloadLibrary(lib: LibHandle) =
+    freeLibrary(cast[THINSTANCE](lib))
+
+  proc nimLoadLibrary(path: string): LibHandle =
+    result = cast[LibHandle](winLoadLibrary(path))
+
+  proc nimGetProcAddr(lib: LibHandle, name: cstring): ProcAddr =
+    result = getProcAddress(cast[THINSTANCE](lib), name) # try the plain name first
+    if result != nil: return
+    const decoratedLength = 250
+    var decorated: array[decoratedLength, char] # holds '_' & name & '@' & digits (presumably stdcall-style decoration -- confirm)
+    decorated[0] = '_'
+    var m = 1
+    while m < (decoratedLength - 5): # copy `name` after the '_', bounded by the buffer
+      if name[m - 1] == '\x00': break
+      decorated[m] = name[m - 1]
+      inc(m)
+    decorated[m] = '@' # m now indexes the '@'
+    for i in countup(0, 50):
+      var k = i * 4 # suffix values tried: 0, 4, 8, ..., 200
+      if k div 100 == 0: # advance m by the number of decimal digits of k
+        if k div 10 == 0:
+          m = m + 1
+        else:
+          m = m + 2
+      else:
+        m = m + 3
+      decorated[m + 1] = '\x00' # terminate right after the last digit
+      while true: # write k's digits right to left; m ends back on the '@'
+        decorated[m] = chr(ord('0') + (k %% 10))
+        dec(m)
+        k = k div 10
+        if k == 0: break
+      result = getProcAddress(cast[THINSTANCE](lib), cast[cstring](addr decorated))
+      if result != nil: return
+    procAddrError(name) # neither the plain nor any decorated name resolved
+
+elif defined(genode):
+
+  proc nimUnloadLibrary(lib: LibHandle) = # Genode stub: only raises an assertion
+    raiseAssert("nimUnloadLibrary not implemented")
+
+  proc nimLoadLibrary(path: string): LibHandle = # Genode stub: only raises an assertion
+    raiseAssert("nimLoadLibrary not implemented")
+
+  proc nimGetProcAddr(lib: LibHandle, name: cstring): ProcAddr = # Genode stub: only raises an assertion
+    raiseAssert("nimGetProcAddr not implemented")
+
+elif defined(nintendoswitch) or defined(freertos) or defined(zephyr) or defined(nuttx):
+  proc nimUnloadLibrary(lib: LibHandle) = # stub: reports on stderr and exits with code 1
+    cstderr.rawWrite("nimUnLoadLibrary not implemented")
+    cstderr.rawWrite("\n")
+    rawQuit(1)
+
+  proc nimLoadLibrary(path: string): LibHandle = # stub: reports on stderr and exits with code 1
+    cstderr.rawWrite("nimLoadLibrary not implemented")
+    cstderr.rawWrite("\n")
+    rawQuit(1)
+
+  # Stub: names the unsupported call and the requested symbol, then exits with code 1.
+  proc nimGetProcAddr(lib: LibHandle, name: cstring): ProcAddr =
+    cstderr.rawWrite("nimGetProcAddr not implemented: ")
+    cstderr.rawWrite(name)
+    cstderr.rawWrite("\n")
+    rawQuit(1)
 
 else:
   {.error: "no implementation for dyncalls".}
-  
-{.pop.}
 
+{.pop.}
diff --git a/lib/system/embedded.nim b/lib/system/embedded.nim
index aaa3befaa..ea6776f58 100644
--- a/lib/system/embedded.nim
+++ b/lib/system/embedded.nim
@@ -1,6 +1,6 @@
 #
 #
-#            Nimrod's Runtime Library
+#            Nim's Runtime Library
 #        (c) Copyright 2012 Andreas Rumpf
 #
 #    See the file "copying.txt", included in this
@@ -10,97 +10,52 @@
 
 # Bare-bones implementation of some things for embedded targets.
 
-proc writeToStdErr(msg: CString) = write(stdout, msg)
-
 proc chckIndx(i, a, b: int): int {.inline, compilerproc.}
 proc chckRange(i, a, b: int): int {.inline, compilerproc.}
 proc chckRangeF(x, a, b: float): float {.inline, compilerproc.}
 proc chckNil(p: pointer) {.inline, compilerproc.}
 
-proc pushFrame(s: PFrame) {.compilerRtl, inl, exportc: "nimFrame".} = nil
-proc popFrame {.compilerRtl, inl.} = nil
+proc nimFrame(s: PFrame) {.compilerRtl, inl, exportc: "nimFrame".} = discard
+proc popFrame {.compilerRtl, inl.} = discard
 
-proc setFrame(s: PFrame) {.compilerRtl, inl.} = nil
-proc pushSafePoint(s: PSafePoint) {.compilerRtl, inl.} = nil
-proc popSafePoint {.compilerRtl, inl.} = nil
-proc pushCurrentException(e: ref E_Base) {.compilerRtl, inl.} = nil
-proc popCurrentException {.compilerRtl, inl.} = nil
+proc setFrame(s: PFrame) {.compilerRtl, inl.} = discard
+when not gotoBasedExceptions:
+  proc pushSafePoint(s: PSafePoint) {.compilerRtl, inl.} = discard
+  proc popSafePoint {.compilerRtl, inl.} = discard
+proc pushCurrentException(e: ref Exception) {.compilerRtl, inl.} = discard
+proc popCurrentException {.compilerRtl, inl.} = discard
 
 # some platforms have native support for stack traces:
 const
   nativeStackTraceSupported = false
   hasSomeStackTrace = false
 
-proc quitOrDebug() {.inline.} =
-  quit(1)
+proc quitOrDebug() {.noreturn, importc: "abort", header: "<stdlib.h>", nodecl.}
+
+proc raiseException(e: ref Exception, ename: cstring) {.compilerRtl.} =
+  sysFatal(ReraiseDefect, "exception handling is not available")
+
+proc raiseExceptionEx(e: sink(ref Exception), ename, procname, filename: cstring,
+                      line: int) {.compilerRtl.} =
+  sysFatal(ReraiseDefect, "exception handling is not available")
 
-proc raiseException(e: ref E_Base, ename: CString) {.compilerRtl.} =
-  writeToStdErr(ename)
- 
 proc reraiseException() {.compilerRtl.} =
-  writeToStdErr("reraise not supported")
-
-proc WriteStackTrace() = nil
-
-proc setControlCHook(hook: proc () {.noconv.}) =
-  # ugly cast, but should work on all architectures:
-  type TSignalHandler = proc (sig: cint) {.noconv.}
-  c_signal(SIGINT, cast[TSignalHandler](hook))
-
-proc raiseRangeError(val: biggestInt) {.compilerproc, noreturn, noinline.} =
-  writeToStdErr("value out of range")
-
-proc raiseIndexError() {.compilerproc, noreturn, noinline.} =
-  writeToStdErr("index out of bounds")
-
-proc raiseFieldError(f: string) {.compilerproc, noreturn, noinline.} =
-  writeToStdErr("field is not accessible")
-
-proc chckIndx(i, a, b: int): int =
-  if i >= a and i <= b:
-    return i
-  else:
-    raiseIndexError()
-
-proc chckRange(i, a, b: int): int =
-  if i >= a and i <= b:
-    return i
-  else:
-    raiseRangeError(i)
-
-proc chckRange64(i, a, b: int64): int64 {.compilerproc.} =
-  if i >= a and i <= b:
-    return i
-  else:
-    raiseRangeError(i)
-
-proc chckRangeF(x, a, b: float): float =
-  if x >= a and x <= b:
-    return x
-  else:
-    raise newException(EOutOfRange, "value " & $x & " out of range")
-
-proc chckNil(p: pointer) =
-  if p == nil: c_raise(SIGSEGV)
-
-proc chckObj(obj, subclass: PNimType) {.compilerproc.} =
-  # checks if obj is of type subclass:
-  var x = obj
-  if x == subclass: return # optimized fast path
-  while x != subclass:
-    if x == nil:
-      raise newException(EInvalidObjectConversion, "invalid object conversion")
-    x = x.base
-
-proc chckObjAsgn(a, b: PNimType) {.compilerproc, inline.} =
-  if a != b:
-    raise newException(EInvalidObjectAssignment, "invalid object assignment")
-
-proc isObj(obj, subclass: PNimType): bool {.compilerproc.} =
-  # checks if obj is of type subclass:
-  var x = obj
-  if x == subclass: return true # optimized fast path
-  while x != subclass:
-    if x == nil: return false
-    x = x.base
-  return true
+  sysFatal(ReraiseDefect, "no exception to reraise")
+
+proc writeStackTrace() = discard
+
+proc unsetControlCHook() = discard
+proc setControlCHook(hook: proc () {.noconv.}) = discard
+
+proc closureIterSetupExc(e: ref Exception) {.compilerproc, inline.} =
+  sysFatal(ReraiseDefect, "exception handling is not available")
+
+when gotoBasedExceptions:
+  var nimInErrorMode {.threadvar.}: bool
+
+  proc nimErrorFlag(): ptr bool {.compilerRtl, inl.} =
+    result = addr(nimInErrorMode)
+
+  proc nimTestErrorFlag() {.compilerRtl.} =
+    if nimInErrorMode:
+      sysFatal(ReraiseDefect, "exception handling is not available")
diff --git a/lib/system/exceptions.nim b/lib/system/exceptions.nim
new file mode 100644
index 000000000..63588f858
--- /dev/null
+++ b/lib/system/exceptions.nim
@@ -0,0 +1,122 @@
+## Exception and effect types used in Nim code.
+
+type
+  TimeEffect* = object of RootEffect   ## Time effect.
+  IOEffect* = object of RootEffect     ## IO effect.
+  ReadIOEffect* = object of IOEffect   ## Effect describing a read IO operation.
+  WriteIOEffect* = object of IOEffect  ## Effect describing a write IO operation.
+  ExecIOEffect* = object of IOEffect   ## Effect describing an executing IO operation.
+
+type
+  IOError* = object of CatchableError ## \
+    ## Raised if an IO error occurred.
+  EOFError* = object of IOError ## \
+    ## Raised if an IO "end of file" error occurred.
+  OSError* = object of CatchableError ## \
+    ## Raised if an operating system service failed.
+    errorCode*: int32 ## OS-defined error code describing this error.
+  LibraryError* = object of OSError ## \
+    ## Raised if a dynamic library could not be loaded.
+  ResourceExhaustedError* = object of CatchableError ## \
+    ## Raised if a resource request could not be fulfilled.
+  ArithmeticDefect* = object of Defect ## \
+    ## Raised if any kind of arithmetic error occurred.
+  DivByZeroDefect* = object of ArithmeticDefect ## \
+    ## Raised for runtime integer divide-by-zero errors.
+
+  OverflowDefect* = object of ArithmeticDefect ## \
+    ## Raised for runtime integer overflows.
+    ##
+    ## This happens for calculations whose results are too large to fit in the
+    ## provided bits.
+  AccessViolationDefect* = object of Defect ## \
+    ## Raised for invalid memory access errors.
+  AssertionDefect* = object of Defect ## \
+    ## Raised when an assertion is proved wrong.
+    ##
+    ## Usually the result of using the `assert() template
+    ## <assertions.html#assert.t,untyped,string>`_.
+  ValueError* = object of CatchableError ## \
+    ## Raised for string and object conversion errors.
+  KeyError* = object of ValueError ## \
+    ## Raised if a key cannot be found in a table.
+    ##
+    ## Mostly used by the `tables <tables.html>`_ module, it can also be raised
+    ## by other collection modules like `sets <sets.html>`_ or `strtabs
+    ## <strtabs.html>`_.
+  OutOfMemDefect* = object of Defect ## \
+    ## Raised for unsuccessful attempts to allocate memory.
+  IndexDefect* = object of Defect ## \
+    ## Raised if an array index is out of bounds.
+
+  FieldDefect* = object of Defect ## \
+    ## Raised if a record field is not accessible because its discriminant's
+    ## value does not fit.
+  RangeDefect* = object of Defect ## \
+    ## Raised if a range check error occurred.
+  StackOverflowDefect* = object of Defect ## \
+    ## Raised if the hardware stack used for subroutine calls overflowed.
+  ReraiseDefect* = object of Defect ## \
+    ## Raised if there is no exception to reraise.
+  ObjectAssignmentDefect* = object of Defect ## \
+    ## Raised if an object gets assigned to its parent's object.
+  ObjectConversionDefect* = object of Defect ## \
+    ## Raised if an object is converted to an incompatible object type.
+    ## You can use the `of` operator to check whether the conversion will succeed.
+  FloatingPointDefect* = object of Defect ## \
+    ## Base class for floating point exceptions.
+  FloatInvalidOpDefect* = object of FloatingPointDefect ## \
+    ## Raised by invalid operations according to IEEE.
+    ##
+    ## Raised by `0.0/0.0`, for example.
+  FloatDivByZeroDefect* = object of FloatingPointDefect ## \
+    ## Raised by division by zero.
+    ##
+    ## Divisor is zero and dividend is a finite nonzero number.
+  FloatOverflowDefect* = object of FloatingPointDefect ## \
+    ## Raised for overflows.
+    ##
+    ## The operation produced a result that exceeds the range of the exponent.
+  FloatUnderflowDefect* = object of FloatingPointDefect ## \
+    ## Raised for underflows.
+    ##
+    ## The operation produced a result that is too small to be represented as a
+    ## normal number.
+  FloatInexactDefect* = object of FloatingPointDefect ## \
+    ## Raised for inexact results.
+    ##
+    ## The operation produced a result that cannot be represented with infinite
+    ## precision -- for example: `2.0 / 3.0, log(1.1)`
+    ##
+    ## **Note**: Nim currently does not detect these!
+  DeadThreadDefect* = object of Defect ## \
+    ## Raised if it is attempted to send a message to a dead thread.
+  NilAccessDefect* = object of Defect ## \
+    ## Raised on dereferences of `nil` pointers.
+    ##
+    ## This is only raised if the `segfaults module <segfaults.html>`_ was imported!
+
+when not defined(nimPreviewSlimSystem):
+  type
+    ArithmeticError* {.deprecated: "See corresponding Defect".} = ArithmeticDefect
+    DivByZeroError* {.deprecated: "See corresponding Defect".} = DivByZeroDefect
+    OverflowError* {.deprecated: "See corresponding Defect".} = OverflowDefect
+    AccessViolationError* {.deprecated: "See corresponding Defect".} = AccessViolationDefect
+    AssertionError* {.deprecated: "See corresponding Defect".} = AssertionDefect
+    OutOfMemError* {.deprecated: "See corresponding Defect".} = OutOfMemDefect
+    IndexError* {.deprecated: "See corresponding Defect".} = IndexDefect
+
+    FieldError* {.deprecated: "See corresponding Defect".} = FieldDefect
+    RangeError* {.deprecated: "See corresponding Defect".} = RangeDefect
+    StackOverflowError* {.deprecated: "See corresponding Defect".} = StackOverflowDefect
+    ReraiseError* {.deprecated: "See corresponding Defect".} = ReraiseDefect
+    ObjectAssignmentError* {.deprecated: "See corresponding Defect".} = ObjectAssignmentDefect
+    ObjectConversionError* {.deprecated: "See corresponding Defect".} = ObjectConversionDefect
+    FloatingPointError* {.deprecated: "See corresponding Defect".} = FloatingPointDefect
+    FloatInvalidOpError* {.deprecated: "See corresponding Defect".} = FloatInvalidOpDefect
+    FloatDivByZeroError* {.deprecated: "See corresponding Defect".} = FloatDivByZeroDefect
+    FloatOverflowError* {.deprecated: "See corresponding Defect".} = FloatOverflowDefect
+    FloatUnderflowError* {.deprecated: "See corresponding Defect".} = FloatUnderflowDefect
+    FloatInexactError* {.deprecated: "See corresponding Defect".} = FloatInexactDefect
+    DeadThreadError* {.deprecated: "See corresponding Defect".} = DeadThreadDefect
+    NilAccessError* {.deprecated: "See corresponding Defect".} = NilAccessDefect
diff --git a/lib/system/excpt.nim b/lib/system/excpt.nim
index faaefe083..dae5c4a4a 100755..100644
--- a/lib/system/excpt.nim
+++ b/lib/system/excpt.nim
@@ -1,7 +1,7 @@
 #
 #
-#            Nimrod's Runtime Library
-#        (c) Copyright 2012 Andreas Rumpf
+#            Nim's Runtime Library
+#        (c) Copyright 2015 Andreas Rumpf
 #
 #    See the file "copying.txt", included in this
 #    distribution, for details about the copyright.
@@ -10,76 +10,175 @@
 # Exception handling code. Carefully coded so that tiny programs which do not
 # use the heap (and nor exceptions) do not include the GC or memory allocator.
 
+import std/private/miscdollars
+import stacktraces
+
+const noStacktraceAvailable = "No stack traceback available\n"
+
 var
-  stackTraceNewLine*: string ## undocumented feature; it is replaced by ``<br>``
-                             ## for CGI applications
+  errorMessageWriter*: (proc(msg: string) {.tags: [WriteIOEffect], benign,
+                                            nimcall.})
+    ## Function that will be called
+    ## instead of `stdmsg.write` when printing stacktrace.
+    ## Unstable API.
 
-template stackTraceNL: expr =
-  (if IsNil(stackTraceNewLine): "\n" else: stackTraceNewLine)
+when defined(windows):
+  proc GetLastError(): int32 {.header: "<windows.h>", nodecl.}
+  const ERROR_BAD_EXE_FORMAT = 193
 
 when not defined(windows) or not defined(guiapp):
-  proc writeToStdErr(msg: CString) = write(stdout, msg)
-
+  proc writeToStdErr(msg: cstring) = rawWrite(cstderr, msg)
+  proc writeToStdErr(msg: cstring, length: int) =
+    rawWriteString(cstderr, msg, length)
 else:
-  proc MessageBoxA(hWnd: cint, lpText, lpCaption: cstring, uType: int): int32 {.
+  proc MessageBoxA(hWnd: pointer, lpText, lpCaption: cstring, uType: int): int32 {.
     header: "<windows.h>", nodecl.}
+  proc writeToStdErr(msg: cstring) =
+    discard MessageBoxA(nil, msg, nil, 0)
+  proc writeToStdErr(msg: cstring, length: int) =
+    discard MessageBoxA(nil, msg, nil, 0)
+
+proc writeToStdErr(msg: string) {.inline.} =
+  # fix bug #13115: forwards `msg.len` explicitly so '\0' bytes are handled, unlike the implicit conversion to cstring
+  writeToStdErr(msg.cstring, msg.len)
+
+proc showErrorMessage(data: cstring, length: int) {.gcsafe, raises: [].} =
+  var toWrite = true # stays true until the custom writer has completed
+  if errorMessageWriter != nil:
+    try:
+      errorMessageWriter($data) # NOTE(review): `$data` does not use `length` -- confirm this is intended
+      toWrite = false
+    except: # any failure of the custom writer falls through to the default output below
+      discard
+  if toWrite:
+    when defined(genode):
+      # stderr not available by default, use the LOG session
+      echo data
+    else:
+      writeToStdErr(data, length)
 
-  proc writeToStdErr(msg: CString) =
-    discard MessageBoxA(0, msg, nil, 0)
+proc showErrorMessage2(data: string) {.inline.} =
+  showErrorMessage(data.cstring, data.len)
 
-proc registerSignalHandler()
+proc chckIndx(i, a, b: int): int {.inline, compilerproc, benign.}
+proc chckRange(i, a, b: int): int {.inline, compilerproc, benign.}
+proc chckRangeF(x, a, b: float): float {.inline, compilerproc, benign.}
+proc chckNil(p: pointer) {.noinline, compilerproc, benign.}
 
-proc chckIndx(i, a, b: int): int {.inline, compilerproc.}
-proc chckRange(i, a, b: int): int {.inline, compilerproc.}
-proc chckRangeF(x, a, b: float): float {.inline, compilerproc.}
-proc chckNil(p: pointer) {.inline, compilerproc.}
+type
+  GcFrame = ptr GcFrameHeader
+  GcFrameHeader {.compilerproc.} = object
+    len: int
+    prev: ptr GcFrameHeader
+
+when NimStackTraceMsgs:
+  var frameMsgBuf* {.threadvar.}: string
+
+when not defined(nimV2):
+  var
+    framePtr {.threadvar.}: PFrame
 
 var
-  framePtr {.rtlThreadVar.}: PFrame
-  excHandler {.rtlThreadVar.}: PSafePoint
-    # list of exception handlers
-    # a global variable for the root of all try blocks
-  currException {.rtlThreadVar.}: ref E_Base
+  currException {.threadvar.}: ref Exception
 
-proc pushFrame(s: PFrame) {.compilerRtl, inl, exportc: "nimFrame".} =
-  s.prev = framePtr
-  framePtr = s
+when not gotoBasedExceptions:
+  var
+    excHandler {.threadvar.}: PSafePoint
+      # list of exception handlers
+      # a global variable for the root of all try blocks
+    gcFramePtr {.threadvar.}: GcFrame
+
+when gotoBasedExceptions:
+  type
+    FrameState = tuple[framePtr: PFrame,
+                      currException: ref Exception]
+else:
+  type
+    FrameState = tuple[gcFramePtr: GcFrame, framePtr: PFrame,
+                      excHandler: PSafePoint, currException: ref Exception]
+
+proc getFrameState*(): FrameState {.compilerRtl, inl.} =
+  when not gotoBasedExceptions:
+    result = (gcFramePtr, framePtr, excHandler, currException)
+  else:
+    result = (framePtr, currException) # this mode's FrameState has only these two fields
+
+proc setFrameState*(state: FrameState) {.compilerRtl, inl.} =
+  ## Writes the fields of `state` back into the corresponding thread-local
+  ## variables (`framePtr`, `currException` and, if present, the other two).
+  when not gotoBasedExceptions:
+    # these two fields only exist without goto-based exceptions
+    gcFramePtr = state.gcFramePtr
+    excHandler = state.excHandler
+  framePtr = state.framePtr
+  currException = state.currException
+
+proc getFrame*(): PFrame {.compilerRtl, inl.} = framePtr
 
 proc popFrame {.compilerRtl, inl.} =
   framePtr = framePtr.prev
 
-proc setFrame(s: PFrame) {.compilerRtl, inl.} =
+when false:
+  proc popFrameOfAddr(s: PFrame) {.compilerRtl.} =
+    var it = framePtr
+    if it == s:
+      framePtr = framePtr.prev
+    else:
+      while it != nil:
+        if it == s:
+          framePtr = it.prev
+          break
+        it = it.prev
+
+proc setFrame*(s: PFrame) {.compilerRtl, inl.} =
   framePtr = s
 
-proc pushSafePoint(s: PSafePoint) {.compilerRtl, inl.} =
-  s.hasRaiseAction = false
-  s.prev = excHandler
-  excHandler = s
+when not gotoBasedExceptions:
+  proc getGcFrame*(): GcFrame {.compilerRtl, inl.} = gcFramePtr
+  proc popGcFrame*() {.compilerRtl, inl.} = gcFramePtr = gcFramePtr.prev
+  proc setGcFrame*(s: GcFrame) {.compilerRtl, inl.} = gcFramePtr = s
+  proc pushGcFrame*(s: GcFrame) {.compilerRtl, inl.} =
+    s.prev = gcFramePtr # link the new frame in front of the current one
+    zeroMem(cast[pointer](cast[int](s)+%sizeof(GcFrameHeader)), s.len*sizeof(pointer)) # zero `s.len` pointer-sized slots starting right after the header
+    gcFramePtr = s # `s` becomes the current GC frame
+
+  proc pushSafePoint(s: PSafePoint) {.compilerRtl, inl.} =
+    s.prev = excHandler
+    excHandler = s
 
-proc popSafePoint {.compilerRtl, inl.} =
-  excHandler = excHandler.prev
+  proc popSafePoint {.compilerRtl, inl.} =
+    excHandler = excHandler.prev
 
-proc pushCurrentException(e: ref E_Base) {.compilerRtl, inl.} = 
-  e.parent = currException
+proc pushCurrentException(e: sink(ref Exception)) {.compilerRtl, inl.} =
+  e.up = currException
   currException = e
+  #showErrorMessage2 "A"
 
 proc popCurrentException {.compilerRtl, inl.} =
-  currException = currException.parent
+  currException = currException.up
+  #showErrorMessage2 "B"
+
+proc popCurrentExceptionEx(id: uint) {.compilerRtl.} = # no-op; `id` is ignored
+  discard "only for bootstrapping compatibility"
+
+proc closureIterSetupExc(e: ref Exception) {.compilerproc, inline.} =
+  currException = e
 
 # some platforms have native support for stack traces:
 const
-  nativeStackTraceSupported = (defined(macosx) or defined(linux)) and 
-                              not nimrodStackTrace
-  hasSomeStackTrace = nimrodStackTrace or 
-    defined(nativeStackTrace) and nativeStackTraceSupported
+  nativeStackTraceSupported = (defined(macosx) or defined(linux)) and
+                              not NimStackTrace
+  hasSomeStackTrace = NimStackTrace or defined(nimStackTraceOverride) or
+    (defined(nativeStackTrace) and nativeStackTraceSupported)
+
 
 when defined(nativeStacktrace) and nativeStackTraceSupported:
   type
-    TDl_info {.importc: "Dl_info", header: "<dlfcn.h>", 
+    TDl_info {.importc: "Dl_info", header: "<dlfcn.h>",
                final, pure.} = object
-      dli_fname: CString
+      dli_fname: cstring
       dli_fbase: pointer
-      dli_sname: CString
+      dli_sname: cstring
       dli_saddr: pointer
 
   proc backtrace(symbols: ptr pointer, size: int): int {.
@@ -89,17 +188,17 @@ when defined(nativeStacktrace) and nativeStackTraceSupported:
 
   when not hasThreadSupport:
     var
-      tempAddresses: array [0..127, pointer] # should not be alloc'd on stack
+      tempAddresses: array[maxStackTraceLines, pointer] # should not be alloc'd on stack
       tempDlInfo: TDl_info
 
   proc auxWriteStackTraceWithBacktrace(s: var string) =
     when hasThreadSupport:
       var
-        tempAddresses: array [0..127, pointer] # but better than a threadvar
+        tempAddresses: array[maxStackTraceLines, pointer] # but better than a threadvar
         tempDlInfo: TDl_info
     # This is allowed to be expensive since it only happens during crashes
     # (but this way you don't need manual stack tracing)
-    var size = backtrace(cast[ptr pointer](addr(tempAddresses)), 
+    var size = backtrace(cast[ptr pointer](addr(tempAddresses)),
                          len(tempAddresses))
     var enabled = false
     for i in 0..size-1:
@@ -113,7 +212,7 @@ when defined(nativeStacktrace) and nativeStackTraceSupported:
             add(s, tempDlInfo.dli_sname)
         else:
           add(s, '?')
-        add(s, stackTraceNL)
+        add(s, "\n")
       else:
         if dlresult != 0 and tempDlInfo.dli_sname != nil and
             c_strcmp(tempDlInfo.dli_sname, "signalHandler") == 0'i32:
@@ -121,264 +220,486 @@ when defined(nativeStacktrace) and nativeStackTraceSupported:
           # interested in
           enabled = true
 
-when not hasThreadSupport:
+when hasSomeStackTrace and not hasThreadSupport:
   var
-    tempFrames: array [0..127, PFrame] # should not be alloc'd on stack
-  
-proc auxWriteStackTrace(f: PFrame, s: var string) =
-  when hasThreadSupport:
-    var
-      tempFrames: array [0..127, PFrame] # but better than a threadvar
-  const
-    firstCalls = 32
+    tempFrames: array[maxStackTraceLines, PFrame] # should not be alloc'd on stack
+
+template reraisedFrom(z): untyped =
+  StackTraceEntry(procname: nil, line: z, filename: nil)
+
+proc auxWriteStackTrace(f: PFrame; s: var seq[StackTraceEntry]) =
   var
     it = f
     i = 0
-    total = 0
-  # setup long head:
-  while it != nil and i <= high(tempFrames)-firstCalls:
-    tempFrames[i] = it
+  while it != nil:
     inc(i)
-    inc(total)
     it = it.prev
-  # go up the stack to count 'total':
-  var b = it
+  var last = i-1
+  when true: # not defined(gcDestructors):
+    if s.len == 0:
+      s = newSeq[StackTraceEntry](i)
+    else:
+      last = s.len + i - 1
+      s.setLen(last+1)
+  it = f
   while it != nil:
-    inc(total)
+    s[last] = StackTraceEntry(procname: it.procname,
+                              line: it.line,
+                              filename: it.filename)
+    when NimStackTraceMsgs:
+      let first = if it.prev == nil: 0 else: it.prev.frameMsgLen
+      if it.frameMsgLen > first:
+        s[last].frameMsg.setLen(it.frameMsgLen - first)
+        # somehow string slicing not available here
+        for i in first .. it.frameMsgLen-1:
+          s[last].frameMsg[i-first] = frameMsgBuf[i]
     it = it.prev
-  var skipped = 0
-  if total > len(tempFrames):
-    # skip N
-    skipped = total-i-firstCalls+1
-    for j in 1..skipped:
-      if b != nil: b = b.prev
-    # create '...' entry:
-    tempFrames[i] = nil
-    inc(i)
-  # setup short tail:
-  while b != nil and i <= high(tempFrames):
-    tempFrames[i] = b
-    inc(i)
-    b = b.prev
-  for j in countdown(i-1, 0):
-    if tempFrames[j] == nil: 
-      add(s, "(")
-      add(s, $skipped)
-      add(s, " calls omitted) ...")
+    dec last
+
+template addFrameEntry(s: var string, f: StackTraceEntry|PFrame) =
+  var oldLen = s.len
+  s.toLocation(f.filename, f.line, 0)
+  for k in 1..max(1, 25-(s.len-oldLen)): add(s, ' ')
+  add(s, f.procname)
+  when NimStackTraceMsgs:
+    when typeof(f) is StackTraceEntry:
+      add(s, f.frameMsg)
     else:
-      var oldLen = s.len
-      add(s, tempFrames[j].filename)
-      if tempFrames[j].line > 0:
-        add(s, '(')
-        add(s, $tempFrames[j].line)
-        add(s, ')')
-      for k in 1..max(1, 25-(s.len-oldLen)): add(s, ' ')
-      add(s, tempFrames[j].procname)
-    add(s, stackTraceNL)
+      var first = if f.prev == nil: 0 else: f.prev.frameMsgLen
+      for i in first..<f.frameMsgLen: add(s, frameMsgBuf[i])
+  add(s, "\n")
+
+proc `$`(stackTraceEntries: seq[StackTraceEntry]): string =
+  when defined(nimStackTraceOverride):
+    let s = addDebuggingInfo(stackTraceEntries)
+  else:
+    let s = stackTraceEntries
+
+  result = newStringOfCap(2000)
+  for i in 0 .. s.len-1:
+    if s[i].line == reraisedFromBegin: result.add "[[reraised from:\n"
+    elif s[i].line == reraisedFromEnd: result.add "]]\n"
+    else: addFrameEntry(result, s[i])
 
 when hasSomeStackTrace:
+
+  proc auxWriteStackTrace(f: PFrame, s: var string) =
+    when hasThreadSupport:
+      var
+        tempFrames: array[maxStackTraceLines, PFrame] # but better than a threadvar
+    const
+      firstCalls = 32
+    var
+      it = f
+      i = 0
+      total = 0
+    # setup long head:
+    while it != nil and i <= high(tempFrames)-firstCalls:
+      tempFrames[i] = it
+      inc(i)
+      inc(total)
+      it = it.prev
+    # go up the stack to count 'total':
+    var b = it
+    while it != nil:
+      inc(total)
+      it = it.prev
+    var skipped = 0
+    if total > len(tempFrames):
+      # skip N
+      skipped = total-i-firstCalls+1
+      for j in 1..skipped:
+        if b != nil: b = b.prev
+      # create '...' entry:
+      tempFrames[i] = nil
+      inc(i)
+    # setup short tail:
+    while b != nil and i <= high(tempFrames):
+      tempFrames[i] = b
+      inc(i)
+      b = b.prev
+    for j in countdown(i-1, 0):
+      if tempFrames[j] == nil:
+        add(s, "(")
+        add(s, $skipped)
+        add(s, " calls omitted) ...\n")
+      else:
+        addFrameEntry(s, tempFrames[j])
+
+  proc stackTraceAvailable*(): bool
+
   proc rawWriteStackTrace(s: var string) =
-    when nimrodStackTrace:
+    when defined(nimStackTraceOverride):
+      add(s, "Traceback (most recent call last, using override)\n")
+      auxWriteStackTraceWithOverride(s)
+    elif NimStackTrace:
       if framePtr == nil:
-        add(s, "No stack traceback available")
-        add(s, stackTraceNL)
+        add(s, noStacktraceAvailable)
       else:
-        add(s, "Traceback (most recent call last)")
-        add(s, stackTraceNL)
+        add(s, "Traceback (most recent call last)\n")
         auxWriteStackTrace(framePtr, s)
     elif defined(nativeStackTrace) and nativeStackTraceSupported:
-      add(s, "Traceback from system (most recent call last)")
-      add(s, stackTraceNL)
+      add(s, "Traceback from system (most recent call last)\n")
       auxWriteStackTraceWithBacktrace(s)
     else:
-      add(s, "No stack traceback available\n")
+      add(s, noStacktraceAvailable)
+
+  proc rawWriteStackTrace(s: var seq[StackTraceEntry]) =
+    when defined(nimStackTraceOverride):
+      auxWriteStackTraceWithOverride(s)
+    elif NimStackTrace:
+      auxWriteStackTrace(framePtr, s)
+    else:
+      s = @[]
 
-proc quitOrDebug() {.inline.} =
-  when not defined(endb):
-    quit(1)
+  proc stackTraceAvailable(): bool =
+    when defined(nimStackTraceOverride):
+      result = true
+    elif NimStackTrace:
+      if framePtr == nil:
+        result = false
+      else:
+        result = true
+    elif defined(nativeStackTrace) and nativeStackTraceSupported:
+      result = true
+    else:
+      result = false
+else:
+  proc stackTraceAvailable*(): bool = result = false
+
+var onUnhandledException*: (proc (errorMsg: string) {.
+  nimcall, gcsafe.}) ## Set this error \
+  ## handler to override the existing behaviour on an unhandled exception.
+  ##
+  ## The default is to write a stacktrace to `stderr` and then call `quit(1)`.
+  ## Unstable API.
+
+proc reportUnhandledErrorAux(e: ref Exception) {.nodestroy, gcsafe.} =
+  when hasSomeStackTrace:
+    var buf = newStringOfCap(2000)
+    if e.trace.len == 0:
+      rawWriteStackTrace(buf)
+    else:
+      var trace = $e.trace
+      add(buf, trace)
+      {.gcsafe.}:
+        `=destroy`(trace)
+    add(buf, "Error: unhandled exception: ")
+    add(buf, e.msg)
+    add(buf, " [")
+    add(buf, $e.name)
+    add(buf, "]\n")
+
+    if onUnhandledException != nil:
+      onUnhandledException(buf)
+    else:
+      showErrorMessage2(buf)
+    {.gcsafe.}:
+      `=destroy`(buf)
   else:
-    endbStep() # call the debugger
+    # ugly, but avoids heap allocations :-)
+    template xadd(buf, s, slen) =
+      if L + slen < high(buf):
+        copyMem(addr(buf[L]), (when s is cstring: s else: cstring(s)), slen)
+        inc L, slen
+    template add(buf, s) =
+      xadd(buf, s, s.len)
+    var buf: array[0..2000, char]
+    var L = 0
+    if e.trace.len != 0:
+      var trace = $e.trace
+      add(buf, trace)
+      {.gcsafe.}:
+        `=destroy`(trace)
+    add(buf, "Error: unhandled exception: ")
+    add(buf, e.msg)
+    add(buf, " [")
+    xadd(buf, e.name, e.name.len)
+    add(buf, "]\n")
+    if onUnhandledException != nil:
+      onUnhandledException($cast[cstring](buf.addr))
+    else:
+      showErrorMessage(cast[cstring](buf.addr), L)
+
+proc reportUnhandledError(e: ref Exception) {.nodestroy, gcsafe.} =
+  if unhandledExceptionHook != nil:
+    unhandledExceptionHook(e)
+  when hostOS != "any":
+    reportUnhandledErrorAux(e)
+
+when not gotoBasedExceptions:
+  proc nimLeaveFinally() {.compilerRtl.} =
+    when defined(cpp) and not defined(noCppExceptions) and not gotoBasedExceptions:
+      {.emit: "throw;".}
+    else:
+      if excHandler != nil:
+        c_longjmp(excHandler.context, 1)
+      else:
+        reportUnhandledError(currException)
+        rawQuit(1)
+
+when gotoBasedExceptions:
+  var nimInErrorMode {.threadvar.}: bool
+
+  proc nimErrorFlag(): ptr bool {.compilerRtl, inl.} =
+    result = addr(nimInErrorMode)
+
+  proc nimTestErrorFlag() {.compilerRtl.} =
+    ## This proc must be called before `currException` is destroyed.
+    ## It also must be called at the end of every thread to ensure no
+    ## error is swallowed.
+    if nimInErrorMode and currException != nil:
+      reportUnhandledError(currException)
+      currException = nil
+      rawQuit(1)
+
+proc raiseExceptionAux(e: sink(ref Exception)) {.nodestroy.} =
+  when defined(nimPanics):
+    if e of Defect:
+      reportUnhandledError(e)
+      rawQuit(1)
 
-proc raiseExceptionAux(e: ref E_Base) =
   if localRaiseHook != nil:
     if not localRaiseHook(e): return
   if globalRaiseHook != nil:
     if not globalRaiseHook(e): return
-  if excHandler != nil:
-    if not excHandler.hasRaiseAction or excHandler.raiseAction(e):
+  when defined(cpp) and not defined(noCppExceptions) and not gotoBasedExceptions:
+    if e == currException:
+      {.emit: "throw;".}
+    else:
       pushCurrentException(e)
-      c_longjmp(excHandler.context, 1)
-  elif e[] of EOutOfMemory:
-    writeToStdErr(e.name)
-    quitOrDebug()
+      {.emit: "throw `e`;".}
+  elif quirkyExceptions or gotoBasedExceptions:
+    pushCurrentException(e)
+    when gotoBasedExceptions:
+      inc nimInErrorMode
   else:
-    when hasSomeStackTrace:
-      var buf = newStringOfCap(2000)
-      if isNil(e.trace): rawWriteStackTrace(buf)
-      else: add(buf, e.trace)
-      add(buf, "Error: unhandled exception: ")
-      if not isNil(e.msg): add(buf, e.msg)
-      add(buf, " [")
-      add(buf, $e.name)
-      add(buf, "]\n")
-      writeToStdErr(buf)
+    if excHandler != nil:
+      pushCurrentException(e)
+      c_longjmp(excHandler.context, 1)
     else:
-      # ugly, but avoids heap allocations :-)
-      template xadd(buf, s, slen: expr) =
-        if L + slen < high(buf):
-          copyMem(addr(buf[L]), cstring(s), slen)
-          inc L, slen
-      template add(buf, s: expr) =
-        xadd(buf, s, s.len)
-      var buf: array [0..2000, char]
-      var L = 0
-      add(buf, "Error: unhandled exception: ")
-      if not isNil(e.msg): add(buf, e.msg)
-      add(buf, " [")
-      xadd(buf, e.name, c_strlen(e.name))
-      add(buf, "]\n")
-      writeToStdErr(buf)
-    quitOrDebug()
-
-proc raiseException(e: ref E_Base, ename: CString) {.compilerRtl.} =
-  e.name = ename
+      reportUnhandledError(e)
+      rawQuit(1)
+
+proc raiseExceptionEx(e: sink(ref Exception), ename, procname, filename: cstring,
+                      line: int) {.compilerRtl, nodestroy.} =
+  if e.name.isNil: e.name = ename
   when hasSomeStackTrace:
-    e.trace = ""
-    rawWriteStackTrace(e.trace)
+    when defined(nimStackTraceOverride):
+      if e.trace.len == 0:
+        rawWriteStackTrace(e.trace)
+      else:
+        e.trace.add reraisedFrom(reraisedFromBegin)
+        auxWriteStackTraceWithOverride(e.trace)
+        e.trace.add reraisedFrom(reraisedFromEnd)
+    elif NimStackTrace:
+      if e.trace.len == 0:
+        rawWriteStackTrace(e.trace)
+      elif framePtr != nil:
+        e.trace.add reraisedFrom(reraisedFromBegin)
+        auxWriteStackTrace(framePtr, e.trace)
+        e.trace.add reraisedFrom(reraisedFromEnd)
+  else:
+    if procname != nil and filename != nil:
+      e.trace.add StackTraceEntry(procname: procname, filename: filename, line: line)
   raiseExceptionAux(e)
 
+proc raiseException(e: sink(ref Exception), ename: cstring) {.compilerRtl.} =
+  raiseExceptionEx(e, ename, nil, nil, 0)
+
 proc reraiseException() {.compilerRtl.} =
   if currException == nil:
-    raise newException(ENoExceptionToReraise, "no exception to reraise")
+    sysFatal(ReraiseDefect, "no exception to reraise")
   else:
-    raiseExceptionAux(currException)
+    when gotoBasedExceptions:
+      inc nimInErrorMode
+    else:
+      raiseExceptionAux(currException)
+
+proc threadTrouble() =
+  # also forward declared, it is 'raises: []' hence the try-except.
+  try:
+    if currException != nil: reportUnhandledError(currException)
+  except:
+    discard
+  rawQuit 1
 
-proc WriteStackTrace() =
+proc writeStackTrace() =
   when hasSomeStackTrace:
     var s = ""
     rawWriteStackTrace(s)
-    writeToStdErr(s)
   else:
-    writeToStdErr("No stack traceback available\n")
+    let s = noStacktraceAvailable
+  cast[proc (s: string) {.noSideEffect, tags: [], nimcall, raises: [].}](showErrorMessage2)(s)
 
 proc getStackTrace(): string =
   when hasSomeStackTrace:
     result = ""
     rawWriteStackTrace(result)
   else:
-    result = "No stack traceback available\n"
+    result = noStacktraceAvailable
 
-proc getStackTrace(e: ref E_Base): string =
-  if not isNil(e) and not isNil(e.trace):
-    result = e.trace
+proc getStackTrace(e: ref Exception): string =
+  if not isNil(e):
+    result = $e.trace
   else:
     result = ""
 
-when defined(endb):
-  var
-    dbgAborting: bool # whether the debugger wants to abort
-
-proc signalHandler(sig: cint) {.exportc: "signalHandler", noconv.} =
-  template processSignal(s, action: expr) {.immediate.} =
-    if s == SIGINT: action("SIGINT: Interrupted by Ctrl-C.\n")
-    elif s == SIGSEGV: 
-      action("SIGSEGV: Illegal storage access. (Attempt to read from nil?)\n")
-    elif s == SIGABRT:
-      when defined(endb):
-        if dbgAborting: return # the debugger wants to abort
-      action("SIGABRT: Abnormal termination.\n")
-    elif s == SIGFPE: action("SIGFPE: Arithmetic error.\n")
-    elif s == SIGILL: action("SIGILL: Illegal operation.\n")
-    elif s == SIGBUS: 
-      action("SIGBUS: Illegal storage access. (Attempt to read from nil?)\n")
-    else: action("unknown signal\n")
-
-  # print stack trace and quit
-  when hasSomeStackTrace:
-    GC_disable()
-    var buf = newStringOfCap(2000)
-    rawWriteStackTrace(buf)
-    processSignal(sig, buf.add) # nice hu? currying a la nimrod :-)
-    writeToStdErr(buf)
-    GC_enable()
-  else:
-    var msg: cstring
-    template asgn(y: expr) = msg = y
-    processSignal(sig, asgn)
-    writeToStdErr(msg)
-  when defined(endb): dbgAborting = True
-  quit(1) # always quit when SIGABRT
-
-proc registerSignalHandler() =
-  c_signal(SIGINT, signalHandler)
-  c_signal(SIGSEGV, signalHandler)
-  c_signal(SIGABRT, signalHandler)
-  c_signal(SIGFPE, signalHandler)
-  c_signal(SIGILL, signalHandler)
-  c_signal(SIGBUS, signalHandler)
-
-when not defined(noSignalHandler):
-  registerSignalHandler() # call it in initialization section
-
-proc setControlCHook(hook: proc () {.noconv.}) =
-  # ugly cast, but should work on all architectures:
-  type TSignalHandler = proc (sig: cint) {.noconv.}
-  c_signal(SIGINT, cast[TSignalHandler](hook))
+proc getStackTraceEntries*(e: ref Exception): lent seq[StackTraceEntry] =
+  ## Returns the stack trace attached to the exception `e` as
+  ## a `seq`. This is not yet available for the JS backend.
+  e.trace
 
-proc raiseRangeError(val: biggestInt) {.compilerproc, noreturn, noinline.} =
-  raise newException(EOutOfRange, "value " & $val & " out of range")
+proc getStackTraceEntries*(): seq[StackTraceEntry] =
+  ## Returns the stack trace entries for the current stack trace.
+  ## This is not yet available for the JS backend.
+  when hasSomeStackTrace:
+    rawWriteStackTrace(result)
 
-proc raiseIndexError() {.compilerproc, noreturn, noinline.} =
-  raise newException(EInvalidIndex, "index out of bounds")
+const nimCallDepthLimit {.intdefine.} = 2000
+
+proc callDepthLimitReached() {.noinline.} =
+  writeStackTrace()
+  let msg = "Error: call depth limit reached in a debug build (" &
+      $nimCallDepthLimit & " function calls). You can change it with " &
+      "-d:nimCallDepthLimit=<int> but really try to avoid deep " &
+      "recursions instead.\n"
+  showErrorMessage2(msg)
+  rawQuit(1)
+
+proc nimFrame(s: PFrame) {.compilerRtl, inl, raises: [].} =
+  if framePtr == nil:
+    s.calldepth = 0
+    when NimStackTraceMsgs: s.frameMsgLen = 0
+  else:
+    s.calldepth = framePtr.calldepth+1
+    when NimStackTraceMsgs: s.frameMsgLen = framePtr.frameMsgLen
+  s.prev = framePtr
+  framePtr = s
+  if s.calldepth == nimCallDepthLimit: callDepthLimitReached()
 
-proc raiseFieldError(f: string) {.compilerproc, noreturn, noinline.} =
-  raise newException(EInvalidField, f & " is not accessible")
+when defined(cpp) and appType != "lib" and not gotoBasedExceptions and
+    not defined(js) and not defined(nimscript) and
+    hostOS != "standalone" and hostOS != "any" and not defined(noCppExceptions) and
+    not quirkyExceptions:
 
-proc chckIndx(i, a, b: int): int =
-  if i >= a and i <= b:
-    return i
-  else:
-    raiseIndexError()
+  type
+    StdException {.importcpp: "std::exception", header: "<exception>".} = object
+
+  proc what(ex: StdException): cstring {.importcpp: "((char *)#.what())", nodecl.}
+
+  proc setTerminate(handler: proc() {.noconv.})
+    {.importc: "std::set_terminate", header: "<exception>".}
+
+  setTerminate proc() {.noconv.} =
+    # Remove ourselves as a handler, reinstalling the default handler.
+    setTerminate(nil)
+
+    var msg = "Unknown error in unexpected exception handler"
+    try:
+      {.emit: "#if !defined(_MSC_VER) || (_MSC_VER >= 1923)".}
+      raise
+      {.emit: "#endif".}
+    except Exception:
+      msg = currException.getStackTrace() & "Error: unhandled exception: " &
+        currException.msg & " [" & $currException.name & "]"
+    except StdException as e:
+      msg = "Error: unhandled cpp exception: " & $e.what()
+    except:
+      msg = "Error: unhandled unknown cpp exception"
+
+    {.emit: "#if defined(_MSC_VER) && (_MSC_VER < 1923)".}
+    msg = "Error: unhandled unknown cpp exception"
+    {.emit: "#endif".}
+
+    when defined(genode):
+      # stderr not available by default, use the LOG session
+      echo msg
+    else:
+      writeToStdErr msg & "\n"
+
+    rawQuit 1
+
+when not defined(noSignalHandler) and not defined(useNimRtl):
+  type Sighandler = proc (a: cint) {.noconv, benign.}
+    # xxx factor with ansi_c.CSighandlerT, posix.Sighandler
+
+  proc signalHandler(sign: cint) {.exportc: "signalHandler", noconv.} =
+    template processSignal(s, action: untyped) {.dirty.} =
+      if s == SIGINT: action("SIGINT: Interrupted by Ctrl-C.\n")
+      elif s == SIGSEGV:
+        action("SIGSEGV: Illegal storage access. (Attempt to read from nil?)\n")
+      elif s == SIGABRT:
+        action("SIGABRT: Abnormal termination.\n")
+      elif s == SIGFPE: action("SIGFPE: Arithmetic error.\n")
+      elif s == SIGILL: action("SIGILL: Illegal operation.\n")
+      elif (when declared(SIGBUS): s == SIGBUS else: false):
+        action("SIGBUS: Illegal storage access. (Attempt to read from nil?)\n")
+      else:
+        block platformSpecificSignal:
+          when declared(SIGPIPE):
+            if s == SIGPIPE:
+              action("SIGPIPE: Pipe closed.\n")
+              break platformSpecificSignal
+          action("unknown signal\n")
+
+    # print stack trace and quit
+    when defined(memtracker):
+      logPendingOps()
+    when hasSomeStackTrace:
+      when not usesDestructors: GC_disable()
+      var buf = newStringOfCap(2000)
+      rawWriteStackTrace(buf)
+      processSignal(sign, buf.add) # nice hu? currying a la Nim :-)
+      showErrorMessage2(buf)
+      when not usesDestructors: GC_enable()
+    else:
+      var msg: cstring
+      template asgn(y) =
+        msg = y
+      processSignal(sign, asgn)
+      # xxx use string for msg instead of cstring, and here use showErrorMessage2(msg)
+      # unless there's a good reason to use cstring in signal handler to avoid
+      # using gc?
+      showErrorMessage(msg, msg.len)
+
+    when defined(posix):
+      # reset the signal handler to OS default
+      c_signal(sign, SIG_DFL)
+
+      # re-raise the signal, which will arrive once this handler exits.
+      # this lets the OS perform actions like core dumping and will
+      # also return the correct exit code to the shell.
+      discard c_raise(sign)
+    else:
+      rawQuit(1)
+
+  var SIG_IGN {.importc: "SIG_IGN", header: "<signal.h>".}: Sighandler
+
+  proc registerSignalHandler() =
+    # xxx `signal` is deprecated and has many caveats, we should use `sigaction` instead, e.g.
+    # https://stackoverflow.com/questions/231912/what-is-the-difference-between-sigaction-and-signal
+    c_signal(SIGINT, signalHandler)
+    c_signal(SIGSEGV, signalHandler)
+    c_signal(SIGABRT, signalHandler)
+    c_signal(SIGFPE, signalHandler)
+    c_signal(SIGILL, signalHandler)
+    when declared(SIGBUS):
+      c_signal(SIGBUS, signalHandler)
+    when declared(SIGPIPE):
+      when defined(nimLegacySigpipeHandler):
+        c_signal(SIGPIPE, signalHandler)
+      else:
+        c_signal(SIGPIPE, SIG_IGN)
 
-proc chckRange(i, a, b: int): int =
-  if i >= a and i <= b:
-    return i
-  else:
-    raiseRangeError(i)
+  registerSignalHandler() # call it in initialization section
 
-proc chckRange64(i, a, b: int64): int64 {.compilerproc.} =
-  if i >= a and i <= b:
-    return i
-  else:
-    raiseRangeError(i)
+proc setControlCHook(hook: proc () {.noconv.}) =
+  # ugly cast, but should work on all architectures:
+  when declared(Sighandler):
+    c_signal(SIGINT, cast[Sighandler](hook))
 
-proc chckRangeF(x, a, b: float): float =
-  if x >= a and x <= b:
-    return x
-  else:
-    raise newException(EOutOfRange, "value " & $x & " out of range")
-
-proc chckNil(p: pointer) =
-  if p == nil: c_raise(SIGSEGV)
-
-proc chckObj(obj, subclass: PNimType) {.compilerproc.} =
-  # checks if obj is of type subclass:
-  var x = obj
-  if x == subclass: return # optimized fast path
-  while x != subclass:
-    if x == nil:
-      raise newException(EInvalidObjectConversion, "invalid object conversion")
-    x = x.base
-
-proc chckObjAsgn(a, b: PNimType) {.compilerproc, inline.} =
-  if a != b:
-    raise newException(EInvalidObjectAssignment, "invalid object assignment")
-
-proc isObj(obj, subclass: PNimType): bool {.compilerproc.} =
-  # checks if obj is of type subclass:
-  var x = obj
-  if x == subclass: return true # optimized fast path
-  while x != subclass:
-    if x == nil: return false
-    x = x.base
-  return true
+when not defined(noSignalHandler) and not defined(useNimRtl):
+  proc unsetControlCHook() =
+    # proc to unset a hook set by setControlCHook
+    c_signal(SIGINT, signalHandler)
diff --git a/lib/system/fatal.nim b/lib/system/fatal.nim
new file mode 100644
index 000000000..25c05e52d
--- /dev/null
+++ b/lib/system/fatal.nim
@@ -0,0 +1,58 @@
+#
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2019 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+{.push profiler: off.}
+
+const
+  gotoBasedExceptions = compileOption("exceptions", "goto")
+  quirkyExceptions = compileOption("exceptions", "quirky")
+
+when hostOS == "standalone":
+  include "$projectpath/panicoverride"
+
+  func sysFatal(exceptn: typedesc[Defect], message: string) {.inline.} =
+    panic(message)
+
+  func sysFatal(exceptn: typedesc[Defect], message, arg: string) {.inline.} =
+    rawoutput(message)
+    panic(arg)
+
+elif quirkyExceptions and not defined(nimscript):
+  import ansi_c
+
+  func name(t: typedesc): string {.magic: "TypeTrait".}
+
+  func sysFatal(exceptn: typedesc[Defect], message, arg: string) {.inline, noreturn.} =
+    when nimvm:
+      # TODO when doAssertRaises works in CT, add a test for it
+      raise (ref exceptn)(msg: message & arg)
+    else:
+      {.noSideEffect.}:
+        writeStackTrace()
+        var buf = newStringOfCap(200)
+        add(buf, "Error: unhandled exception: ")
+        add(buf, message)
+        add(buf, arg)
+        add(buf, " [")
+        add(buf, name exceptn)
+        add(buf, "]\n")
+        cstderr.rawWrite buf
+      rawQuit 1
+
+  func sysFatal(exceptn: typedesc[Defect], message: string) {.inline, noreturn.} =
+    sysFatal(exceptn, message, "")
+
+else:
+  func sysFatal(exceptn: typedesc[Defect], message: string) {.inline, noreturn.} =
+    raise (ref exceptn)(msg: message)
+
+  func sysFatal(exceptn: typedesc[Defect], message, arg: string) {.inline, noreturn.} =
+    raise (ref exceptn)(msg: message & arg)
+
+{.pop.}
diff --git a/lib/system/formatfloat.nim b/lib/system/formatfloat.nim
new file mode 100644
index 000000000..70dd857d5
--- /dev/null
+++ b/lib/system/formatfloat.nim
@@ -0,0 +1,6 @@
+when not defined(nimPreviewSlimSystem):
+  import std/formatfloat
+  export formatfloat
+  {.deprecated: "use `std/formatfloat`".}
+else:
+  {.error: "use `std/formatfloat`".}
diff --git a/lib/system/gc.nim b/lib/system/gc.nim
index 0ab5f4d94..9289c7f55 100644
--- a/lib/system/gc.nim
+++ b/lib/system/gc.nim
@@ -1,7 +1,7 @@
 #
 #
-#            Nimrod's Runtime Library
-#        (c) Copyright 2013 Andreas Rumpf
+#            Nim's Runtime Library
+#        (c) Copyright 2016 Andreas Rumpf
 #
 #    See the file "copying.txt", included in this
 #    distribution, for details about the copyright.
@@ -9,27 +9,76 @@
 
 #            Garbage Collector
 #
-# The basic algorithm is *Deferrent Reference Counting* with cycle detection.
-# This is achieved by combining a Deutsch-Bobrow garbage collector
-# together with Christoper's partial mark-sweep garbage collector.
-#
-# Special care has been taken to avoid recursion as far as possible to avoid
-# stack overflows when traversing deep datastructures. It is well-suited
-# for soft real time applications (like games).
+# Refcounting + Mark&Sweep. Complex algorithms avoided.
+# Been there, done that, didn't work.
+
+#[
+
+A *cell* is anything that is traced by the GC
+(sequences, refs, strings, closures).
+
+The basic algorithm is *Deferred Reference Counting* with cycle detection.
+References on the stack are not counted for better performance and easier C
+code generation.
+
+Each cell has a header consisting of a RC and a pointer to its type
+descriptor. However the program does not know about these, so they are placed at
+negative offsets. In the GC code the type `PCell` denotes a pointer
+decremented by the right offset, so that the header can be accessed easily. It
+is extremely important that `pointer` is not confused with a `PCell`.
+
+In Nim the compiler cannot always know if a reference
+is stored on the stack or not. This is caused by var parameters.
+Consider this example:
+
+  ```Nim
+  proc setRef(r: var ref TNode) =
+    new(r)
+
+  proc usage =
+    var
+      r: ref TNode
+    setRef(r) # here we should not update the reference counts, because
+              # r is on the stack
+    setRef(r.left) # here we should update the refcounts!
+  ```
+
+We have to decide at runtime whether the reference is on the stack or not.
+The generated code looks roughly like this:
+
+  ```C
+  void setref(TNode** ref) {
+    unsureAsgnRef(ref, newObj(TNode_TI, sizeof(TNode)))
+  }
+  void usage(void) {
+    setRef(&r)
+    setRef(&r->left)
+  }
+  ```
+
+Note that for systems with a contiguous stack (which most systems have)
+the check whether the ref is on the stack is very cheap (only two
+comparisons).
+]#
+
 {.push profiler:off.}
 
 const
   CycleIncrease = 2 # is a multiplicative increase
-  InitialCycleThreshold = 4*1024*1024 # X MB because cycle checking is slow
-  ZctThreshold = 500  # we collect garbage if the ZCT's size
-                      # reaches this threshold
-                      # this seems to be a good value
+  InitialCycleThreshold = when defined(nimCycleBreaker): high(int)
+                          else: 4*1024*1024 # X MB because cycle checking is slow
+  InitialZctThreshold = 500  # we collect garbage if the ZCT's size
+                             # reaches this threshold
+                             # this seems to be a good value
   withRealTime = defined(useRealtimeGC)
 
-when withRealTime and not defined(getTicks):
+when withRealTime and not declared(getTicks):
   include "system/timers"
 when defined(memProfiler):
-  proc nimProfile(requestedSize: int)
+  proc nimProfile(requestedSize: int) {.benign.}
+
+when hasThreadSupport:
+  import std/sharedlist
 
 const
   rcIncrement = 0b1000 # so that lowest 3 bits are not touched
@@ -41,188 +90,198 @@ const
   rcShift = 3      # shift by rcShift to get the reference counter
   colorMask = 0b011
 type
-  TWalkOp = enum
-    waZctDecRef, waPush, waCycleDecRef, waMarkGray, waScan, waScanBlack, 
-    waCollectWhite
+  WalkOp = enum
+    waMarkGlobal,    # part of the backup/debug mark&sweep
+    waMarkPrecise,   # part of the backup/debug mark&sweep
+    waZctDecRef, waPush
+    #, waDebug
 
-  TFinalizer {.compilerproc.} = proc (self: pointer) {.nimcall.}
+  Finalizer {.compilerproc.} = proc (self: pointer) {.nimcall, benign, raises: [].}
     # A ref type can have a finalizer that is called before the object's
     # storage is freed.
 
-  TGcStat {.final, pure.} = object
+  GcStat {.final, pure.} = object
     stackScans: int          # number of performed stack scans (for statistics)
     cycleCollections: int    # number of performed full collections
     maxThreshold: int        # max threshold that has been set
     maxStackSize: int        # max stack size
     maxStackCells: int       # max stack cells in ``decStack``
-    cycleTableSize: int      # max entries in cycle table  
+    cycleTableSize: int      # max entries in cycle table
     maxPause: int64          # max measured GC pause in nanoseconds
-  
-  TGcHeap {.final, pure.} = object # this contains the zero count and
-                                   # non-zero count table
-    stackBottom: pointer
+
+  GcStack {.final, pure.} = object
+    when nimCoroutines:
+      prev: ptr GcStack
+      next: ptr GcStack
+      maxStackSize: int      # Used to track statistics because we cannot use
+                             # GcStat.maxStackSize when multiple stacks exist.
+    bottom: pointer
+
+    when withRealTime or nimCoroutines:
+      pos: pointer           # Used with `withRealTime` only for code clarity, see GC_Step().
+    when withRealTime:
+      bottomSaved: pointer
+
+  GcHeap {.final, pure.} = object # this contains the zero count and
+                                  # non-zero count table
+    stack: GcStack
+    when nimCoroutines:
+      activeStack: ptr GcStack    # current executing coroutine stack.
     cycleThreshold: int
-    zct: TCellSeq            # the zero count table
-    decStack: TCellSeq       # cells in the stack that are to decref again
-    cycleRoots: TCellSet
-    tempStack: TCellSeq      # temporary stack for recursion elimination
+    zctThreshold: int
+    when useCellIds:
+      idGenerator: int
+    zct: CellSeq             # the zero count table
+    decStack: CellSeq        # cells in the stack that are to decref again
+    tempStack: CellSeq       # temporary stack for recursion elimination
     recGcLock: int           # prevent recursion via finalizers; no thread lock
     when withRealTime:
-      maxPause: TNanos       # max allowed pause in nanoseconds; active if > 0
-    region: TMemRegion       # garbage collected region
-    stat: TGcStat
+      maxPause: Nanos        # max allowed pause in nanoseconds; active if > 0
+    region: MemRegion        # garbage collected region
+    stat: GcStat
+    marked: CellSet
+    additionalRoots: CellSeq # dummy roots for GC_ref/unref
+    when hasThreadSupport:
+      toDispose: SharedList[pointer]
+    gcThreadId: int
 
 var
-  gch {.rtlThreadVar.}: TGcHeap
+  gch {.rtlThreadVar.}: GcHeap
 
 when not defined(useNimRtl):
-  InstantiateForRegion(gch.region)
-
-template acquire(gch: TGcHeap) = 
-  when hasThreadSupport and hasSharedHeap:
-    AcquireSys(HeapLock)
-
-template release(gch: TGcHeap) = 
-  when hasThreadSupport and hasSharedHeap:
-    releaseSys(HeapLock)
+  instantiateForRegion(gch.region)
 
 template gcAssert(cond: bool, msg: string) =
   when defined(useGcAssert):
     if not cond:
-      echo "[GCASSERT] ", msg
-      quit 1
-
-proc addZCT(s: var TCellSeq, c: PCell) {.noinline.} =
+      cstderr.rawWrite "[GCASSERT] "
+      cstderr.rawWrite msg
+      when defined(logGC):
+        cstderr.rawWrite "[GCASSERT] statistics:\L"
+        cstderr.rawWrite GC_getStatistics()
+      GC_disable()
+      writeStackTrace()
+      #var x: ptr int
+      #echo x[]
+      rawQuit 1
+
+proc addZCT(s: var CellSeq, c: PCell) {.noinline.} =
   if (c.refcount and ZctFlag) == 0:
     c.refcount = c.refcount or ZctFlag
     add(s, c)
 
 proc cellToUsr(cell: PCell): pointer {.inline.} =
   # convert object (=pointer to refcount) to pointer to userdata
-  result = cast[pointer](cast[TAddress](cell)+%TAddress(sizeof(TCell)))
+  result = cast[pointer](cast[int](cell)+%ByteAddress(sizeof(Cell)))
 
 proc usrToCell(usr: pointer): PCell {.inline.} =
   # convert pointer to userdata to object (=pointer to refcount)
-  result = cast[PCell](cast[TAddress](usr)-%TAddress(sizeof(TCell)))
-
-proc canbeCycleRoot(c: PCell): bool {.inline.} =
-  result = ntfAcyclic notin c.typ.flags
+  result = cast[PCell](cast[int](usr)-%ByteAddress(sizeof(Cell)))
 
 proc extGetCellType(c: pointer): PNimType {.compilerproc.} =
   # used for code generation concerning debugging
   result = usrToCell(c).typ
 
 proc internRefcount(p: pointer): int {.exportc: "getRefcount".} =
-  result = int(usrToCell(p).refcount) shr rcShift
+  result = usrToCell(p).refcount shr rcShift
 
 # this that has to equals zero, otherwise we have to round up UnitsPerPage:
 when BitsPerPage mod (sizeof(int)*8) != 0:
   {.error: "(BitsPerPage mod BitsPerUnit) should be zero!".}
 
-template color(c): expr = c.refCount and colorMask
+template color(c): untyped = c.refCount and colorMask
 template setColor(c, col) =
   when col == rcBlack:
-    c.refcount = c.refCount and not colorMask
+    c.refcount = c.refcount and not colorMask
   else:
-    c.refcount = c.refCount and not colorMask or col
+    c.refcount = c.refcount and not colorMask or col
+
+when defined(logGC):
+  proc writeCell(msg: cstring, c: PCell) =
+    var kind = -1
+    var typName: cstring = "nil"
+    if c.typ != nil:
+      kind = ord(c.typ.kind)
+      when defined(nimTypeNames):
+        if not c.typ.name.isNil:
+          typName = c.typ.name
+
+    when leakDetector:
+      c_printf("[GC] %s: %p %d %s rc=%ld from %s(%ld)\n",
+                msg, c, kind, typName, c.refcount shr rcShift, c.filename, c.line)
+    else:
+      c_printf("[GC] %s: %p %d %s rc=%ld; thread=%ld\n",
+                msg, c, kind, typName, c.refcount shr rcShift, gch.gcThreadId)
 
-proc writeCell(msg: CString, c: PCell) =
-  var kind = -1
-  if c.typ != nil: kind = ord(c.typ.kind)
-  when leakDetector:
-    c_fprintf(c_stdout, "[GC] %s: %p %d rc=%ld from %s(%ld)\n",
-              msg, c, kind, c.refcount shr rcShift, c.filename, c.line)
-  else:
-    c_fprintf(c_stdout, "[GC] %s: %p %d rc=%ld; color=%ld\n",
-              msg, c, kind, c.refcount shr rcShift, c.color)
+template logCell(msg: cstring, c: PCell) =
+  when defined(logGC):
+    writeCell(msg, c)
 
-template gcTrace(cell, state: expr): stmt {.immediate.} =
+template gcTrace(cell, state: untyped) =
   when traceGC: traceCell(cell, state)
 
 # forward declarations:
-proc collectCT(gch: var TGcHeap)
-proc IsOnStack*(p: pointer): bool {.noinline.}
-proc forAllChildren(cell: PCell, op: TWalkOp)
-proc doOperation(p: pointer, op: TWalkOp)
-proc forAllChildrenAux(dest: Pointer, mt: PNimType, op: TWalkOp)
+proc collectCT(gch: var GcHeap) {.benign, raises: [].}
+proc isOnStack(p: pointer): bool {.noinline, benign, raises: [].}
+proc forAllChildren(cell: PCell, op: WalkOp) {.benign, raises: [].}
+proc doOperation(p: pointer, op: WalkOp) {.benign, raises: [].}
+proc forAllChildrenAux(dest: pointer, mt: PNimType, op: WalkOp) {.benign, raises: [].}
 # we need the prototype here for debugging purposes
 
-when hasThreadSupport and hasSharedHeap:
-  template `--`(x: expr): expr = atomicDec(x, rcIncrement) <% rcIncrement
-  template `++`(x: expr): stmt = discard atomicInc(x, rcIncrement)
-else:
-  template `--`(x: expr): expr = 
-    Dec(x, rcIncrement)
-    x <% rcIncrement
-  template `++`(x: expr): stmt = Inc(x, rcIncrement)
-
-proc prepareDealloc(cell: PCell) =
-  if cell.typ.finalizer != nil:
-    # the finalizer could invoke something that
-    # allocates memory; this could trigger a garbage
-    # collection. Since we are already collecting we
-    # prevend recursive entering here by a lock.
-    # XXX: we should set the cell's children to nil!
-    inc(gch.recGcLock)
-    (cast[TFinalizer](cell.typ.finalizer))(cellToUsr(cell))
-    dec(gch.recGcLock)
+proc incRef(c: PCell) {.inline.} =
+  gcAssert(isAllocatedPtr(gch.region, c), "incRef: interiorPtr")
+  c.refcount = c.refcount +% rcIncrement
+  # and not colorMask
+  logCell("incRef", c)
 
-proc rtlAddCycleRoot(c: PCell) {.rtl, inl.} = 
-  # we MUST access gch as a global here, because this crosses DLL boundaries!
-  when hasThreadSupport and hasSharedHeap:
-    AcquireSys(HeapLock)
-  if c.color != rcPurple:
-    c.setColor(rcPurple)
-    incl(gch.cycleRoots, c)
-  when hasThreadSupport and hasSharedHeap:
-    ReleaseSys(HeapLock)
+proc nimGCref(p: pointer) {.compilerproc.} =
+  # we keep it from being collected by registering it as an additional root:
+  let c = usrToCell(p)
+  add(gch.additionalRoots, c)
+  incRef(c)
 
 proc rtlAddZCT(c: PCell) {.rtl, inl.} =
   # we MUST access gch as a global here, because this crosses DLL boundaries!
-  when hasThreadSupport and hasSharedHeap:
-    AcquireSys(HeapLock)
   addZCT(gch.zct, c)
-  when hasThreadSupport and hasSharedHeap:
-    ReleaseSys(HeapLock)
 
 proc decRef(c: PCell) {.inline.} =
   gcAssert(isAllocatedPtr(gch.region, c), "decRef: interiorPtr")
   gcAssert(c.refcount >=% rcIncrement, "decRef")
-  if --c.refcount:
+  c.refcount = c.refcount -% rcIncrement
+  if c.refcount <% rcIncrement:
     rtlAddZCT(c)
-  elif canBeCycleRoot(c):
-    # unfortunately this is necessary here too, because a cycle might just
-    # have been broken up and we could recycle it.
-    rtlAddCycleRoot(c)
-    #writeCell("decRef", c)
-
-proc incRef(c: PCell) {.inline.} = 
-  gcAssert(isAllocatedPtr(gch.region, c), "incRef: interiorPtr")
-  c.refcount = c.refCount +% rcIncrement
-  # and not colorMask
-  #writeCell("incRef", c)
-  if canBeCycleRoot(c):
-    rtlAddCycleRoot(c)
-
-proc nimGCref(p: pointer) {.compilerProc, inline.} = incRef(usrToCell(p))
-proc nimGCunref(p: pointer) {.compilerProc, inline.} = decRef(usrToCell(p))
-
-proc GC_addCycleRoot*[T](p: ref T) {.inline.} =
-  ## adds 'p' to the cycle candidate set for the cycle collector. It is
-  ## necessary if you used the 'acyclic' pragma for optimization
-  ## purposes and need to break cycles manually.
-  rtlAddCycleRoot(usrToCell(cast[pointer](p)))
+  logCell("decRef", c)
+
+proc nimGCunref(p: pointer) {.compilerproc.} =
+  let cell = usrToCell(p)
+  var L = gch.additionalRoots.len-1
+  var i = L
+  let d = gch.additionalRoots.d
+  while i >= 0:
+    if d[i] == cell:
+      d[i] = d[L]
+      dec gch.additionalRoots.len
+      break
+    dec(i)
+  decRef(usrToCell(p))
+
+include gc_common
+
+template beforeDealloc(gch: var GcHeap; c: PCell; msg: typed) =
+  when false:
+    for i in 0..gch.decStack.len-1:
+      if gch.decStack.d[i] == c:
+        sysAssert(false, msg)
 
-proc nimGCunrefNoCycle(p: pointer) {.compilerProc, inline.} =
+proc nimGCunrefNoCycle(p: pointer) {.compilerproc, inline.} =
   sysAssert(allocInv(gch.region), "begin nimGCunrefNoCycle")
-  var c = usrToCell(p)
-  gcAssert(isAllocatedPtr(gch.region, c), "nimGCunrefNoCycle: isAllocatedPtr")
-  if --c.refcount:
-    rtlAddZCT(c)
-    sysAssert(allocInv(gch.region), "end nimGCunrefNoCycle 2")
+  decRef(usrToCell(p))
   sysAssert(allocInv(gch.region), "end nimGCunrefNoCycle 5")
 
-proc asgnRef(dest: ppointer, src: pointer) {.compilerProc, inline.} =
+proc nimGCunrefRC1(p: pointer) {.compilerproc, inline.} =
+  decRef(usrToCell(p))
+
+proc asgnRef(dest: PPointer, src: pointer) {.compilerproc, inline.} =
   # the code generator calls this proc!
   gcAssert(not isOnStack(dest), "asgnRef")
   # BUGFIX: first incRef then decRef!
@@ -230,23 +289,14 @@ proc asgnRef(dest: ppointer, src: pointer) {.compilerProc, inline.} =
   if dest[] != nil: decRef(usrToCell(dest[]))
   dest[] = src
 
-proc asgnRefNoCycle(dest: ppointer, src: pointer) {.compilerProc, inline.} =
-  # the code generator calls this proc if it is known at compile time that no 
-  # cycle is possible.
-  if src != nil:
-    var c = usrToCell(src)
-    ++c.refcount
-  if dest[] != nil: 
-    var c = usrToCell(dest[])
-    if --c.refcount:
-      rtlAddZCT(c)
-  dest[] = src
+proc asgnRefNoCycle(dest: PPointer, src: pointer) {.compilerproc, inline,
+  deprecated: "old compiler compat".} = asgnRef(dest, src)
 
-proc unsureAsgnRef(dest: ppointer, src: pointer) {.compilerProc.} =
+proc unsureAsgnRef(dest: PPointer, src: pointer) {.compilerproc.} =
   # unsureAsgnRef updates the reference counters only if dest is not on the
-  # stack. It is used by the code generator if it cannot decide wether a
+  # stack. It is used by the code generator if it cannot decide whether a
   # reference is in the stack or not (this can happen for var parameters).
-  if not IsOnStack(dest):
+  if not isOnStack(dest):
     if src != nil: incRef(usrToCell(src))
     # XXX finally use assembler for the stack checking instead!
     # the test for '!= nil' is correct, but I got tired of the segfaults
@@ -254,15 +304,16 @@ proc unsureAsgnRef(dest: ppointer, src: pointer) {.compilerProc.} =
     if cast[int](dest[]) >=% PageSize: decRef(usrToCell(dest[]))
   else:
     # can't be an interior pointer if it's a stack location!
-    gcAssert(interiorAllocatedPtr(gch.region, dest) == nil, 
+    gcAssert(interiorAllocatedPtr(gch.region, dest) == nil,
              "stack loc AND interior pointer")
   dest[] = src
 
 proc initGC() =
   when not defined(useNimRtl):
     when traceGC:
-      for i in low(TCellState)..high(TCellState): init(states[i])
+      for i in low(CellState)..high(CellState): init(states[i])
     gch.cycleThreshold = InitialCycleThreshold
+    gch.zctThreshold = InitialZctThreshold
     gch.stat.stackScans = 0
     gch.stat.cycleCollections = 0
     gch.stat.maxThreshold = 0
@@ -272,11 +323,22 @@ proc initGC() =
     # init the rt
     init(gch.zct)
     init(gch.tempStack)
-    init(gch.cycleRoots)
     init(gch.decStack)
+    init(gch.marked)
+    init(gch.additionalRoots)
+    when hasThreadSupport:
+      init(gch.toDispose)
+    gch.gcThreadId = atomicInc(gHeapidGenerator) - 1
+    gcAssert(gch.gcThreadId >= 0, "invalid computed thread ID")
 
-proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: TWalkOp) =
-  var d = cast[TAddress](dest)
+proc cellsetReset(s: var CellSet) =
+  deinit(s)
+  init(s)
+
+{.push stacktrace:off.}
+
+proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: WalkOp) {.benign.} =
+  var d = cast[int](dest)
   case n.kind
   of nkSlot: forAllChildrenAux(cast[pointer](d +% n.offset), n.typ, op)
   of nkList:
@@ -284,9 +346,9 @@ proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: TWalkOp) =
       # inlined for speed
       if n.sons[i].kind == nkSlot:
         if n.sons[i].typ.kind in {tyRef, tyString, tySequence}:
-          doOperation(cast[ppointer](d +% n.sons[i].offset)[], op)
+          doOperation(cast[PPointer](d +% n.sons[i].offset)[], op)
         else:
-          forAllChildrenAux(cast[pointer](d +% n.sons[i].offset), 
+          forAllChildrenAux(cast[pointer](d +% n.sons[i].offset),
                             n.sons[i].typ, op)
       else:
         forAllSlotsAux(dest, n.sons[i], op)
@@ -295,45 +357,45 @@ proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: TWalkOp) =
     if m != nil: forAllSlotsAux(dest, m, op)
   of nkNone: sysAssert(false, "forAllSlotsAux")
 
-proc forAllChildrenAux(dest: Pointer, mt: PNimType, op: TWalkOp) =
-  var d = cast[TAddress](dest)
+proc forAllChildrenAux(dest: pointer, mt: PNimType, op: WalkOp) =
+  var d = cast[int](dest)
   if dest == nil: return # nothing to do
   if ntfNoRefs notin mt.flags:
-    case mt.Kind
+    case mt.kind
     of tyRef, tyString, tySequence: # leaf:
-      doOperation(cast[ppointer](d)[], op)
+      doOperation(cast[PPointer](d)[], op)
     of tyObject, tyTuple:
       forAllSlotsAux(dest, mt.node, op)
     of tyArray, tyArrayConstr, tyOpenArray:
       for i in 0..(mt.size div mt.base.size)-1:
         forAllChildrenAux(cast[pointer](d +% i *% mt.base.size), mt.base, op)
-    else: nil
+    else: discard
 
-proc forAllChildren(cell: PCell, op: TWalkOp) =
-  gcAssert(cell != nil, "forAllChildren: 1")
-  gcAssert(cell.typ != nil, "forAllChildren: 2")
-  gcAssert cell.typ.kind in {tyRef, tySequence, tyString}, "forAllChildren: 3"
+proc forAllChildren(cell: PCell, op: WalkOp) =
+  gcAssert(cell != nil, "forAllChildren: cell is nil")
+  gcAssert(isAllocatedPtr(gch.region, cell), "forAllChildren: pointer not part of the heap")
+  gcAssert(cell.typ != nil, "forAllChildren: cell.typ is nil")
+  gcAssert cell.typ.kind in {tyRef, tySequence, tyString}, "forAllChildren: unknown GC'ed type"
   let marker = cell.typ.marker
   if marker != nil:
     marker(cellToUsr(cell), op.int)
   else:
-    case cell.typ.Kind
+    case cell.typ.kind
     of tyRef: # common case
       forAllChildrenAux(cellToUsr(cell), cell.typ.base, op)
     of tySequence:
-      var d = cast[TAddress](cellToUsr(cell))
+      var d = cast[int](cellToUsr(cell))
       var s = cast[PGenericSeq](d)
       if s != nil:
         for i in 0..s.len-1:
-          forAllChildrenAux(cast[pointer](d +% i *% cell.typ.base.size +%
-            GenericSeqSize), cell.typ.base, op)
-    else: nil
+          forAllChildrenAux(cast[pointer](d +% align(GenericSeqSize, cell.typ.base.align) +% i *% cell.typ.base.size), cell.typ.base, op)
+    else: discard
 
-proc addNewObjToZCT(res: PCell, gch: var TGcHeap) {.inline.} =
+proc addNewObjToZCT(res: PCell, gch: var GcHeap) {.inline.} =
   # we check the last 8 entries (cache line) for a slot that could be reused.
   # In 63% of all cases we succeed here! But we have to optimize the heck
   # out of this small linear search so that ``newObj`` is not slowed down.
-  # 
+  #
   # Slots to try          cache hit
   # 1                     32%
   # 4                     59%
@@ -344,7 +406,7 @@ proc addNewObjToZCT(res: PCell, gch: var TGcHeap) {.inline.} =
   var d = gch.zct.d
   when true:
     # loop unrolled for performance:
-    template replaceZctEntry(i: expr) =
+    template replaceZctEntry(i: untyped) =
       c = d[i]
       if c.refcount >=% rcIncrement:
         c.refcount = c.refcount and not ZctFlag
@@ -373,121 +435,139 @@ proc addNewObjToZCT(res: PCell, gch: var TGcHeap) {.inline.} =
         return
     add(gch.zct, res)
 
-proc rawNewObj(typ: PNimType, size: int, gch: var TGcHeap): pointer =
+{.push stackTrace: off, profiler:off.}
+proc gcInvariant*() =
+  sysAssert(allocInv(gch.region), "injected")
+  when declared(markForDebug):
+    markForDebug(gch)
+{.pop.}
+
+template setFrameInfo(c: PCell) =
+  when leakDetector:
+    if framePtr != nil and framePtr.prev != nil:
+      c.filename = framePtr.prev.filename
+      c.line = framePtr.prev.line
+    else:
+      c.filename = nil
+      c.line = 0
+
+proc rawNewObj(typ: PNimType, size: int, gch: var GcHeap): pointer =
   # generates a new object and sets its reference counter to 0
-  acquire(gch)
+  incTypeSize typ, size
+  sysAssert(allocInv(gch.region), "rawNewObj begin")
   gcAssert(typ.kind in {tyRef, tyString, tySequence}, "newObj: 1")
   collectCT(gch)
-  sysAssert(allocInv(gch.region), "rawNewObj begin")
-  var res = cast[PCell](rawAlloc(gch.region, size + sizeof(TCell)))
-  gcAssert((cast[TAddress](res) and (MemAlign-1)) == 0, "newObj: 2")
+  var res = cast[PCell](rawAlloc(gch.region, size + sizeof(Cell)))
+  #gcAssert typ.kind in {tyString, tySequence} or size >= typ.base.size, "size too small"
+  gcAssert((cast[int](res) and (MemAlign-1)) == 0, "newObj: 2")
   # now it is buffered in the ZCT
   res.typ = typ
-  when leakDetector and not hasThreadSupport:
-    if framePtr != nil and framePtr.prev != nil:
-      res.filename = framePtr.prev.filename
-      res.line = framePtr.prev.line
+  setFrameInfo(res)
   # refcount is zero, color is black, but mark it to be in the ZCT
   res.refcount = ZctFlag
   sysAssert(isAllocatedPtr(gch.region, res), "newObj: 3")
   # its refcount is zero, so add it to the ZCT:
   addNewObjToZCT(res, gch)
-  when logGC: writeCell("new cell", res)
+  logCell("new cell", res)
+  track("rawNewObj", res, size)
   gcTrace(res, csAllocated)
-  release(gch)
+  when useCellIds:
+    inc gch.idGenerator
+    res.id = gch.idGenerator * 1000_000 + gch.gcThreadId
   result = cellToUsr(res)
   sysAssert(allocInv(gch.region), "rawNewObj end")
 
-{.pop.}
+{.pop.} # .stackTrace off
+{.pop.} # .profiler off
 
-proc newObj(typ: PNimType, size: int): pointer {.compilerRtl.} =
+proc newObjNoInit(typ: PNimType, size: int): pointer {.compilerRtl.} =
+  result = rawNewObj(typ, size, gch)
+  when defined(memProfiler): nimProfile(size)
+
+proc newObj(typ: PNimType, size: int): pointer {.compilerRtl, noinline.} =
   result = rawNewObj(typ, size, gch)
   zeroMem(result, size)
   when defined(memProfiler): nimProfile(size)
 
+{.push overflowChecks: on.}
 proc newSeq(typ: PNimType, len: int): pointer {.compilerRtl.} =
   # `newObj` already uses locks, so no need for them here.
-  let size = addInt(mulInt(len, typ.base.size), GenericSeqSize)
+  let size = align(GenericSeqSize, typ.base.align) + len * typ.base.size
   result = newObj(typ, size)
   cast[PGenericSeq](result).len = len
   cast[PGenericSeq](result).reserved = len
   when defined(memProfiler): nimProfile(size)
+{.pop.}
 
-proc newObjRC1(typ: PNimType, size: int): pointer {.compilerRtl.} =
+proc newObjRC1(typ: PNimType, size: int): pointer {.compilerRtl, noinline.} =
   # generates a new object and sets its reference counter to 1
+  incTypeSize typ, size
   sysAssert(allocInv(gch.region), "newObjRC1 begin")
-  acquire(gch)
   gcAssert(typ.kind in {tyRef, tyString, tySequence}, "newObj: 1")
   collectCT(gch)
   sysAssert(allocInv(gch.region), "newObjRC1 after collectCT")
-  
-  var res = cast[PCell](rawAlloc(gch.region, size + sizeof(TCell)))
+
+  var res = cast[PCell](rawAlloc(gch.region, size + sizeof(Cell)))
   sysAssert(allocInv(gch.region), "newObjRC1 after rawAlloc")
-  sysAssert((cast[TAddress](res) and (MemAlign-1)) == 0, "newObj: 2")
+  sysAssert((cast[int](res) and (MemAlign-1)) == 0, "newObj: 2")
   # now it is buffered in the ZCT
   res.typ = typ
-  when leakDetector and not hasThreadSupport:
-    if framePtr != nil and framePtr.prev != nil:
-      res.filename = framePtr.prev.filename
-      res.line = framePtr.prev.line
+  setFrameInfo(res)
   res.refcount = rcIncrement # refcount is 1
   sysAssert(isAllocatedPtr(gch.region, res), "newObj: 3")
-  when logGC: writeCell("new cell", res)
+  logCell("new cell", res)
+  track("newObjRC1", res, size)
   gcTrace(res, csAllocated)
-  release(gch)
+  when useCellIds:
+    inc gch.idGenerator
+    res.id = gch.idGenerator * 1000_000 + gch.gcThreadId
   result = cellToUsr(res)
   zeroMem(result, size)
   sysAssert(allocInv(gch.region), "newObjRC1 end")
   when defined(memProfiler): nimProfile(size)
 
+{.push overflowChecks: on.}
 proc newSeqRC1(typ: PNimType, len: int): pointer {.compilerRtl.} =
-  let size = addInt(mulInt(len, typ.base.size), GenericSeqSize)
+  let size = align(GenericSeqSize, typ.base.align) + len * typ.base.size
   result = newObjRC1(typ, size)
   cast[PGenericSeq](result).len = len
   cast[PGenericSeq](result).reserved = len
   when defined(memProfiler): nimProfile(size)
-  
-proc growObj(old: pointer, newsize: int, gch: var TGcHeap): pointer =
-  acquire(gch)
+{.pop.}
+
+proc growObj(old: pointer, newsize: int, gch: var GcHeap): pointer =
   collectCT(gch)
   var ol = usrToCell(old)
   sysAssert(ol.typ != nil, "growObj: 1")
   gcAssert(ol.typ.kind in {tyString, tySequence}, "growObj: 2")
   sysAssert(allocInv(gch.region), "growObj begin")
 
-  var res = cast[PCell](rawAlloc(gch.region, newsize + sizeof(TCell)))
-  var elemSize = 1
-  if ol.typ.kind != tyString: elemSize = ol.typ.base.size
-  
-  var oldsize = cast[PGenericSeq](old).len*elemSize + GenericSeqSize
-  copyMem(res, ol, oldsize + sizeof(TCell))
-  zeroMem(cast[pointer](cast[TAddress](res)+% oldsize +% sizeof(TCell)),
+  var res = cast[PCell](rawAlloc(gch.region, newsize + sizeof(Cell)))
+  var elemSize,elemAlign = 1
+  if ol.typ.kind != tyString:
+    elemSize = ol.typ.base.size
+    elemAlign = ol.typ.base.align
+  incTypeSize ol.typ, newsize
+
+  var oldsize = align(GenericSeqSize, elemAlign) + cast[PGenericSeq](old).len * elemSize
+  copyMem(res, ol, oldsize + sizeof(Cell))
+  zeroMem(cast[pointer](cast[int](res) +% oldsize +% sizeof(Cell)),
           newsize-oldsize)
-  sysAssert((cast[TAddress](res) and (MemAlign-1)) == 0, "growObj: 3")
-  sysAssert(res.refcount shr rcShift <=% 1, "growObj: 4")
-  #if res.refcount <% rcIncrement:
-  #  add(gch.zct, res)
-  #else: # XXX: what to do here?
-  #  decRef(ol)
-  if (ol.refcount and ZctFlag) != 0:
-    var j = gch.zct.len-1
-    var d = gch.zct.d
-    while j >= 0: 
-      if d[j] == ol:
-        d[j] = res
-        break
-      dec(j)
-  if canBeCycleRoot(ol): excl(gch.cycleRoots, ol)
-  when logGC:
-    writeCell("growObj old cell", ol)
-    writeCell("growObj new cell", res)
+  sysAssert((cast[int](res) and (MemAlign-1)) == 0, "growObj: 3")
+  # This can be wrong for intermediate temps that are nevertheless on the
+  # heap because of lambda lifting:
+  #gcAssert(res.refcount shr rcShift <=% 1, "growObj: 4")
+  logCell("growObj old cell", ol)
+  logCell("growObj new cell", res)
   gcTrace(ol, csZctFreed)
   gcTrace(res, csAllocated)
-  when reallyDealloc: rawDealloc(gch.region, ol)
-  else:
-    sysAssert(ol.typ != nil, "growObj: 5")
-    zeroMem(ol, sizeof(TCell))
-  release(gch)
+  track("growObj old", ol, 0)
+  track("growObj new", res, newsize)
+  # since we steal the old seq's contents, we set the old length to 0.
+  cast[PGenericSeq](old).len = 0
+  when useCellIds:
+    inc gch.idGenerator
+    res.id = gch.idGenerator * 1000_000 + gch.gcThreadId
   result = cellToUsr(res)
   sysAssert(allocInv(gch.region), "growObj end")
   when defined(memProfiler): nimProfile(newsize-oldsize)
@@ -495,57 +575,72 @@ proc growObj(old: pointer, newsize: int, gch: var TGcHeap): pointer =
 proc growObj(old: pointer, newsize: int): pointer {.rtl.} =
   result = growObj(old, newsize, gch)
 
-{.push profiler:off.}
+{.push profiler:off, stackTrace:off.}
 
 # ---------------- cycle collector -------------------------------------------
 
-proc freeCyclicCell(gch: var TGcHeap, c: PCell) =
+proc freeCyclicCell(gch: var GcHeap, c: PCell) =
   prepareDealloc(c)
   gcTrace(c, csCycFreed)
-  when logGC: writeCell("cycle collector dealloc cell", c)
-  when reallyDealloc: rawDealloc(gch.region, c)
+  track("cycle collector dealloc cell", c, 0)
+  logCell("cycle collector dealloc cell", c)
+  when reallyDealloc:
+    sysAssert(allocInv(gch.region), "free cyclic cell")
+    beforeDealloc(gch, c, "freeCyclicCell: stack trash")
+    rawDealloc(gch.region, c)
   else:
     gcAssert(c.typ != nil, "freeCyclicCell")
-    zeroMem(c, sizeof(TCell))
-
-proc markGray(s: PCell) =
-  if s.color != rcGray:
-    setColor(s, rcGray)
-    forAllChildren(s, waMarkGray)
-
-proc scanBlack(s: PCell) =
-  s.setColor(rcBlack)
-  forAllChildren(s, waScanBlack)
-
-proc scan(s: PCell) =
-  if s.color == rcGray:
-    if s.refcount >=% rcIncrement:
-      scanBlack(s)
-    else:
-      s.setColor(rcWhite)
-      forAllChildren(s, waScan)
-  
-proc collectWhite(s: PCell) =
-  if s.color == rcWhite and s notin gch.cycleRoots:
-    s.setcolor(rcBlack)
-    forAllChildren(s, waCollectWhite)
-    freeCyclicCell(gch, s)
-
-proc MarkRoots(gch: var TGcHeap) =
-  var tabSize = 0
-  for s in elements(gch.cycleRoots):
-    #writeCell("markRoot", s)
-    inc tabSize
-    if s.color == rcPurple and s.refCount >=% rcIncrement:
-      markGray(s)
+    zeroMem(c, sizeof(Cell))
+
+proc sweep(gch: var GcHeap) =
+  for x in allObjects(gch.region):
+    if isCell(x):
+      # cast to PCell is correct here:
+      var c = cast[PCell](x)
+      if c notin gch.marked: freeCyclicCell(gch, c)
+
+proc markS(gch: var GcHeap, c: PCell) =
+  gcAssert isAllocatedPtr(gch.region, c), "markS: foreign heap root detected A!"
+  incl(gch.marked, c)
+  gcAssert gch.tempStack.len == 0, "stack not empty!"
+  forAllChildren(c, waMarkPrecise)
+  while gch.tempStack.len > 0:
+    dec gch.tempStack.len
+    var d = gch.tempStack.d[gch.tempStack.len]
+    gcAssert isAllocatedPtr(gch.region, d), "markS: foreign heap root detected B!"
+    if not containsOrIncl(gch.marked, d):
+      forAllChildren(d, waMarkPrecise)
+
+proc markGlobals(gch: var GcHeap) {.raises: [].} =
+  if gch.gcThreadId == 0:
+    for i in 0 .. globalMarkersLen-1: globalMarkers[i]()
+  for i in 0 .. threadLocalMarkersLen-1: threadLocalMarkers[i]()
+  let d = gch.additionalRoots.d
+  for i in 0 .. gch.additionalRoots.len-1: markS(gch, d[i])
+
+when logGC:
+  var
+    cycleCheckA: array[100, PCell]
+    cycleCheckALen = 0
+
+  proc alreadySeen(c: PCell): bool =
+    for i in 0 .. cycleCheckALen-1:
+      if cycleCheckA[i] == c: return true
+    if cycleCheckALen == len(cycleCheckA):
+      gcAssert(false, "cycle detection overflow")
+      rawQuit 1
+    cycleCheckA[cycleCheckALen] = c
+    inc cycleCheckALen
+
+  proc debugGraph(s: PCell) =
+    if alreadySeen(s):
+      writeCell("child cell (already seen) ", s)
     else:
-      excl(gch.cycleRoots, s)
-      # (s.color == rcBlack and rc == 0) as 1 condition:
-      if s.refcount == 0:
-        freeCyclicCell(gch, s)
-  gch.stat.cycleTableSize = max(gch.stat.cycleTableSize, tabSize)
+      writeCell("cell {", s)
+      forAllChildren(s, waDebug)
+      c_printf("}\n")
 
-proc doOperation(p: pointer, op: TWalkOp) =
+proc doOperation(p: pointer, op: WalkOp) =
   if p == nil: return
   var c: PCell = usrToCell(p)
   gcAssert(c != nil, "doOperation: 1")
@@ -554,307 +649,145 @@ proc doOperation(p: pointer, op: TWalkOp) =
   case op
   of waZctDecRef:
     #if not isAllocatedPtr(gch.region, c):
-    #  return
-    #  c_fprintf(c_stdout, "[GC] decref bug: %p", c) 
+    #  c_printf("[GC] decref bug: %p", c)
     gcAssert(isAllocatedPtr(gch.region, c), "decRef: waZctDecRef")
     gcAssert(c.refcount >=% rcIncrement, "doOperation 2")
-    #c.refcount = c.refcount -% rcIncrement
-    when logGC: writeCell("decref (from doOperation)", c)
+    logCell("decref (from doOperation)", c)
+    track("waZctDecref", p, 0)
     decRef(c)
-    #if c.refcount <% rcIncrement: addZCT(gch.zct, c)
   of waPush:
     add(gch.tempStack, c)
-  of waCycleDecRef:
-    gcAssert(c.refcount >=% rcIncrement, "doOperation 3")
-    c.refcount = c.refcount -% rcIncrement
-  of waMarkGray:
-    gcAssert(c.refcount >=% rcIncrement, "waMarkGray")
-    c.refcount = c.refcount -% rcIncrement
-    markGray(c)
-  of waScan: scan(c)
-  of waScanBlack:
-    c.refcount = c.refcount +% rcIncrement
-    if c.color != rcBlack:
-      scanBlack(c)
-  of waCollectWhite: collectWhite(c)
+  of waMarkGlobal:
+    markS(gch, c)
+  of waMarkPrecise:
+    add(gch.tempStack, c)
+  #of waDebug: debugGraph(c)
 
 proc nimGCvisit(d: pointer, op: int) {.compilerRtl.} =
-  doOperation(d, TWalkOp(op))
-
-proc CollectZCT(gch: var TGcHeap): bool
+  doOperation(d, WalkOp(op))
 
-proc collectRoots(gch: var TGcHeap) =
-  for s in elements(gch.cycleRoots):
-    excl(gch.cycleRoots, s)
-    collectWhite(s)
+proc collectZCT(gch: var GcHeap): bool {.benign, raises: [].}
 
-proc collectCycles(gch: var TGcHeap) =
+proc collectCycles(gch: var GcHeap) {.raises: [].} =
+  when hasThreadSupport:
+    for c in gch.toDispose:
+      nimGCunref(c)
   # ensure the ZCT 'color' is not used:
   while gch.zct.len > 0: discard collectZCT(gch)
-  markRoots(gch)
-  # scanRoots:
-  for s in elements(gch.cycleRoots): scan(s)
-  collectRoots(gch)
-
-  Deinit(gch.cycleRoots)
-  Init(gch.cycleRoots)
-  # alive cycles need to be kept in 'cycleRoots' if they are referenced
-  # from the stack; otherwise the write barrier will add the cycle root again
-  # anyway:
-  when false:
-    var d = gch.decStack.d
-    var cycleRootsLen = 0
-    for i in 0..gch.decStack.len-1:
-      var c = d[i]
-      gcAssert isAllocatedPtr(gch.region, c), "addBackStackRoots"
-      gcAssert c.refcount >=% rcIncrement, "addBackStackRoots: dead cell"
-      if canBeCycleRoot(c):
-        #if c notin gch.cycleRoots: 
-        inc cycleRootsLen
-        incl(gch.cycleRoots, c)
-      gcAssert c.typ != nil, "addBackStackRoots 2"
-    if cycleRootsLen != 0:
-      cfprintf(cstdout, "cycle roots: %ld\n", cycleRootsLen)
-
-proc gcMark(gch: var TGcHeap, p: pointer) {.inline.} =
+  cellsetReset(gch.marked)
+  var d = gch.decStack.d
+  for i in 0..gch.decStack.len-1:
+    sysAssert isAllocatedPtr(gch.region, d[i]), "collectCycles"
+    markS(gch, d[i])
+  markGlobals(gch)
+  sweep(gch)
+
+proc gcMark(gch: var GcHeap, p: pointer) {.inline.} =
   # the addresses are not as cells on the stack, so turn them to cells:
   sysAssert(allocInv(gch.region), "gcMark begin")
-  var cell = usrToCell(p)
-  var c = cast[TAddress](cell)
+  var c = cast[int](p)
   if c >% PageSize:
     # fast check: does it look like a cell?
-    var objStart = cast[PCell](interiorAllocatedPtr(gch.region, cell))
+    var objStart = cast[PCell](interiorAllocatedPtr(gch.region, p))
     if objStart != nil:
       # mark the cell:
-      objStart.refcount = objStart.refcount +% rcIncrement
+      incRef(objStart)
       add(gch.decStack, objStart)
     when false:
+      let cell = usrToCell(p)
       if isAllocatedPtr(gch.region, cell):
         sysAssert false, "allocated pointer but not interior?"
         # mark the cell:
-        cell.refcount = cell.refcount +% rcIncrement
+        incRef(cell)
         add(gch.decStack, cell)
   sysAssert(allocInv(gch.region), "gcMark end")
 
-proc markThreadStacks(gch: var TGcHeap) = 
-  when hasThreadSupport and hasSharedHeap:
-    {.error: "not fully implemented".}
-    var it = threadList
-    while it != nil:
-      # mark registers: 
-      for i in 0 .. high(it.registers): gcMark(gch, it.registers[i])
-      var sp = cast[TAddress](it.stackBottom)
-      var max = cast[TAddress](it.stackTop)
-      # XXX stack direction?
-      # XXX unroll this loop:
-      while sp <=% max:
-        gcMark(gch, cast[ppointer](sp)[])
-        sp = sp +% sizeof(pointer)
-      it = it.next
-
-# ----------------- stack management --------------------------------------
-#  inspired from Smart Eiffel
-
-when defined(sparc):
-  const stackIncreases = false
-elif defined(hppa) or defined(hp9000) or defined(hp9000s300) or
-     defined(hp9000s700) or defined(hp9000s800) or defined(hp9000s820):
-  const stackIncreases = true
-else:
-  const stackIncreases = false
-
-when not defined(useNimRtl):
-  {.push stack_trace: off.}
-  proc setStackBottom(theStackBottom: pointer) =
-    #c_fprintf(c_stdout, "stack bottom: %p;\n", theStackBottom)
-    # the first init must be the one that defines the stack bottom:
-    if gch.stackBottom == nil: gch.stackBottom = theStackBottom
-    else:
-      var a = cast[TAddress](theStackBottom) # and not PageMask - PageSize*2
-      var b = cast[TAddress](gch.stackBottom)
-      #c_fprintf(c_stdout, "old: %p new: %p;\n",gch.stackBottom,theStackBottom)
-      when stackIncreases:
-        gch.stackBottom = cast[pointer](min(a, b))
-      else:
-        gch.stackBottom = cast[pointer](max(a, b))
-  {.pop.}
-
-proc stackSize(): int {.noinline.} =
-  var stackTop {.volatile.}: pointer
-  result = abs(cast[int](addr(stackTop)) - cast[int](gch.stackBottom))
-
-when defined(sparc): # For SPARC architecture.
-  proc isOnStack(p: pointer): bool =
-    var stackTop {.volatile.}: pointer
-    stackTop = addr(stackTop)
-    var b = cast[TAddress](gch.stackBottom)
-    var a = cast[TAddress](stackTop)
-    var x = cast[TAddress](p)
-    result = a <=% x and x <=% b
-
-  proc markStackAndRegisters(gch: var TGcHeap) {.noinline, cdecl.} =
-    when defined(sparcv9):
-      asm  """"flushw \n" """
-    else:
-      asm  """"ta      0x3   ! ST_FLUSH_WINDOWS\n" """
-
-    var
-      max = gch.stackBottom
-      sp: PPointer
-      stackTop: array[0..1, pointer]
-    sp = addr(stackTop[0])
-    # Addresses decrease as the stack grows.
-    while sp <= max:
-      gcMark(gch, sp[])
-      sp = cast[ppointer](cast[TAddress](sp) +% sizeof(pointer))
-
-elif defined(ELATE):
-  {.error: "stack marking code is to be written for this architecture".}
-
-elif stackIncreases:
-  # ---------------------------------------------------------------------------
-  # Generic code for architectures where addresses increase as the stack grows.
-  # ---------------------------------------------------------------------------
-  proc isOnStack(p: pointer): bool =
-    var stackTop {.volatile.}: pointer
-    stackTop = addr(stackTop)
-    var a = cast[TAddress](gch.stackBottom)
-    var b = cast[TAddress](stackTop)
-    var x = cast[TAddress](p)
-    result = a <=% x and x <=% b
-
-  var
-    jmpbufSize {.importc: "sizeof(jmp_buf)", nodecl.}: int
-      # a little hack to get the size of a TJmpBuf in the generated C code
-      # in a platform independant way
-
-  proc markStackAndRegisters(gch: var TGcHeap) {.noinline, cdecl.} =
-    var registers: C_JmpBuf
-    if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
-      var max = cast[TAddress](gch.stackBottom)
-      var sp = cast[TAddress](addr(registers)) +% jmpbufSize -% sizeof(pointer)
-      # sp will traverse the JMP_BUF as well (jmp_buf size is added,
-      # otherwise sp would be below the registers structure).
-      while sp >=% max:
-        gcMark(gch, cast[ppointer](sp)[])
-        sp = sp -% sizeof(pointer)
-
-else:
-  # ---------------------------------------------------------------------------
-  # Generic code for architectures where addresses decrease as the stack grows.
-  # ---------------------------------------------------------------------------
-  proc isOnStack(p: pointer): bool =
-    var stackTop {.volatile.}: pointer
-    stackTop = addr(stackTop)
-    var b = cast[TAddress](gch.stackBottom)
-    var a = cast[TAddress](stackTop)
-    var x = cast[TAddress](p)
-    result = a <=% x and x <=% b
-
-  proc markStackAndRegisters(gch: var TGcHeap) {.noinline, cdecl.} =
-    # We use a jmp_buf buffer that is in the C stack.
-    # Used to traverse the stack and registers assuming
-    # that 'setjmp' will save registers in the C stack.
-    type PStackSlice = ptr array [0..7, pointer]
-    var registers: C_JmpBuf
-    if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
-      var max = cast[TAddress](gch.stackBottom)
-      var sp = cast[TAddress](addr(registers))
-      # loop unrolled:
-      while sp <% max - 8*sizeof(pointer):
-        gcMark(gch, cast[PStackSlice](sp)[0])
-        gcMark(gch, cast[PStackSlice](sp)[1])
-        gcMark(gch, cast[PStackSlice](sp)[2])
-        gcMark(gch, cast[PStackSlice](sp)[3])
-        gcMark(gch, cast[PStackSlice](sp)[4])
-        gcMark(gch, cast[PStackSlice](sp)[5])
-        gcMark(gch, cast[PStackSlice](sp)[6])
-        gcMark(gch, cast[PStackSlice](sp)[7])
-        sp = sp +% sizeof(pointer)*8
-      # last few entries:
-      while sp <=% max:
-        gcMark(gch, cast[ppointer](sp)[])
-        sp = sp +% sizeof(pointer)
-
-# ----------------------------------------------------------------------------
-# end of non-portable code
-# ----------------------------------------------------------------------------
-
-proc CollectZCT(gch: var TGcHeap): bool =
-  # Note: Freeing may add child objects to the ZCT! So essentially we do 
-  # deep freeing, which is bad for incremental operation. In order to 
+#[
+  This method is conditionally marked with an attribute so that it gets ignored by the LLVM ASAN
+  (Address SANitizer) instrumentation as it will raise false errors due to the implementation of
+  garbage collection that is used by Nim. For more information, please see the documentation of
+  `CLANG_NO_SANITIZE_ADDRESS` in `lib/nimbase.h`.
+ ]#
+proc markStackAndRegisters(gch: var GcHeap) {.noinline, cdecl,
+    codegenDecl: "CLANG_NO_SANITIZE_ADDRESS N_LIB_PRIVATE $# $#$#".} =
+  forEachStackSlot(gch, gcMark)
+
+proc collectZCT(gch: var GcHeap): bool =
+  # Note: Freeing may add child objects to the ZCT! So essentially we do
+  # deep freeing, which is bad for incremental operation. In order to
   # avoid a deep stack, we move objects to keep the ZCT small.
   # This is performance critical!
   const workPackage = 100
   var L = addr(gch.zct.len)
-  
-  when withRealtime:
+
+  when withRealTime:
     var steps = workPackage
-    var t0: TTicks
+    var t0: Ticks
     if gch.maxPause > 0: t0 = getticks()
   while L[] > 0:
     var c = gch.zct.d[0]
     sysAssert(isAllocatedPtr(gch.region, c), "CollectZCT: isAllocatedPtr")
     # remove from ZCT:
     gcAssert((c.refcount and ZctFlag) == ZctFlag, "collectZCT")
-    
+
     c.refcount = c.refcount and not ZctFlag
     gch.zct.d[0] = gch.zct.d[L[] - 1]
     dec(L[])
-    when withRealtime: dec steps
-    if c.refcount <% rcIncrement: 
+    when withRealTime: dec steps
+    if c.refcount <% rcIncrement:
       # It may have a RC > 0, if it is in the hardware stack or
       # it has not been removed yet from the ZCT. This is because
-      # ``incref`` does not bother to remove the cell from the ZCT 
+      # ``incref`` does not bother to remove the cell from the ZCT
       # as this might be too slow.
       # In any case, it should be removed from the ZCT. But not
       # freed. **KEEP THIS IN MIND WHEN MAKING THIS INCREMENTAL!**
-      if canBeCycleRoot(c): excl(gch.cycleRoots, c)
-      when logGC: writeCell("zct dealloc cell", c)
+      logCell("zct dealloc cell", c)
+      track("zct dealloc cell", c, 0)
       gcTrace(c, csZctFreed)
       # We are about to free the object, call the finalizer BEFORE its
       # children are deleted as well, because otherwise the finalizer may
       # access invalid memory. This is done by prepareDealloc():
       prepareDealloc(c)
       forAllChildren(c, waZctDecRef)
-      when reallyDealloc: rawDealloc(gch.region, c)
+      when reallyDealloc:
+        sysAssert(allocInv(gch.region), "collectZCT: rawDealloc")
+        beforeDealloc(gch, c, "collectZCT: stack trash")
+        rawDealloc(gch.region, c)
       else:
         sysAssert(c.typ != nil, "collectZCT 2")
-        zeroMem(c, sizeof(TCell))
-    when withRealtime:
+        zeroMem(c, sizeof(Cell))
+    when withRealTime:
       if steps == 0:
         steps = workPackage
         if gch.maxPause > 0:
           let duration = getticks() - t0
-          # the GC's measuring is not accurate and needs some cleanup actions 
+          # the GC's measuring is not accurate and needs some cleanup actions
           # (stack unmarking), so subtract some short amount of time in
           # order to miss deadlines less often:
           if duration >= gch.maxPause - 50_000:
             return false
   result = true
 
-proc unmarkStackAndRegisters(gch: var TGcHeap) =
+proc unmarkStackAndRegisters(gch: var GcHeap) =
   var d = gch.decStack.d
   for i in 0..gch.decStack.len-1:
     sysAssert isAllocatedPtr(gch.region, d[i]), "unmarkStackAndRegisters"
     decRef(d[i])
-    #var c = d[i]
-    # XXX no need for an atomic dec here:
-    #if --c.refcount:
-    #  addZCT(gch.zct, c)
-    #sysAssert c.typ != nil, "unmarkStackAndRegisters 2"
   gch.decStack.len = 0
 
-proc collectCTBody(gch: var TGcHeap) =
-  when withRealtime:
+proc collectCTBody(gch: var GcHeap) {.raises: [].} =
+  when withRealTime:
     let t0 = getticks()
   sysAssert(allocInv(gch.region), "collectCT: begin")
-  
-  gch.stat.maxStackSize = max(gch.stat.maxStackSize, stackSize())
+
+  when nimCoroutines:
+    for stack in gch.stack.items():
+      gch.stat.maxStackSize = max(gch.stat.maxStackSize, stack.stackSize())
+  else:
+    gch.stat.maxStackSize = max(gch.stat.maxStackSize, stackSize())
   sysAssert(gch.decStack.len == 0, "collectCT")
   prepareForInteriorPointerChecking(gch.region)
   markStackAndRegisters(gch)
-  markThreadStacks(gch)
   gch.stat.maxStackCells = max(gch.stat.maxStackCells, gch.decStack.len)
   inc(gch.stat.stackScans)
   if collectZCT(gch):
@@ -864,79 +797,97 @@ proc collectCTBody(gch: var TGcHeap) =
         #discard collectZCT(gch)
         inc(gch.stat.cycleCollections)
         gch.cycleThreshold = max(InitialCycleThreshold, getOccupiedMem() *
-                                 cycleIncrease)
+                                 CycleIncrease)
         gch.stat.maxThreshold = max(gch.stat.maxThreshold, gch.cycleThreshold)
   unmarkStackAndRegisters(gch)
   sysAssert(allocInv(gch.region), "collectCT: end")
-  
-  when withRealtime:
+
+  when withRealTime:
     let duration = getticks() - t0
     gch.stat.maxPause = max(gch.stat.maxPause, duration)
     when defined(reportMissedDeadlines):
       if gch.maxPause > 0 and duration > gch.maxPause:
-        c_fprintf(c_stdout, "[GC] missed deadline: %ld\n", duration)
+        c_printf("[GC] missed deadline: %ld\n", duration)
 
-proc collectCT(gch: var TGcHeap) =
-  if (gch.zct.len >= ZctThreshold or (cycleGC and
-      getOccupiedMem(gch.region)>=gch.cycleThreshold) or alwaysGC) and 
+proc collectCT(gch: var GcHeap) =
+  if (gch.zct.len >= gch.zctThreshold or (cycleGC and
+      getOccupiedMem(gch.region)>=gch.cycleThreshold) or alwaysGC) and
       gch.recGcLock == 0:
+    when false:
+      prepareForInteriorPointerChecking(gch.region)
+      cellsetReset(gch.marked)
+      markForDebug(gch)
     collectCTBody(gch)
+    gch.zctThreshold = max(InitialZctThreshold, gch.zct.len * CycleIncrease)
 
-when withRealtime:
-  proc toNano(x: int): TNanos {.inline.} =
+proc GC_collectZct*() =
+  ## Collect the ZCT (zero count table). Unstable, experimental API for
+  ## testing purposes.
+  ## DO NOT USE!
+  collectCTBody(gch)
+
+when withRealTime:
+  proc toNano(x: int): Nanos {.inline.} =
     result = x * 1000
 
   proc GC_setMaxPause*(MaxPauseInUs: int) =
     gch.maxPause = MaxPauseInUs.toNano
 
-  proc GC_step(gch: var TGcHeap, us: int, strongAdvice: bool) =
-    acquire(gch)
+  proc GC_step(gch: var GcHeap, us: int, strongAdvice: bool) =
     gch.maxPause = us.toNano
-    if (gch.zct.len >= ZctThreshold or (cycleGC and
-        getOccupiedMem(gch.region)>=gch.cycleThreshold) or alwaysGC) or 
+    if (gch.zct.len >= gch.zctThreshold or (cycleGC and
+        getOccupiedMem(gch.region)>=gch.cycleThreshold) or alwaysGC) or
         strongAdvice:
       collectCTBody(gch)
-    release(gch)
+      gch.zctThreshold = max(InitialZctThreshold, gch.zct.len * CycleIncrease)
+
+  proc GC_step*(us: int, strongAdvice = false, stackSize = -1) {.noinline.} =
+    if stackSize >= 0: # a stack size was given: override the stack bottoms below
+      var stackTop {.volatile.}: pointer
+      gch.getActiveStack().pos = addr(stackTop) # address of a local of this frame
+
+      for stack in gch.stack.items():
+        stack.bottomSaved = stack.bottom # kept so the override can be undone below
+        when stackIncreases:
+          stack.bottom = cast[pointer](
+            cast[int](stack.pos) - sizeof(pointer) * 6 - stackSize)
+        else:
+          stack.bottom = cast[pointer](
+            cast[int](stack.pos) + sizeof(pointer) * 6 + stackSize)
+
+    GC_step(gch, us, strongAdvice)
 
-  proc GC_step*(us: int, strongAdvice = false) = GC_step(gch, us, strongAdvice)
+    if stackSize >= 0: # restore the bottoms saved before the step
+      for stack in gch.stack.items():
+        stack.bottom = stack.bottomSaved
 
 when not defined(useNimRtl):
-  proc GC_disable() = 
-    when hasThreadSupport and hasSharedHeap:
-      discard atomicInc(gch.recGcLock, 1)
-    else:
-      inc(gch.recGcLock)
+  proc GC_disable() =
+    inc(gch.recGcLock)
   proc GC_enable() =
-    if gch.recGcLock > 0: 
-      when hasThreadSupport and hasSharedHeap:
-        discard atomicDec(gch.recGcLock, 1)
-      else:
-        dec(gch.recGcLock)
+    when defined(nimDoesntTrackDefects):
+      if gch.recGcLock <= 0:
+        raise newException(AssertionDefect,
+            "API usage error: GC_enable called but GC is already enabled")
+    dec(gch.recGcLock)
 
-  proc GC_setStrategy(strategy: TGC_Strategy) =
-    case strategy
-    of gcThroughput: nil
-    of gcResponsiveness: nil
-    of gcOptimizeSpace: nil
-    of gcOptimizeTime: nil
+  proc GC_setStrategy(strategy: GC_Strategy) =
+    discard
 
   proc GC_enableMarkAndSweep() =
     gch.cycleThreshold = InitialCycleThreshold
 
   proc GC_disableMarkAndSweep() =
-    gch.cycleThreshold = high(gch.cycleThreshold)-1
+    gch.cycleThreshold = high(typeof(gch.cycleThreshold))-1
     # set to the max value to suppress the cycle detector
 
   proc GC_fullCollect() =
-    acquire(gch)
     var oldThreshold = gch.cycleThreshold
     gch.cycleThreshold = 0 # forces cycle collection
     collectCT(gch)
     gch.cycleThreshold = oldThreshold
-    release(gch)
 
   proc GC_getStatistics(): string =
-    GC_disable()
     result = "[GC] total memory: " & $(getTotalMem()) & "\n" &
              "[GC] occupied memory: " & $(getOccupiedMem()) & "\n" &
              "[GC] stack scans: " & $gch.stat.stackScans & "\n" &
@@ -945,8 +896,16 @@ when not defined(useNimRtl):
              "[GC] max threshold: " & $gch.stat.maxThreshold & "\n" &
              "[GC] zct capacity: " & $gch.zct.cap & "\n" &
              "[GC] max cycle table size: " & $gch.stat.cycleTableSize & "\n" &
-             "[GC] max stack size: " & $gch.stat.maxStackSize & "\n" &
-             "[GC] max pause time [ms]: " & $(gch.stat.maxPause div 1000_000)
-    GC_enable()
+             "[GC] max pause time [ms]: " & $(gch.stat.maxPause div 1000_000) & "\n"
+    when nimCoroutines:
+      result.add "[GC] number of stacks: " & $gch.stack.len & "\n"
+      for stack in items(gch.stack):
+        result.add "[GC]   stack " & stack.bottom.repr & "[GC]     max stack size " & cast[pointer](stack.maxStackSize).repr & "\n"
+    else:
+      # this caused memory leaks, see #10488 ; find a way without `repr`
+      # maybe using a local copy of strutils.toHex or snprintf
+      when defined(logGC):
+        result.add "[GC] stack bottom: " & gch.stack.bottom.repr
+      result.add "[GC] max stack size: " & $gch.stat.maxStackSize & "\n"
 
-{.pop.}
+{.pop.} # profiler: off, stackTrace: off
diff --git a/lib/system/gc2.nim b/lib/system/gc2.nim
deleted file mode 100755
index 05c291371..000000000
--- a/lib/system/gc2.nim
+++ /dev/null
@@ -1,1386 +0,0 @@
-#
-#
-#            Nimrod's Runtime Library
-#        (c) Copyright 2012 Andreas Rumpf
-#
-#    See the file "copying.txt", included in this
-#    distribution, for details about the copyright.
-#
-
-#            Garbage Collector
-#
-# The basic algorithm is *Deferrent Reference Counting* with cycle detection.
-# This is achieved by combining a Deutsch-Bobrow garbage collector
-# together with Christoper's partial mark-sweep garbage collector.
-#
-# Special care has been taken to avoid recursion as far as possible to avoid
-# stack overflows when traversing deep datastructures. It is well-suited
-# for soft real time applications (like games).
-{.push profiler:off.}
-
-const
-  CycleIncrease = 2 # is a multiplicative increase
-  InitialCycleThreshold = 4*1024*1024 # X MB because cycle checking is slow
-  ZctThreshold = 500  # we collect garbage if the ZCT's size
-                      # reaches this threshold
-                      # this seems to be a good value
-  withRealTime = defined(useRealtimeGC)
-
-when withRealTime and not defined(getTicks):
-  include "system/timers"
-when defined(memProfiler):
-  proc nimProfile(requestedSize: int)
-
-const
-  rcShift = 6 # the reference count is shifted so we can use
-              # the least significat bits for additinal flags:
-
-  rcAlive = 0b00000           # object is reachable.
-                              # color *black* in the original paper
-                              
-  rcCycleCandidate = 0b00001  # possible root of a cycle. *purple*
-
-  rcDecRefApplied = 0b00010   # the first dec-ref phase of the
-                              # collector was already applied to this
-                              # object. *gray*
-                              
-  rcMaybeDead = 0b00011       # this object is a candidate for deletion
-                              # during the collect cycles algorithm.
-                              # *white*.
-                              
-  rcReallyDead = 0b00100      # this is proved to be garbage
-  
-  rcRetiredBuffer = 0b00101   # this is a seq or string buffer that
-                              # was replaced by a resize operation.
-                              # see growObj for details
-
-  rcColorMask = TRefCount(0b00111)
-
-  rcZct = 0b01000             # already added to ZCT
-  rcInCycleRoots = 0b10000    # already buffered as cycle candidate
-  rcHasStackRef = 0b100000    # the object had a stack ref in the last
-                              # cycle collection
-
-  rcMarkBit = rcHasStackRef   # this is currently used for leak detection
-                              # when traceGC is on
-
-  rcBufferedAnywhere = rcZct or rcInCycleRoots
-
-  rcIncrement = 1 shl rcShift # don't touch the color bits
-
-const
-  NewObjectsAreCycleRoots = true
-    # the alternative is to use the old strategy of adding cycle roots
-    # in incRef (in the compiler itself, this doesn't change much)
-
-  IncRefRemovesCandidates = false
-    # this is safe only if we can reliably track the fact that the object
-    # has stack references. This could be easily done by adding another bit
-    # to the refcount field and setting it up in unmarkStackAndRegisters.
-    # The bit must also be set for new objects that are not rc1 and it must be
-    # examined in the decref loop in collectCycles.
-    # XXX: not implemented yet as tests didn't show any improvement from this
-   
-  MarkingSkipsAcyclicObjects = true
-    # Acyclic objects can be safely ignored in the mark and scan phases, 
-    # because they cannot contribute to the internal count.
-    # XXX: if we generate specialized `markCyclic` and `markAcyclic`
-    # procs we can further optimize this as there won't be need for any
-    # checks in the code
-  
-  MinimumStackMarking = false
-    # Try to scan only the user stack and ignore the part of the stack
-    # belonging to the GC itself. see setStackTop for further info.
-    # XXX: still has problems in release mode in the compiler itself.
-    # investigate how it affects growObj
-
-  CollectCyclesStats = false
-
-type
-  TWalkOp = enum
-    waPush
-
-  TFinalizer {.compilerproc.} = proc (self: pointer) {.nimcall.}
-    # A ref type can have a finalizer that is called before the object's
-    # storage is freed.
-
-  TGcStat {.final, pure.} = object
-    stackScans: int          # number of performed stack scans (for statistics)
-    cycleCollections: int    # number of performed full collections
-    maxThreshold: int        # max threshold that has been set
-    maxStackSize: int        # max stack size
-    maxStackCells: int       # max stack cells in ``decStack``
-    cycleTableSize: int      # max entries in cycle table  
-    maxPause: int64          # max measured GC pause in nanoseconds
-  
-  TGcHeap {.final, pure.} = object # this contains the zero count and
-                                   # non-zero count table
-    stackBottom: pointer
-    stackTop: pointer
-    cycleThreshold: int
-    zct: TCellSeq            # the zero count table
-    decStack: TCellSeq       # cells in the stack that are to decref again
-    cycleRoots: TCellSeq
-    tempStack: TCellSeq      # temporary stack for recursion elimination
-    freeStack: TCellSeq      # objects ready to be freed
-    recGcLock: int           # prevent recursion via finalizers; no thread lock
-    cycleRootsTrimIdx: int   # Trimming is a light-weight collection of the 
-                             # cycle roots table that uses a cheap linear scan
-                             # to find only possitively dead objects.
-                             # One strategy is to perform it only for new objects
-                             # allocated between the invocations of CollectZCT.
-                             # This index indicates the start of the range of
-                             # such new objects within the table.
-    when withRealTime:
-      maxPause: TNanos       # max allowed pause in nanoseconds; active if > 0
-    region: TMemRegion       # garbage collected region
-    stat: TGcStat
-
-var
-  gch* {.rtlThreadVar.}: TGcHeap
-
-when not defined(useNimRtl):
-  InstantiateForRegion(gch.region)
-
-template acquire(gch: TGcHeap) = 
-  when hasThreadSupport and hasSharedHeap:
-    AcquireSys(HeapLock)
-
-template release(gch: TGcHeap) = 
-  when hasThreadSupport and hasSharedHeap:
-    releaseSys(HeapLock)
-
-template setColor(c: PCell, color) =
-  c.refcount = (c.refcount and not rcColorMask) or color
-
-template color(c: PCell): expr =
-  c.refcount and rcColorMask
-
-template isBitDown(c: PCell, bit): expr =
-  (c.refcount and bit) == 0
-
-template isBitUp(c: PCell, bit): expr =
-  (c.refcount and bit) != 0
-
-template setBit(c: PCell, bit): expr =
-  c.refcount = c.refcount or bit
-
-template isDead(c: Pcell): expr =
-  c.isBitUp(rcReallyDead) # also covers rcRetiredBuffer
-
-template clearBit(c: PCell, bit): expr =
-  c.refcount = c.refcount and (not TRefCount(bit))
-
-when debugGC:
-  var gcCollectionIdx = 0
-
-  proc colorStr(c: PCell): cstring =
-    let color = c.color
-    case color
-    of rcAlive: return "alive"
-    of rcMaybeDead: return "maybedead"
-    of rcCycleCandidate: return "candidate"
-    of rcDecRefApplied: return "marked"
-    of rcRetiredBuffer: return "retired"
-    of rcReallyDead: return "dead"
-    else: return "unknown?"
-  
-  proc inCycleRootsStr(c: PCell): cstring =
-    if c.isBitUp(rcInCycleRoots): result = "cycleroot"
-    else: result = ""
-
-  proc inZctStr(c: PCell): cstring =
-    if c.isBitUp(rcZct): result = "zct"
-    else: result = ""
-
-  proc writeCell*(msg: CString, c: PCell, force = false) =
-    var kind = -1
-    if c.typ != nil: kind = ord(c.typ.kind)
-    when trackAllocationSource:
-      c_fprintf(c_stdout, "[GC %d] %s: %p %d rc=%ld %s %s %s from %s(%ld)\n",
-                gcCollectionIdx,
-                msg, c, kind, c.refcount shr rcShift,
-                c.colorStr, c.inCycleRootsStr, c.inZctStr,
-                c.filename, c.line)
-    else:
-      c_fprintf(c_stdout, "[GC] %s: %p %d rc=%ld\n",
-                msg, c, kind, c.refcount shr rcShift)
-
-proc addZCT(zct: var TCellSeq, c: PCell) {.noinline.} =
-  if c.isBitDown(rcZct):
-    c.setBit rcZct
-    zct.add c
-
-template setStackTop(gch) =
-  # This must be called immediately after we enter the GC code
-  # to minimize the size of the scanned stack. The stack consumed
-  # by the GC procs may amount to 200-400 bytes depending on the
-  # build settings and this contributes to false-positives
-  # in the conservative stack marking
-  when MinimumStackMarking:
-    var stackTop {.volatile.}: pointer
-    gch.stackTop = addr(stackTop)
-
-template addCycleRoot(cycleRoots: var TCellSeq, c: PCell) =
-  if c.color != rcCycleCandidate:
-    c.setColor rcCycleCandidate
-    
-    # the object may be buffered already. for example, consider:
-    # decref; incref; decref
-    if c.isBitDown(rcInCycleRoots):
-      c.setBit rcInCycleRoots
-      cycleRoots.add c
-
-proc cellToUsr(cell: PCell): pointer {.inline.} =
-  # convert object (=pointer to refcount) to pointer to userdata
-  result = cast[pointer](cast[TAddress](cell)+%TAddress(sizeof(TCell)))
-
-proc usrToCell*(usr: pointer): PCell {.inline.} =
-  # convert pointer to userdata to object (=pointer to refcount)
-  result = cast[PCell](cast[TAddress](usr)-%TAddress(sizeof(TCell)))
-
-proc canbeCycleRoot(c: PCell): bool {.inline.} =
-  result = ntfAcyclic notin c.typ.flags
-
-proc extGetCellType(c: pointer): PNimType {.compilerproc.} =
-  # used for code generation concerning debugging
-  result = usrToCell(c).typ
-
-proc internRefcount(p: pointer): int {.exportc: "getRefcount".} =
-  result = int(usrToCell(p).refcount) shr rcShift
-
-# this that has to equals zero, otherwise we have to round up UnitsPerPage:
-when BitsPerPage mod (sizeof(int)*8) != 0:
-  {.error: "(BitsPerPage mod BitsPerUnit) should be zero!".}
-
-# forward declarations:
-proc collectCT(gch: var TGcHeap)
-proc IsOnStack*(p: pointer): bool {.noinline.}
-proc forAllChildren(cell: PCell, op: TWalkOp)
-proc doOperation(p: pointer, op: TWalkOp)
-proc forAllChildrenAux(dest: Pointer, mt: PNimType, op: TWalkOp)
-# we need the prototype here for debugging purposes
-
-proc prepareDealloc(cell: PCell) =
-  if cell.typ.finalizer != nil:
-    # the finalizer could invoke something that
-    # allocates memory; this could trigger a garbage
-    # collection. Since we are already collecting we
-    # prevend recursive entering here by a lock.
-    # XXX: we should set the cell's children to nil!
-    inc(gch.recGcLock)
-    (cast[TFinalizer](cell.typ.finalizer))(cellToUsr(cell))
-    dec(gch.recGcLock)
-
-when traceGC:
-  # traceGC is a special switch to enable extensive debugging
-  type
-    TCellState = enum
-      csAllocated, csFreed
-  var
-    states: array[TCellState, TCellSet]
-
-  proc traceCell(c: PCell, state: TCellState) =
-    case state
-    of csAllocated:
-      if c in states[csAllocated]:
-        writeCell("attempt to alloc an already allocated cell", c)
-        sysAssert(false, "traceCell 1")
-      excl(states[csFreed], c)
-      # writecell("allocated", c)
-    of csFreed:
-      if c in states[csFreed]:
-        writeCell("attempt to free a cell twice", c)
-        sysAssert(false, "traceCell 2")
-      if c notin states[csAllocated]:
-        writeCell("attempt to free not an allocated cell", c)
-        sysAssert(false, "traceCell 3")
-      excl(states[csAllocated], c)
-      # writecell("freed", c)
-    incl(states[state], c)
-
-  proc computeCellWeight(c: PCell): int =
-    var x: TCellSet
-    x.init
-
-    let startLen = gch.tempStack.len
-    c.forAllChildren waPush
-    
-    while startLen != gch.tempStack.len:
-      dec gch.tempStack.len
-      var c = gch.tempStack.d[gch.tempStack.len]
-      if c in states[csFreed]: continue
-      inc result
-      if c notin x:
-        x.incl c
-        c.forAllChildren waPush
-
-  template markChildrenRec(cell) =
-    let startLen = gch.tempStack.len
-    cell.forAllChildren waPush
-    let isMarked = cell.isBitUp(rcMarkBit)
-    while startLen != gch.tempStack.len:
-      dec gch.tempStack.len
-      var c = gch.tempStack.d[gch.tempStack.len]
-      if c in states[csFreed]: continue
-      if c.isBitDown(rcMarkBit):
-        c.setBit rcMarkBit
-        c.forAllChildren waPush
-    if c.isBitUp(rcMarkBit) and not isMarked:
-      writecell("cyclic cell", cell)
-      cprintf "Weight %d\n", cell.computeCellWeight
-      
-  proc writeLeakage(onlyRoots: bool) =
-    if onlyRoots:
-      for c in elements(states[csAllocated]):
-        if c notin states[csFreed]:
-          markChildrenRec(c)
-    var f = 0
-    var a = 0
-    for c in elements(states[csAllocated]):
-      inc a
-      if c in states[csFreed]: inc f
-      elif c.isBitDown(rcMarkBit):
-        writeCell("leak", c)
-        cprintf "Weight %d\n", c.computeCellWeight
-    cfprintf(cstdout, "Allocations: %ld; freed: %ld\n", a, f)
-
-template gcTrace(cell, state: expr): stmt {.immediate.} =
-  when logGC: writeCell($state, cell)
-  when traceGC: traceCell(cell, state)
-
-template WithHeapLock(blk: stmt): stmt =
-  when hasThreadSupport and hasSharedHeap: AcquireSys(HeapLock)
-  blk
-  when hasThreadSupport and hasSharedHeap: ReleaseSys(HeapLock)
-
-proc rtlAddCycleRoot(c: PCell) {.rtl, inl.} = 
-  # we MUST access gch as a global here, because this crosses DLL boundaries!
-  WithHeapLock: addCycleRoot(gch.cycleRoots, c)
-
-proc rtlAddZCT(c: PCell) {.rtl, inl.} =
-  # we MUST access gch as a global here, because this crosses DLL boundaries!
-  WithHeapLock: addZCT(gch.zct, c)
-
-type
-  TCyclicMode = enum
-    Cyclic,
-    Acyclic,
-    MaybeCyclic
-
-  TReleaseType = enum
-    AddToZTC
-    FreeImmediately
-
-  THeapType = enum
-    LocalHeap
-    SharedHeap
-
-template `++` (rc: TRefCount, heapType: THeapType): stmt =
-  when heapType == SharedHeap:
-    discard atomicInc(rc, rcIncrement)
-  else:
-    inc rc, rcIncrement
-
-template `--`(rc: TRefCount): expr =
-  dec rc, rcIncrement
-  rc <% rcIncrement
-
-template `--` (rc: TRefCount, heapType: THeapType): expr =
-  (when heapType == SharedHeap: atomicDec(rc, rcIncrement) <% rcIncrement
-   else: --rc)
-
-template doDecRef(cc: PCell,
-                  heapType = LocalHeap,
-                  cycleFlag = MaybeCyclic): stmt =
-  var c = cc
-  sysAssert(isAllocatedPtr(gch.region, c), "decRef: interiorPtr")
-  # XXX: move this elesewhere
-
-  sysAssert(c.refcount >=% rcIncrement, "decRef")
-  if c.refcount--(heapType):
-    # this is the last reference from the heap
-    # add to a zero-count-table that will be matched against stack pointers
-    rtlAddZCT(c)
-  else:
-    when cycleFlag != Acyclic:
-      if cycleFlag == Cyclic or canBeCycleRoot(c):
-        # a cycle may have been broken
-        rtlAddCycleRoot(c)
-
-template doIncRef(cc: PCell,
-                 heapType = LocalHeap,
-                 cycleFlag = MaybeCyclic): stmt =
-  var c = cc
-  c.refcount++(heapType)
-  when cycleFlag != Acyclic:
-    when NewObjectsAreCycleRoots:
-      if canbeCycleRoot(c):
-        addCycleRoot(gch.cycleRoots, c)
-    elif IncRefRemovesCandidates:
-      c.setColor rcAlive
-  # XXX: this is not really atomic enough!
-  
-proc nimGCref(p: pointer) {.compilerProc, inline.} = doIncRef(usrToCell(p))
-proc nimGCunref(p: pointer) {.compilerProc, inline.} = doDecRef(usrToCell(p))
-
-proc nimGCunrefNoCycle(p: pointer) {.compilerProc, inline.} =
-  sysAssert(allocInv(gch.region), "begin nimGCunrefNoCycle")
-  var c = usrToCell(p)
-  sysAssert(isAllocatedPtr(gch.region, c), "nimGCunrefNoCycle: isAllocatedPtr")
-  if c.refcount--(LocalHeap):
-    rtlAddZCT(c)
-    sysAssert(allocInv(gch.region), "end nimGCunrefNoCycle 2")
-  sysAssert(allocInv(gch.region), "end nimGCunrefNoCycle 5")
-
-template doAsgnRef(dest: ppointer, src: pointer,
-                  heapType = LocalHeap, cycleFlag = MaybeCyclic): stmt =
-  sysAssert(not isOnStack(dest), "asgnRef")
-  # BUGFIX: first incRef then decRef!
-  if src != nil: doIncRef(usrToCell(src), heapType, cycleFlag)
-  if dest[] != nil: doDecRef(usrToCell(dest[]), heapType, cycleFlag)
-  dest[] = src
-
-proc asgnRef(dest: ppointer, src: pointer) {.compilerProc, inline.} =
-  # the code generator calls this proc!
-  doAsgnRef(dest, src, LocalHeap, MaybeCyclic)
-
-proc asgnRefNoCycle(dest: ppointer, src: pointer) {.compilerProc, inline.} =
-  # the code generator calls this proc if it is known at compile time that no 
-  # cycle is possible.
-  doAsgnRef(dest, src, LocalHeap, Acyclic)
-
-proc unsureAsgnRef(dest: ppointer, src: pointer) {.compilerProc.} =
-  # unsureAsgnRef updates the reference counters only if dest is not on the
-  # stack. It is used by the code generator if it cannot decide wether a
-  # reference is in the stack or not (this can happen for var parameters).
-  if not IsOnStack(dest):
-    if src != nil: doIncRef(usrToCell(src))
-    # XXX we must detect a shared heap here
-    # better idea may be to just eliminate the need for unsureAsgnRef
-    #
-    # XXX finally use assembler for the stack checking instead!
-    # the test for '!= nil' is correct, but I got tired of the segfaults
-    # resulting from the crappy stack checking:
-    if cast[int](dest[]) >=% PageSize: doDecRef(usrToCell(dest[]))
-  else:
-    # can't be an interior pointer if it's a stack location!
-    sysAssert(interiorAllocatedPtr(gch.region, dest)==nil,
-              "stack loc AND interior pointer")
-  dest[] = src
-
-when hasThreadSupport and hasSharedHeap:
-  # shared heap version of the above procs
-  proc asgnRefSh(dest: ppointer, src: pointer) {.compilerProc, inline.} =
-    doAsgnRef(dest, src, SharedHeap, MaybeCyclic)
-
-  proc asgnRefNoCycleSh(dest: ppointer, src: pointer) {.compilerProc, inline.} =
-    doAsgnRef(dest, src, SharedHeap, Acyclic)
-
-proc initGC() =
-  when not defined(useNimRtl):
-    when traceGC:
-      for i in low(TCellState)..high(TCellState): Init(states[i])
-    gch.cycleThreshold = InitialCycleThreshold
-    gch.stat.stackScans = 0
-    gch.stat.cycleCollections = 0
-    gch.stat.maxThreshold = 0
-    gch.stat.maxStackSize = 0
-    gch.stat.maxStackCells = 0
-    gch.stat.cycleTableSize = 0
-    # init the rt
-    init(gch.zct)
-    init(gch.tempStack)
-    init(gch.freeStack)
-    Init(gch.cycleRoots)
-    Init(gch.decStack)
-
-proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: TWalkOp) =
-  var d = cast[TAddress](dest)
-  case n.kind
-  of nkSlot: forAllChildrenAux(cast[pointer](d +% n.offset), n.typ, op)
-  of nkList:
-    for i in 0..n.len-1:
-      # inlined for speed
-      if n.sons[i].kind == nkSlot:
-        if n.sons[i].typ.kind in {tyRef, tyString, tySequence}:
-          doOperation(cast[ppointer](d +% n.sons[i].offset)[], op)
-        else:
-          forAllChildrenAux(cast[pointer](d +% n.sons[i].offset), 
-                            n.sons[i].typ, op)
-      else:
-        forAllSlotsAux(dest, n.sons[i], op)
-  of nkCase:
-    var m = selectBranch(dest, n)
-    if m != nil: forAllSlotsAux(dest, m, op)
-  of nkNone: sysAssert(false, "forAllSlotsAux")
-
-proc forAllChildrenAux(dest: Pointer, mt: PNimType, op: TWalkOp) =
-  var d = cast[TAddress](dest)
-  if dest == nil: return # nothing to do
-  if ntfNoRefs notin mt.flags:
-    case mt.Kind
-    of tyRef, tyString, tySequence: # leaf:
-      doOperation(cast[ppointer](d)[], op)
-    of tyObject, tyTuple:
-      forAllSlotsAux(dest, mt.node, op)
-    of tyArray, tyArrayConstr, tyOpenArray:
-      for i in 0..(mt.size div mt.base.size)-1:
-        forAllChildrenAux(cast[pointer](d +% i *% mt.base.size), mt.base, op)
-    else: nil
-
-proc forAllChildren(cell: PCell, op: TWalkOp) =
-  sysAssert(cell != nil, "forAllChildren: 1")
-  sysAssert(cell.typ != nil, "forAllChildren: 2")
-  sysAssert cell.typ.kind in {tyRef, tySequence, tyString}, "forAllChildren: 3"
-  let marker = cell.typ.marker
-  if marker != nil:
-    marker(cellToUsr(cell), op.int)
-  else:
-    case cell.typ.Kind
-    of tyRef: # common case
-      forAllChildrenAux(cellToUsr(cell), cell.typ.base, op)
-    of tySequence:
-      var d = cast[TAddress](cellToUsr(cell))
-      var s = cast[PGenericSeq](d)
-      if s != nil:
-        let baseAddr = d +% GenericSeqSize
-        for i in 0..s.len-1:
-          forAllChildrenAux(cast[pointer](baseAddr +% i *% cell.typ.base.size),
-                            cell.typ.base, op)
-    else: nil
-
-proc addNewObjToZCT(res: PCell, gch: var TGcHeap) {.inline.} =
-  # we check the last 8 entries (cache line) for a slot that could be reused.
-  # In 63% of all cases we succeed here! But we have to optimize the heck
-  # out of this small linear search so that ``newObj`` is not slowed down.
-  # 
-  # Slots to try          cache hit
-  # 1                     32%
-  # 4                     59%
-  # 8                     63%
-  # 16                    66%
-  # all slots             68%
-  var L = gch.zct.len
-  var d = gch.zct.d
-  when true:
-    # loop unrolled for performance:
-    template replaceZctEntry(i: expr) =
-      c = d[i]
-      if c.refcount >=% rcIncrement:
-        c.clearBit(rcZct)
-        d[i] = res
-        return
-    if L > 8:
-      var c: PCell
-      replaceZctEntry(L-1)
-      replaceZctEntry(L-2)
-      replaceZctEntry(L-3)
-      replaceZctEntry(L-4)
-      replaceZctEntry(L-5)
-      replaceZctEntry(L-6)
-      replaceZctEntry(L-7)
-      replaceZctEntry(L-8)
-      add(gch.zct, res)
-    else:
-      d[L] = res
-      inc(gch.zct.len)
-  else:
-    for i in countdown(L-1, max(0, L-8)):
-      var c = d[i]
-      if c.refcount >=% rcIncrement:
-        c.clearBit(rcZct)
-        d[i] = res
-        return
-    add(gch.zct, res)
-
-proc rawNewObj(typ: PNimType, size: int, gch: var TGcHeap, rc1: bool): pointer =
-  # generates a new object and sets its reference counter to 0
-  acquire(gch)
-  sysAssert(allocInv(gch.region), "rawNewObj begin")
-  sysAssert(typ.kind in {tyRef, tyString, tySequence}, "newObj: 1")
-  
-  collectCT(gch)
-  sysAssert(allocInv(gch.region), "rawNewObj after collect")
-
-  var res = cast[PCell](rawAlloc(gch.region, size + sizeof(TCell)))
-  sysAssert(allocInv(gch.region), "rawNewObj after rawAlloc")
-
-  sysAssert((cast[TAddress](res) and (MemAlign-1)) == 0, "newObj: 2")
-  
-  res.typ = typ
-  
-  when trackAllocationSource and not hasThreadSupport:
-    if framePtr != nil and framePtr.prev != nil and framePtr.prev.prev != nil:
-      res.filename = framePtr.prev.prev.filename
-      res.line = framePtr.prev.prev.line
-    else:
-      res.filename = "nofile"
-  
-  if rc1:
-    res.refcount = rcIncrement # refcount is 1
-  else:
-    # its refcount is zero, so add it to the ZCT:
-    res.refcount = rcZct
-    addNewObjToZCT(res, gch)
-
-    if NewObjectsAreCycleRoots and canBeCycleRoot(res):
-      res.setBit(rcInCycleRoots)
-      res.setColor rcCycleCandidate
-      gch.cycleRoots.add res
-    
-  sysAssert(isAllocatedPtr(gch.region, res), "newObj: 3")
-  
-  when logGC: writeCell("new cell", res)
-  gcTrace(res, csAllocated)
-  release(gch)
-  result = cellToUsr(res)
-  zeroMem(result, size)
-  when defined(memProfiler): nimProfile(size)
-  sysAssert(allocInv(gch.region), "rawNewObj end")
-
-{.pop.}
-
-proc freeCell(gch: var TGcHeap, c: PCell) =
-  # prepareDealloc(c)
-  gcTrace(c, csFreed)
-
-  when reallyDealloc: rawDealloc(gch.region, c)
-  else:
-    sysAssert(c.typ != nil, "collectCycles")
-    zeroMem(c, sizeof(TCell))
-
-template eraseAt(cells: var TCellSeq, at: int): stmt =
-  cells.d[at] = cells.d[cells.len - 1]
-  dec cells.len
-
-template trimAt(roots: var TCellSeq, at: int): stmt =
-  # This will remove a cycle root candidate during trimming.
-  # a candidate is removed either because it received a refup and
-  # it's no longer a candidate or because it received further refdowns
-  # and now it's dead for sure.
-  let c = roots.d[at]
-  c.clearBit(rcInCycleRoots)
-  roots.eraseAt(at)
-  if c.isBitUp(rcReallyDead) and c.refcount <% rcIncrement:
-    # This case covers both dead objects and retired buffers
-    # That's why we must also check the refcount (it may be
-    # kept possitive by stack references).
-    freeCell(gch, c)
-
-proc newObj(typ: PNimType, size: int): pointer {.compilerRtl.} =
-  setStackTop(gch)
-  result = rawNewObj(typ, size, gch, false)
-  
-proc newSeq(typ: PNimType, len: int): pointer {.compilerRtl.} =
-  setStackTop(gch)
-  # `rawNewObj` already uses locks, so no need for them here.
-  let size = addInt(mulInt(len, typ.base.size), GenericSeqSize)
-  result = rawNewObj(typ, size, gch, false)
-  cast[PGenericSeq](result).len = len
-  cast[PGenericSeq](result).reserved = len
-
-proc newObjRC1(typ: PNimType, size: int): pointer {.compilerRtl.} =
-  setStackTop(gch)
-  result = rawNewObj(typ, size, gch, true)
-
-proc newSeqRC1(typ: PNimType, len: int): pointer {.compilerRtl.} =
-  setStackTop(gch)
-  let size = addInt(mulInt(len, typ.base.size), GenericSeqSize)
-  result = rawNewObj(typ, size, gch, true)
-  cast[PGenericSeq](result).len = len
-  cast[PGenericSeq](result).reserved = len
-
-proc growObj(old: pointer, newsize: int, gch: var TGcHeap): pointer =
-  acquire(gch)
-  collectCT(gch)
-  var ol = usrToCell(old)
-  sysAssert(ol.typ != nil, "growObj: 1")
-  sysAssert(ol.typ.kind in {tyString, tySequence}, "growObj: 2")
-  sysAssert(allocInv(gch.region), "growObj begin")
-
-  var res = cast[PCell](rawAlloc(gch.region, newsize + sizeof(TCell)))
-  var elemSize = if ol.typ.kind != tyString: ol.typ.base.size
-                 else: 1
-  
-  var oldsize = cast[PGenericSeq](old).len*elemSize + GenericSeqSize
-  
-  # XXX: This should happen outside
-  # call user-defined move code
-  # call user-defined default constructor
-  copyMem(res, ol, oldsize + sizeof(TCell))
-  zeroMem(cast[pointer](cast[TAddress](res)+% oldsize +% sizeof(TCell)),
-          newsize-oldsize)
-
-  sysAssert((cast[TAddress](res) and (MemAlign-1)) == 0, "growObj: 3")
-  sysAssert(res.refcount shr rcShift <=% 1, "growObj: 4")
-  
-  when false:
-    if ol.isBitUp(rcZct):
-      var j = gch.zct.len-1
-      var d = gch.zct.d
-      while j >= 0: 
-        if d[j] == ol:
-          d[j] = res
-          break
-        dec(j)
-    
-    if ol.isBitUp(rcInCycleRoots):
-      for i in 0 .. <gch.cycleRoots.len:
-        if gch.cycleRoots.d[i] == ol:
-          eraseAt(gch.cycleRoots, i)
-
-    freeCell(gch, ol)
-  
-  else:
-    # the new buffer inherits the GC state of the old one
-    if res.isBitUp(rcZct): gch.zct.add res
-    if res.isBitUp(rcInCycleRoots): gch.cycleRoots.add res
-
-    # Pay attention to what's going on here! We're not releasing the old memory.
-    # This is because at this point there may be an interior pointer pointing
-    # into this buffer somewhere on the stack (due to `var` parameters now and
-    # and `let` and `var:var` stack locations in the future).
-    # We'll release the memory in the next GC cycle. If we release it here,
-    # we cannot guarantee that no memory will be corrupted when only safe
-    # language features are used. Accessing the memory after the seq/string
-    # has been invalidated may still result in logic errors in the user code.
-    # We may improve on that by protecting the page in debug builds or
-    # by providing a warning when we detect a stack pointer into it.
-    let bufferFlags = ol.refcount and rcBufferedAnywhere
-    if bufferFlags == 0:
-      # we need this in order to collect it safely later
-      ol.refcount = rcRetiredBuffer or rcZct
-      gch.zct.add ol
-    else:
-      ol.refcount = rcRetiredBuffer or bufferFlags
-
-    when logGC:
-      writeCell("growObj old cell", ol)
-      writeCell("growObj new cell", res)
-
-  gcTrace(res, csAllocated)
-  release(gch)
-  result = cellToUsr(res)
-  sysAssert(allocInv(gch.region), "growObj end")
-  when defined(memProfiler): nimProfile(newsize-oldsize)
-
-proc growObj(old: pointer, newsize: int): pointer {.rtl.} =
-  setStackTop(gch)
-  result = growObj(old, newsize, gch)
-
-{.push profiler:off.}
-
-# ---------------- cycle collector -------------------------------------------
-
-proc doOperation(p: pointer, op: TWalkOp) =
-  if p == nil: return
-  var c: PCell = usrToCell(p)
-  sysAssert(c != nil, "doOperation: 1")
-  gch.tempStack.add c
-  
-proc nimGCvisit(d: pointer, op: int) {.compilerRtl.} =
-  doOperation(d, TWalkOp(op))
-
-type
-  TRecursionType = enum 
-    FromChildren,
-    FromRoot
-
-proc CollectZCT(gch: var TGcHeap): bool
-
-template pseudoRecursion(typ: TRecursionType, body: stmt): stmt =
-  #
-
-proc trimCycleRoots(gch: var TGcHeap, startIdx = gch.cycleRootsTrimIdx) =
-  var i = startIdx
-  while i < gch.cycleRoots.len:
-    if gch.cycleRoots.d[i].color != rcCycleCandidate:
-      gch.cycleRoots.trimAt i
-    else:
-      inc i
-
-  gch.cycleRootsTrimIdx = gch.cycleRoots.len
-
-# we now use a much simpler and non-recursive algorithm for cycle removal
-proc collectCycles(gch: var TGcHeap) =
-  if gch.cycleRoots.len == 0: return
-  gch.stat.cycleTableSize = max(gch.stat.cycleTableSize, gch.cycleRoots.len)
-
-  when CollectCyclesStats:
-    let l0 = gch.cycleRoots.len
-    let tStart = getTicks()
-
-  var
-    decrefs = 0
-    increfs = 0
-    collected = 0
-    maybedeads = 0
-
-  template ignoreObject(c: PCell): expr =
-    # This controls which objects will be ignored in the mark and scan stages
-    (when MarkingSkipsAcyclicObjects: not canbeCycleRoot(c) else: false)
-    # not canbeCycleRoot(c)
-    # false
-    # c.isBitUp(rcHasStackRef)
-
-  template earlyMarkAliveRec(cell) =
-    let startLen = gch.tempStack.len
-    cell.setColor rcAlive
-    cell.forAllChildren waPush
-    
-    while startLen != gch.tempStack.len:
-      dec gch.tempStack.len
-      var c = gch.tempStack.d[gch.tempStack.len]
-      if c.color != rcAlive:
-        c.setColor rcAlive
-        c.forAllChildren waPush
-  
-  template earlyMarkAlive(stackRoots) =
-    # This marks all objects reachable from the stack as alive before any
-    # of the other stages is executed. Such objects cannot be garbage and
-    # they don't need to participate in the recursive decref/incref.
-    for i in 0 .. <stackRoots.len:
-      var c = stackRoots.d[i]
-      # c.setBit rcHasStackRef
-      earlyMarkAliveRec(c)
-
-  earlyMarkAlive(gch.decStack)
-  
-  when CollectCyclesStats:
-    let tAfterEarlyMarkAlive = getTicks()
-
-  template recursiveDecRef(cell) =
-    let startLen = gch.tempStack.len
-    cell.setColor rcDecRefApplied
-    cell.forAllChildren waPush
-    
-    while startLen != gch.tempStack.len:
-      dec gch.tempStack.len
-      var c = gch.tempStack.d[gch.tempStack.len]
-      if ignoreObject(c): continue
-
-      sysAssert(c.refcount >=% rcIncrement, "recursive dec ref")
-      dec c.refcount, rcIncrement
-      inc decrefs
-      if c.color != rcDecRefApplied:
-        c.setColor rcDecRefApplied
-        c.forAllChildren waPush
- 
-  template markRoots(roots) =
-    var i = 0
-    while i < roots.len:
-      if roots.d[i].color == rcCycleCandidate:
-        recursiveDecRef(roots.d[i])
-        inc i
-      else:
-        roots.trimAt i
-  
-  markRoots(gch.cycleRoots)
-  
-  when CollectCyclesStats:
-    let tAfterMark = getTicks()
-    c_printf "COLLECT CYCLES %d: %d/%d\n", gcCollectionIdx, gch.cycleRoots.len, l0
-  
-  template recursiveMarkAlive(cell) =
-    let startLen = gch.tempStack.len
-    cell.setColor rcAlive
-    cell.forAllChildren waPush
-    
-    while startLen != gch.tempStack.len:
-      dec gch.tempStack.len
-      var c = gch.tempStack.d[gch.tempStack.len]
-      if ignoreObject(c): continue
-      inc c.refcount, rcIncrement
-      inc increfs
-      
-      if c.color != rcAlive:
-        c.setColor rcAlive
-        c.forAllChildren waPush
- 
-  template scanRoots(roots) =
-    for i in 0 .. <roots.len:
-      let startLen = gch.tempStack.len
-      gch.tempStack.add roots.d[i]
-      
-      while startLen != gch.tempStack.len:
-        dec gch.tempStack.len
-        var c = gch.tempStack.d[gch.tempStack.len]
-        if ignoreObject(c): continue
-        if c.color == rcDecRefApplied:
-          if c.refcount >=% rcIncrement:
-            recursiveMarkAlive(c)
-          else:
-            # note that this is not necessarily the ultimate
-            # destiny of the object. we may still mark it alive
-            # later if we encounter another node from where it's
-            # reachable.
-            c.setColor rcMaybeDead
-            inc maybedeads
-            c.forAllChildren waPush
-  
-  scanRoots(gch.cycleRoots)
-  
-  when CollectCyclesStats:
-    let tAfterScan = getTicks()
-
-  template collectDead(roots) =
-    for i in 0 .. <roots.len:
-      var c = roots.d[i]
-      c.clearBit(rcInCycleRoots)
-
-      let startLen = gch.tempStack.len
-      gch.tempStack.add c
-      
-      while startLen != gch.tempStack.len:
-        dec gch.tempStack.len
-        var c = gch.tempStack.d[gch.tempStack.len]
-        when MarkingSkipsAcyclicObjects:
-          if not canbeCycleRoot(c):
-            # This is an acyclic object reachable from a dead cyclic object
-            # We must do a normal decref here that may add the acyclic object
-            # to the ZCT
-            doDecRef(c, LocalHeap, Cyclic)
-            continue
-        if c.color == rcMaybeDead and not c.isBitUp(rcInCycleRoots):
-          c.setColor(rcReallyDead)
-          inc collected
-          c.forAllChildren waPush
-          # we need to postpone the actual deallocation in order to allow
-          # the finalizers to run while the data structures are still intact
-          gch.freeStack.add c
-          prepareDealloc(c)
-
-    for i in 0 .. <gch.freeStack.len:
-      freeCell(gch, gch.freeStack.d[i])
-
-  collectDead(gch.cycleRoots)
-  
-  when CollectCyclesStats:
-    let tFinal = getTicks()
-    cprintf "times:\n  early mark alive: %d ms\n  mark: %d ms\n  scan: %d ms\n  collect: %d ms\n  decrefs: %d\n  increfs: %d\n  marked dead: %d\n  collected: %d\n",
-      (tAfterEarlyMarkAlive - tStart)  div 1_000_000,
-      (tAfterMark - tAfterEarlyMarkAlive) div 1_000_000,
-      (tAfterScan - tAfterMark) div 1_000_000,
-      (tFinal - tAfterScan) div 1_000_000,
-      decrefs,
-      increfs,
-      maybedeads,
-      collected
-
-  Deinit(gch.cycleRoots)
-  Init(gch.cycleRoots)
-
-  Deinit(gch.freeStack)
-  Init(gch.freeStack)
-
-  when MarkingSkipsAcyclicObjects:
-    # Collect the acyclic objects that became unreachable due to collected
-    # cyclic objects. 
-    discard CollectZCT(gch)
-    # CollectZCT may add new cycle candidates and we may decide to loop here
-    # if gch.cycleRoots.len > 0: repeat
-
-var gcDebugging* = false
-
-var seqdbg* : proc (s: PGenericSeq) {.cdecl.}
-
-proc gcMark(gch: var TGcHeap, p: pointer) {.inline.} =
-  # the addresses are not as cells on the stack, so turn them to cells:
-  sysAssert(allocInv(gch.region), "gcMark begin")
-  var cell = usrToCell(p)
-  var c = cast[TAddress](cell)
-  if c >% PageSize:
-    # fast check: does it look like a cell?
-    var objStart = cast[PCell](interiorAllocatedPtr(gch.region, cell))
-    if objStart != nil:
-      # mark the cell:
-      if objStart.color != rcReallyDead:
-        if gcDebugging:
-          # writeCell("marking ", objStart)
-        else:
-          inc objStart.refcount, rcIncrement
-          gch.decStack.add objStart
-      else:
-        # With incremental clean-up, objects spend some time
-        # in various lists before being deallocated.
-        # We just found a reference on the stack to an object,
-        # which we have previously labeled as unreachable.
-        # This is either a bug in the GC or a pure accidental
-        # coincidence due to the conservative stack marking.
-        when debugGC:
-          # writeCell("marking dead object", objStart)
-    when false:
-      if isAllocatedPtr(gch.region, cell):
-        sysAssert false, "allocated pointer but not interior?"
-        # mark the cell:
-        inc cell.refcount, rcIncrement
-        add(gch.decStack, cell)
-  sysAssert(allocInv(gch.region), "gcMark end")
-
-proc markThreadStacks(gch: var TGcHeap) = 
-  when hasThreadSupport and hasSharedHeap:
-    {.error: "not fully implemented".}
-    var it = threadList
-    while it != nil:
-      # mark registers: 
-      for i in 0 .. high(it.registers): gcMark(gch, it.registers[i])
-      var sp = cast[TAddress](it.stackBottom)
-      var max = cast[TAddress](it.stackTop)
-      # XXX stack direction?
-      # XXX unroll this loop:
-      while sp <=% max:
-        gcMark(gch, cast[ppointer](sp)[])
-        sp = sp +% sizeof(pointer)
-      it = it.next
-
-# ----------------- stack management --------------------------------------
-#  inspired from Smart Eiffel
-
-when defined(sparc):
-  const stackIncreases = false
-elif defined(hppa) or defined(hp9000) or defined(hp9000s300) or
-     defined(hp9000s700) or defined(hp9000s800) or defined(hp9000s820):
-  const stackIncreases = true
-else:
-  const stackIncreases = false
-
-when not defined(useNimRtl):
-  {.push stack_trace: off.}
-  proc setStackBottom(theStackBottom: pointer) =
-    #c_fprintf(c_stdout, "stack bottom: %p;\n", theStackBottom)
-    # the first init must be the one that defines the stack bottom:
-    if gch.stackBottom == nil: gch.stackBottom = theStackBottom
-    else:
-      var a = cast[TAddress](theStackBottom) # and not PageMask - PageSize*2
-      var b = cast[TAddress](gch.stackBottom)
-      #c_fprintf(c_stdout, "old: %p new: %p;\n",gch.stackBottom,theStackBottom)
-      when stackIncreases:
-        gch.stackBottom = cast[pointer](min(a, b))
-      else:
-        gch.stackBottom = cast[pointer](max(a, b))
-  {.pop.}
-
-proc stackSize(): int {.noinline.} =
-  var stackTop {.volatile.}: pointer
-  result = abs(cast[int](addr(stackTop)) - cast[int](gch.stackBottom))
-
-var
-  jmpbufSize {.importc: "sizeof(jmp_buf)", nodecl.}: int
-    # a little hack to get the size of a TJmpBuf in the generated C code
-    # in a platform independant way
-
-when defined(sparc): # For SPARC architecture.
-  proc isOnStack(p: pointer): bool =
-    var stackTop {.volatile.}: pointer
-    stackTop = addr(stackTop)
-    var b = cast[TAddress](gch.stackBottom)
-    var a = cast[TAddress](stackTop)
-    var x = cast[TAddress](p)
-    result = a <=% x and x <=% b
-
-  proc markStackAndRegisters(gch: var TGcHeap) {.noinline, cdecl.} =
-    when defined(sparcv9):
-      asm  """"flushw \n" """
-    else:
-      asm  """"ta      0x3   ! ST_FLUSH_WINDOWS\n" """
-
-    var
-      max = gch.stackBottom
-      sp: PPointer
-      stackTop: array[0..1, pointer]
-    sp = addr(stackTop[0])
-    # Addresses decrease as the stack grows.
-    while sp <= max:
-      gcMark(gch, sp[])
-      sp = cast[ppointer](cast[TAddress](sp) +% sizeof(pointer))
-
-elif defined(ELATE):
-  {.error: "stack marking code is to be written for this architecture".}
-
-elif stackIncreases:
-  # ---------------------------------------------------------------------------
-  # Generic code for architectures where addresses increase as the stack grows.
-  # ---------------------------------------------------------------------------
-  proc isOnStack(p: pointer): bool =
-    var stackTop {.volatile.}: pointer
-    stackTop = addr(stackTop)
-    var a = cast[TAddress](gch.stackBottom)
-    var b = cast[TAddress](stackTop)
-    var x = cast[TAddress](p)
-    result = a <=% x and x <=% b
-  
-  proc markStackAndRegisters(gch: var TGcHeap) {.noinline, cdecl.} =
-    var registers: C_JmpBuf
-    if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
-      var max = cast[TAddress](gch.stackBottom)
-      var sp = cast[TAddress](addr(registers)) +% jmpbufSize -% sizeof(pointer)
-      # sp will traverse the JMP_BUF as well (jmp_buf size is added,
-      # otherwise sp would be below the registers structure).
-      while sp >=% max:
-        gcMark(gch, cast[ppointer](sp)[])
-        sp = sp -% sizeof(pointer)
-
-else:
-  # ---------------------------------------------------------------------------
-  # Generic code for architectures where addresses decrease as the stack grows.
-  # ---------------------------------------------------------------------------
-  proc isOnStack(p: pointer): bool =
-    var stackTop {.volatile.}: pointer
-    stackTop = addr(stackTop)
-    var b = cast[TAddress](gch.stackBottom)
-    var a = cast[TAddress](stackTop)
-    var x = cast[TAddress](p)
-    result = a <=% x and x <=% b
-
-  proc markStackAndRegisters(gch: var TGcHeap) {.noinline, cdecl.} =
-    # We use a jmp_buf buffer that is in the C stack.
-    # Used to traverse the stack and registers assuming
-    # that 'setjmp' will save registers in the C stack.
-    type PStackSlice = ptr array [0..7, pointer]
-    var registers: C_JmpBuf
-    if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
-      when MinimumStackMarking:
-        # mark the registers
-        var jmpbufPtr = cast[TAddress](addr(registers))
-        var jmpbufEnd = jmpbufPtr +% jmpbufSize
-      
-        while jmpbufPtr <=% jmpbufEnd:
-          gcMark(gch, cast[ppointer](jmpbufPtr)[])
-          jmpbufPtr = jmpbufPtr +% sizeof(pointer)
-
-        var sp = cast[TAddress](gch.stackTop)
-      else:
-        var sp = cast[TAddress](addr(registers))
-      # mark the user stack
-      var max = cast[TAddress](gch.stackBottom)
-      # loop unrolled:
-      while sp <% max - 8*sizeof(pointer):
-        gcMark(gch, cast[PStackSlice](sp)[0])
-        gcMark(gch, cast[PStackSlice](sp)[1])
-        gcMark(gch, cast[PStackSlice](sp)[2])
-        gcMark(gch, cast[PStackSlice](sp)[3])
-        gcMark(gch, cast[PStackSlice](sp)[4])
-        gcMark(gch, cast[PStackSlice](sp)[5])
-        gcMark(gch, cast[PStackSlice](sp)[6])
-        gcMark(gch, cast[PStackSlice](sp)[7])
-        sp = sp +% sizeof(pointer)*8
-      # last few entries:
-      while sp <=% max:
-        gcMark(gch, cast[ppointer](sp)[])
-        sp = sp +% sizeof(pointer)
-
-# ----------------------------------------------------------------------------
-# end of non-portable code
-# ----------------------------------------------------------------------------
-
-proc releaseCell(gch: var TGcHeap, cell: PCell) =
-  if cell.color != rcReallyDead:
-    prepareDealloc(cell)
-    cell.setColor rcReallyDead
-
-    let l1 = gch.tempStack.len
-    cell.forAllChildren waPush
-    let l2 = gch.tempStack.len
-    for i in l1 .. <l2:
-      var cc = gch.tempStack.d[i]
-      if cc.refcount--(LocalHeap):
-        releaseCell(gch, cc)
-      else:
-        if canbeCycleRoot(cc):
-          addCycleRoot(gch.cycleRoots, cc)
-
-    gch.tempStack.len = l1
-
-  if cell.isBitDown(rcBufferedAnywhere):
-    freeCell(gch, cell)
-  # else:
-  # This object is either buffered in the cycleRoots list and we'll leave
-  # it there to be collected in the next collectCycles or it's pending in
-  # the ZCT:
-  # (e.g. we are now cleaning the 15th object, but this one is 18th in the
-  #  list. Note that this can happen only if we reached this point by the
-  #  recursion).
-  # We can ignore it now as the ZCT cleaner will reach it soon.
-
-proc CollectZCT(gch: var TGcHeap): bool =
-  const workPackage = 100
-  var L = addr(gch.zct.len)
-  
-  when withRealtime:
-    var steps = workPackage
-    var t0: TTicks
-    if gch.maxPause > 0: t0 = getticks()
-  
-  while L[] > 0:
-    var c = gch.zct.d[0]
-    sysAssert c.isBitUp(rcZct), "CollectZCT: rcZct missing!"
-    sysAssert(isAllocatedPtr(gch.region, c), "CollectZCT: isAllocatedPtr")
-    
-    # remove from ZCT:    
-    c.clearBit(rcZct)
-    gch.zct.d[0] = gch.zct.d[L[] - 1]
-    dec(L[])
-    when withRealtime: dec steps
-    if c.refcount <% rcIncrement:
-      # It may have a RC > 0, if it is in the hardware stack or
-      # it has not been removed yet from the ZCT. This is because
-      # ``incref`` does not bother to remove the cell from the ZCT 
-      # as this might be too slow.
-      # In any case, it should be removed from the ZCT. But not
-      # freed. **KEEP THIS IN MIND WHEN MAKING THIS INCREMENTAL!**
-      if c.color == rcRetiredBuffer:
-        if c.isBitDown(rcInCycleRoots):
-          freeCell(gch, c)
-      else:
-        # if c.color == rcReallyDead: writeCell("ReallyDead in ZCT?", c)
-        releaseCell(gch, c)
-    when withRealtime:
-      if steps == 0:
-        steps = workPackage
-        if gch.maxPause > 0:
-          let duration = getticks() - t0
-          # the GC's measuring is not accurate and needs some cleanup actions 
-          # (stack unmarking), so subtract some short amount of time in to
-          # order to miss deadlines less often:
-          if duration >= gch.maxPause - 50_000:
-            return false
-  result = true
-  gch.trimCycleRoots
-  #deInit(gch.zct)
-  #init(gch.zct)
-
-proc unmarkStackAndRegisters(gch: var TGcHeap) =
-  var d = gch.decStack.d
-  for i in 0 .. <gch.decStack.len:
-    sysAssert isAllocatedPtr(gch.region, d[i]), "unmarkStackAndRegisters"
-    # XXX: just call doDecRef?
-    var c = d[i]
-    sysAssert c.typ != nil, "unmarkStackAndRegisters 2"
-    
-    if c.color == rcRetiredBuffer:
-      continue
-
-    # XXX no need for an atomic dec here:
-    if c.refcount--(LocalHeap):
-      # the object survived only because of a stack reference
-      # it still doesn't have heap refernces
-      addZCT(gch.zct, c)
-    
-    if canbeCycleRoot(c):
-      # any cyclic object reachable from the stack can be turned into
-      # a leak if it's orphaned through the stack reference
-      # that's because the write-barrier won't be executed for stack
-      # locations
-      addCycleRoot(gch.cycleRoots, c)
-
-  gch.decStack.len = 0
-
-proc collectCTBody(gch: var TGcHeap) =
-  when withRealtime:
-    let t0 = getticks()
-  when debugGC: inc gcCollectionIdx
-  sysAssert(allocInv(gch.region), "collectCT: begin")
-  
-  gch.stat.maxStackSize = max(gch.stat.maxStackSize, stackSize())
-  sysAssert(gch.decStack.len == 0, "collectCT")
-  prepareForInteriorPointerChecking(gch.region)
-  markStackAndRegisters(gch)
-  markThreadStacks(gch)
-  gch.stat.maxStackCells = max(gch.stat.maxStackCells, gch.decStack.len)
-  inc(gch.stat.stackScans)
-  if collectZCT(gch):
-    when cycleGC:
-      if getOccupiedMem(gch.region) >= gch.cycleThreshold or alwaysCycleGC:
-        collectCycles(gch)
-        sysAssert gch.zct.len == 0, "zct is not null after collect cycles"
-        inc(gch.stat.cycleCollections)
-        gch.cycleThreshold = max(InitialCycleThreshold, getOccupiedMem() *
-                                 cycleIncrease)
-        gch.stat.maxThreshold = max(gch.stat.maxThreshold, gch.cycleThreshold)
-  unmarkStackAndRegisters(gch)
-  sysAssert(allocInv(gch.region), "collectCT: end")
-  
-  when withRealtime:
-    let duration = getticks() - t0
-    gch.stat.maxPause = max(gch.stat.maxPause, duration)
-    when defined(reportMissedDeadlines):
-      if gch.maxPause > 0 and duration > gch.maxPause:
-        c_fprintf(c_stdout, "[GC] missed deadline: %ld\n", duration)
-
-proc collectCT(gch: var TGcHeap) =
-  if (gch.zct.len >= ZctThreshold or (cycleGC and
-      getOccupiedMem(gch.region)>=gch.cycleThreshold) or alwaysGC) and 
-      gch.recGcLock == 0:
-    collectCTBody(gch)
-
-when withRealtime:
-  proc toNano(x: int): TNanos {.inline.} =
-    result = x * 1000
-
-  proc GC_setMaxPause*(MaxPauseInUs: int) =
-    gch.maxPause = MaxPauseInUs.toNano
-
-  proc GC_step(gch: var TGcHeap, us: int, strongAdvice: bool) =
-    acquire(gch)
-    gch.maxPause = us.toNano
-    if (gch.zct.len >= ZctThreshold or (cycleGC and
-        getOccupiedMem(gch.region)>=gch.cycleThreshold) or alwaysGC) or 
-        strongAdvice:
-      collectCTBody(gch)
-    release(gch)
-
-  proc GC_step*(us: int, strongAdvice = false) = GC_step(gch, us, strongAdvice)
-
-when not defined(useNimRtl):
-  proc GC_disable() = 
-    when hasThreadSupport and hasSharedHeap:
-      discard atomicInc(gch.recGcLock, 1)
-    else:
-      inc(gch.recGcLock)
-  proc GC_enable() =
-    if gch.recGcLock > 0: 
-      when hasThreadSupport and hasSharedHeap:
-        discard atomicDec(gch.recGcLock, 1)
-      else:
-        dec(gch.recGcLock)
-
-  proc GC_setStrategy(strategy: TGC_Strategy) =
-    case strategy
-    of gcThroughput: nil
-    of gcResponsiveness: nil
-    of gcOptimizeSpace: nil
-    of gcOptimizeTime: nil
-
-  proc GC_enableMarkAndSweep() =
-    gch.cycleThreshold = InitialCycleThreshold
-
-  proc GC_disableMarkAndSweep() =
-    gch.cycleThreshold = high(gch.cycleThreshold)-1
-    # set to the max value to suppress the cycle detector
-
-  proc GC_fullCollect() =
-    setStackTop(gch)
-    acquire(gch)
-    var oldThreshold = gch.cycleThreshold
-    gch.cycleThreshold = 0 # forces cycle collection
-    collectCT(gch)
-    gch.cycleThreshold = oldThreshold
-    release(gch)
-
-  proc GC_getStatistics(): string =
-    GC_disable()
-    result = "[GC] total memory: " & $(getTotalMem()) & "\n" &
-             "[GC] occupied memory: " & $(getOccupiedMem()) & "\n" &
-             "[GC] stack scans: " & $gch.stat.stackScans & "\n" &
-             "[GC] stack cells: " & $gch.stat.maxStackCells & "\n" &
-             "[GC] cycle collections: " & $gch.stat.cycleCollections & "\n" &
-             "[GC] max threshold: " & $gch.stat.maxThreshold & "\n" &
-             "[GC] zct capacity: " & $gch.zct.cap & "\n" &
-             "[GC] max cycle table size: " & $gch.stat.cycleTableSize & "\n" &
-             "[GC] max stack size: " & $gch.stat.maxStackSize & "\n" &
-             "[GC] max pause time [ms]: " & $(gch.stat.maxPause div 1000_000)
-    when traceGC: writeLeakage(true)
-    GC_enable()
-
-{.pop.}
diff --git a/lib/system/gc_common.nim b/lib/system/gc_common.nim
new file mode 100644
index 000000000..eb0884560
--- /dev/null
+++ b/lib/system/gc_common.nim
@@ -0,0 +1,482 @@
+#
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2015 Rokas Kupstys
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+type
+  ForeignCell* = object
+    data*: pointer
+    owner: ptr GcHeap
+
+proc protect*(x: pointer): ForeignCell =
+  nimGCref(x)
+  result.data = x
+  result.owner = addr(gch)
+
+when defined(nimTypeNames):
+  type InstancesInfo = array[400, (cstring, int, int)]
+  proc sortInstances(a: var InstancesInfo; n: int) =
+    # Shellsort of a[0 .. n-1], descending by the third tuple field.
+    var h = 1
+    while true: # grow the gap: 4, 13, 40, ... until it exceeds n
+      h = 3 * h + 1
+      if h > n: break
+    while true: # shrink the gap by a factor of 3 per pass, finishing with 1
+      h = h div 3
+      for i in countup(h, n - 1):
+        var v = a[i]
+        var j = i
+        while a[j - h][2] < v[2]: # shift smaller entries towards the end
+          a[j] = a[j - h]
+          j = j - h
+          if j < h: break
+        a[j] = v
+      if h == 1: break
+
+  iterator dumpHeapInstances*(): tuple[name: cstring; count: int; sizes: int] =
+    ## Iterate over summaries of types on heaps.
+    ## This data may be inaccurate if allocations
+    ## are made by the iterator body.
+    if strDesc.nextType == nil:
+      strDesc.nextType = nimTypeRoot
+      strDesc.name = "string"
+      nimTypeRoot = addr strDesc
+    var it = nimTypeRoot
+    while it != nil:
+      if (it.instances > 0 or it.sizes != 0):
+        yield (it.name, it.instances, it.sizes)
+      it = it.nextType
+
+  proc dumpNumberOfInstances* =
+    ## Prints per-type instance counts and byte sizes, largest types first.
+    var a: InstancesInfo
+    var n, totalAllocated = 0
+    for it in dumpHeapInstances():
+      inc totalAllocated, it.sizes
+      if n <= high(a): # `a` is fixed-size; surplus types only count in the total
+        a[n] = it
+        inc n
+    sortInstances(a, n)
+    for i in 0 .. n-1:
+      c_fprintf(cstdout, "[Heap] %s: #%ld; bytes: %ld\n", a[i][0], a[i][1], a[i][2])
+    c_fprintf(cstdout, "[Heap] total number of bytes: %ld\n", totalAllocated)
+    let (allocs, deallocs) = getMemCounters() # enclosing `when` already checks nimTypeNames
+    c_fprintf(cstdout, "[Heap] allocs/deallocs: %ld/%ld\n", allocs, deallocs)
+
+  when defined(nimGcRefLeak):
+    proc oomhandler() =
+      c_fprintf(cstdout, "[Heap] ROOTS: #%ld\n", gch.additionalRoots.len)
+      writeLeaks()
+
+    outOfMemHook = oomhandler
+
+template decTypeSize(cell, t) =
+  when defined(nimTypeNames):
+    if t.kind in {tyString, tySequence}:
+      let cap = cast[PGenericSeq](cellToUsr(cell)).space
+      let size =
+        if t.kind == tyString:
+          cap + 1 + GenericSeqSize
+        else:
+          align(GenericSeqSize, t.base.align) + cap * t.base.size
+      atomicDec t.sizes, size+sizeof(Cell)
+    else:
+      atomicDec t.sizes, t.base.size+sizeof(Cell)
+    atomicDec t.instances
+
+template incTypeSize(typ, size) =
+  when defined(nimTypeNames):
+    atomicInc typ.instances
+    atomicInc typ.sizes, size+sizeof(Cell)
+
+proc dispose*(x: ForeignCell) =
+  when hasThreadSupport:
+    # if we own it we can free it directly:
+    if x.owner == addr(gch):
+      nimGCunref(x.data)
+    else:
+      x.owner.toDispose.add(x.data) # not ours: queue it on the owning heap's `toDispose` list
+  else:
+    nimGCunref(x.data) # without thread support the cell is always unreferenced directly
+
+proc isNotForeign*(x: ForeignCell): bool =
+  ## returns true if 'x' belongs to the calling thread.
+  ## No deep copy has to be performed then.
+  x.owner == addr(gch)
+
+when nimCoroutines:
+  iterator items(first: var GcStack): ptr GcStack =
+    var item = addr(first)
+    while true:
+      yield item
+      item = item.next
+      if item == addr(first):
+        break
+
+  proc append(first: var GcStack, stack: ptr GcStack) =
+    ## Append stack to the ring of stacks.
+    first.prev.next = stack
+    stack.prev = first.prev
+    first.prev = stack
+    stack.next = addr(first)
+
+  proc append(first: var GcStack): ptr GcStack =
+    ## Allocate new GcStack object, append it to the ring of stacks and return it.
+    result = cast[ptr GcStack](alloc0(sizeof(GcStack)))
+    first.append(result)
+
+  proc remove(first: var GcStack, stack: ptr GcStack) =
+    ## Remove stack from ring of stacks.
+    gcAssert(addr(first) != stack, "Main application stack can not be removed")
+    if addr(first) == stack or stack == nil:
+      return
+    stack.prev.next = stack.next
+    stack.next.prev = stack.prev
+    dealloc(stack)
+
+  proc remove(stack: ptr GcStack) =
+    gch.stack.remove(stack)
+
+  proc find(first: var GcStack, bottom: pointer): ptr GcStack =
+    ## Find stack struct based on bottom pointer. If `bottom` is nil then the
+    ## main thread stack is returned. Yields nil when no stack matches.
+    if bottom == nil:
+      return addr(gch.stack)
+
+    for stack in first.items():
+      if stack.bottom == bottom:
+        return stack
+
+  proc len(stack: var GcStack): int =
+    for _ in stack.items():
+      result = result + 1
+else:
+  # This iterator gets optimized out in forEachStackSlot().
+  iterator items(first: var GcStack): ptr GcStack = yield addr(first)
+  proc len(stack: var GcStack): int = 1
+
+when defined(nimdoc):
+  proc setupForeignThreadGc*() {.gcsafe.} =
+    ## Call this if you registered a callback that will be run from a thread not
+    ## under your control. This has a cheap thread-local guard, so the GC for
+    ## this thread will only be initialized once per thread, no matter how often
+    ## it is called.
+    ##
+    ## This function is available only when `--threads:on` and `--tlsEmulation:off`
+    ## switches are used
+    discard
+
+  proc tearDownForeignThreadGc*() {.gcsafe.} =
+    ## Call this to tear down the GC, previously initialized by `setupForeignThreadGc`.
+    ## If GC has not been previously initialized, or has already been torn down, the
+    ## call does nothing.
+    ##
+    ## This function is available only when `--threads:on` and `--tlsEmulation:off`
+    ## switches are used
+    discard
+elif declared(threadType):
+  proc setupForeignThreadGc*() {.gcsafe.} =
+    if threadType == ThreadType.None:
+      var stackTop {.volatile.}: pointer
+      nimGC_setStackBottom(addr(stackTop))
+      initGC()
+      threadType = ThreadType.ForeignThread
+
+  proc tearDownForeignThreadGc*() {.gcsafe.} =
+    if threadType != ThreadType.ForeignThread:
+      return
+    when declared(deallocOsPages): deallocOsPages()
+    threadType = ThreadType.None
+    when declared(gch): zeroMem(addr gch, sizeof(gch))
+
+else:
+  template setupForeignThreadGc*() =
+    {.error: "setupForeignThreadGc is available only when ``--threads:on`` and ``--tlsEmulation:off`` are used".}
+
+  template tearDownForeignThreadGc*() =
+    {.error: "tearDownForeignThreadGc is available only when ``--threads:on`` and ``--tlsEmulation:off`` are used".}
+
+# ----------------- stack management --------------------------------------
+#  inspired by Smart Eiffel
+
+when defined(emscripten) or defined(wasm):
+  const stackIncreases = true
+elif defined(sparc):
+  const stackIncreases = false
+elif defined(hppa) or defined(hp9000) or defined(hp9000s300) or
+     defined(hp9000s700) or defined(hp9000s800) or defined(hp9000s820):
+  const stackIncreases = true
+else:
+  const stackIncreases = false
+
+proc stackSize(stack: ptr GcStack): int {.noinline.} =
+  when nimCoroutines:
+    var pos = stack.pos
+  else:
+    var pos {.volatile, noinit.}: pointer
+    pos = addr(pos) # the local's own address serves as the current stack position
+
+  if pos != nil:
+    when stackIncreases: # distance from the bottom, measured in the growth direction
+      result = cast[int](pos) -% cast[int](stack.bottom)
+    else:
+      result = cast[int](stack.bottom) -% cast[int](pos)
+  else:
+    result = 0
+
+proc stackSize(): int {.noinline.} =
+  result = 0
+  for stack in gch.stack.items():
+    result = result + stack.stackSize()
+
+when nimCoroutines:
+  proc setPosition(stack: ptr GcStack, position: pointer) =
+    stack.pos = position
+    stack.maxStackSize = max(stack.maxStackSize, stack.stackSize())
+
+  proc setPosition(stack: var GcStack, position: pointer) =
+    setPosition(addr(stack), position)
+
+  proc getActiveStack(gch: var GcHeap): ptr GcStack =
+    return gch.activeStack
+
+  proc isActiveStack(stack: ptr GcStack): bool =
+    return gch.activeStack == stack
+else:
+  # Stack positions do not need to be tracked if coroutines are not used.
+  proc setPosition(stack: ptr GcStack, position: pointer) = discard
+  proc setPosition(stack: var GcStack, position: pointer) = discard
+  # There is just one stack - main stack of the thread. It is active always.
+  proc getActiveStack(gch: var GcHeap): ptr GcStack = addr(gch.stack)
+  proc isActiveStack(stack: ptr GcStack): bool = true
+
+{.push stack_trace: off.}
+when nimCoroutines:
+  proc GC_addStack(bottom: pointer) {.cdecl, dynlib, exportc.} =
+    # c_fprintf(stdout, "GC_addStack: %p;\n", bottom)
+    var stack = gch.stack.append()
+    stack.bottom = bottom
+    stack.setPosition(bottom)
+
+  proc GC_removeStack(bottom: pointer) {.cdecl, dynlib, exportc.} =
+    # c_fprintf(stdout, "GC_removeStack: %p;\n", bottom)
+    gch.stack.find(bottom).remove()
+
+  proc GC_setActiveStack(bottom: pointer) {.cdecl, dynlib, exportc.} =
+    ## Sets active stack and updates current stack position.
+    # c_fprintf(stdout, "GC_setActiveStack: %p;\n", bottom)
+    var sp {.volatile.}: pointer
+    gch.activeStack = gch.stack.find(bottom)
+    gch.activeStack.setPosition(addr(sp))
+
+  proc GC_getActiveStack() : pointer {.cdecl, exportc.} =
+    return gch.activeStack.bottom
+
+when not defined(useNimRtl):
+  proc nimGC_setStackBottom(theStackBottom: pointer) =
+    # Initializes main stack of the thread.
+    when nimCoroutines:
+      if gch.stack.next == nil:
+        # Main stack was not initialized yet
+        gch.stack.next = addr(gch.stack)
+        gch.stack.prev = addr(gch.stack)
+        gch.stack.bottom = theStackBottom
+        gch.stack.maxStackSize = 0
+        gch.activeStack = addr(gch.stack)
+
+    if gch.stack.bottom == nil:
+      # This branch will not be called when -d:nimCoroutines - it is fine,
+      # because same thing is done just above.
+      #c_fprintf(stdout, "stack bottom: %p;\n", theStackBottom)
+      # the first init must be the one that defines the stack bottom:
+      gch.stack.bottom = theStackBottom
+    elif theStackBottom != gch.stack.bottom:
+      var a = cast[int](theStackBottom) # and not PageMask - PageSize*2
+      var b = cast[int](gch.stack.bottom)
+      #c_fprintf(stdout, "old: %p new: %p;\n",gch.stack.bottom,theStackBottom)
+      when stackIncreases:
+        gch.stack.bottom = cast[pointer](min(a, b))
+      else:
+        gch.stack.bottom = cast[pointer](max(a, b))
+
+    when nimCoroutines:
+      if theStackBottom != nil: gch.stack.bottom = theStackBottom
+
+    gch.stack.setPosition(theStackBottom)
+{.pop.}
+
+proc isOnStack(p: pointer): bool =
+  var stackTop {.volatile, noinit.}: pointer
+  stackTop = addr(stackTop) # address of this local marks the current top of stack
+  var a = cast[int](gch.getActiveStack().bottom)
+  var b = cast[int](stackTop)
+  when not stackIncreases:
+    swap(a, b) # downward-growing stack: the bottom is the higher address
+  var x = cast[int](p)
+  result = a <=% x and x <=% b # inclusive, unsigned range check
+
+when defined(sparc): # For SPARC architecture.
+  when nimCoroutines:
+    {.error: "Nim coroutines are not supported on this platform."}
+
+  template forEachStackSlot(gch, gcMark: untyped) {.dirty.} =
+    when defined(sparcv9):
+      asm  """"flushw \n" """
+    else:
+      asm  """"ta      0x3   ! ST_FLUSH_WINDOWS\n" """
+
+    var
+      max = gch.stack.bottom
+      sp: PPointer
+      stackTop: array[0..1, pointer]
+    sp = addr(stackTop[0])
+    # Addresses decrease as the stack grows.
+    while sp <= max:
+      gcMark(gch, sp[])
+      sp = cast[PPointer](cast[int](sp) +% sizeof(pointer))
+
+elif defined(ELATE):
+  {.error: "stack marking code is to be written for this architecture".}
+
+elif stackIncreases:
+  # ---------------------------------------------------------------------------
+  # Generic code for architectures where addresses increase as the stack grows.
+  # ---------------------------------------------------------------------------
+  when defined(emscripten) or defined(wasm):
+    var
+      jmpbufSize {.importc: "sizeof(jmp_buf)", nodecl.}: int
+        # a little hack to get the size of a JmpBuf in the generated C code
+        # in a platform independent way
+
+  template forEachStackSlotAux(gch, gcMark: untyped) {.dirty.} =
+    for stack in gch.stack.items():
+      var max = cast[int](gch.stack.bottom) # NOTE(review): ignores the loop variable `stack` - confirm
+      var sp = cast[int](addr(registers)) -% sizeof(pointer)
+      while sp >=% max:
+        gcMark(gch, cast[PPointer](sp)[])
+        sp = sp -% sizeof(pointer)
+
+  template forEachStackSlot(gch, gcMark: untyped) {.dirty.} =
+    when defined(emscripten) or defined(wasm):
+      var registers: cint
+      forEachStackSlotAux(gch, gcMark)
+    else:
+      var registers {.noinit.}: C_JmpBuf
+      if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
+        forEachStackSlotAux(gch, gcMark)
+
+else:
+  # ---------------------------------------------------------------------------
+  # Generic code for architectures where addresses decrease as the stack grows.
+  # ---------------------------------------------------------------------------
+  template forEachStackSlot(gch, gcMark: untyped) {.dirty.} =
+    # We use a jmp_buf buffer that is in the C stack.
+    # Used to traverse the stack and registers assuming
+    # that 'setjmp' will save registers in the C stack.
+    type PStackSlice = ptr array[0..7, pointer]
+    var registers {.noinit.}: C_JmpBuf
+    # Update position of stack gc is executing in.
+    gch.getActiveStack().setPosition(addr(registers))
+    if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
+      for stack in gch.stack.items():
+        var max = cast[int](stack.bottom)
+        var sp = cast[int](addr(registers))
+        when defined(amd64):
+          if stack.isActiveStack():
+            # words within the jmp_buf structure may not be properly aligned.
+            let regEnd = sp +% sizeof(registers)
+            while sp <% regEnd:
+              gcMark(gch, cast[PPointer](sp)[])
+              sp = sp +% sizeof(pointer)
+        # Make sure sp is word-aligned
+        sp = sp and not (sizeof(pointer) - 1)
+        # loop unrolled:
+        while sp <% max - 8*sizeof(pointer):
+          gcMark(gch, cast[PStackSlice](sp)[0])
+          gcMark(gch, cast[PStackSlice](sp)[1])
+          gcMark(gch, cast[PStackSlice](sp)[2])
+          gcMark(gch, cast[PStackSlice](sp)[3])
+          gcMark(gch, cast[PStackSlice](sp)[4])
+          gcMark(gch, cast[PStackSlice](sp)[5])
+          gcMark(gch, cast[PStackSlice](sp)[6])
+          gcMark(gch, cast[PStackSlice](sp)[7])
+          sp = sp +% sizeof(pointer)*8
+        # last few entries:
+        while sp <=% max:
+          gcMark(gch, cast[PPointer](sp)[])
+          sp = sp +% sizeof(pointer)
+
+# ----------------------------------------------------------------------------
+# end of non-portable code
+# ----------------------------------------------------------------------------
+
+proc prepareDealloc(cell: PCell) {.raises: [].} =
+  when declared(useMarkForDebug):
+    when useMarkForDebug:
+      gcAssert(cell notin gch.marked, "Cell still alive!")
+  let t = cell.typ
+  if t.finalizer != nil:
+    # the finalizer could invoke something that
+    # allocates memory; this could trigger a garbage
+    # collection. Since we are already collecting we
+    # prevent recursive entering here by a lock.
+    # XXX: we should set the cell's children to nil!
+    inc(gch.recGcLock)
+    (cast[Finalizer](t.finalizer))(cellToUsr(cell))
+    dec(gch.recGcLock)
+  decTypeSize(cell, t)
+
+proc deallocHeap*(runFinalizers = true; allowGcAfterwards = true) =
+  ## Frees the thread local heap. Runs every finalizer if `runFinalizers`
+  ## is true. If `allowGcAfterwards` is true, a minimal amount of allocation
+  ## happens to ensure the GC can continue to work after the call
+  ## to `deallocHeap`.
+  template deallocCell(x) =
+    if isCell(x):
+      # cast to PCell is correct here:
+      prepareDealloc(cast[PCell](x))
+
+  if runFinalizers:
+    when not declared(allObjectsAsProc):
+      for x in allObjects(gch.region):
+        deallocCell(x)
+    else:
+      var spaceIter: ObjectSpaceIter
+      while true:
+        let x = allObjectsAsProc(gch.region, addr spaceIter)
+        if spaceIter.state < 0: break
+        deallocCell(x)
+
+  deallocOsPages(gch.region)
+  zeroMem(addr gch.region, sizeof(gch.region))
+  if allowGcAfterwards:
+    initGC()
+
+type
+  GlobalMarkerProc = proc () {.nimcall, benign, raises: [].}
+var
+  globalMarkersLen {.exportc.}: int
+  globalMarkers {.exportc.}: array[0..3499, GlobalMarkerProc]
+  threadLocalMarkersLen {.exportc.}: int
+  threadLocalMarkers {.exportc.}: array[0..3499, GlobalMarkerProc]
+  gHeapidGenerator: int
+
+proc nimRegisterGlobalMarker(markerProc: GlobalMarkerProc) {.compilerproc.} =
+  if globalMarkersLen <= high(globalMarkers):
+    globalMarkers[globalMarkersLen] = markerProc
+    inc globalMarkersLen
+  else:
+    cstderr.rawWrite("[GC] cannot register global variable; too many global variables")
+    rawQuit 1
+
+proc nimRegisterThreadLocalMarker(markerProc: GlobalMarkerProc) {.compilerproc.} =
+  if threadLocalMarkersLen <= high(threadLocalMarkers):
+    threadLocalMarkers[threadLocalMarkersLen] = markerProc
+    inc threadLocalMarkersLen
+  else:
+    cstderr.rawWrite("[GC] cannot register thread local variable; too many thread local variables")
+    rawQuit 1
diff --git a/lib/system/gc_hooks.nim b/lib/system/gc_hooks.nim
new file mode 100644
index 000000000..ace62eea0
--- /dev/null
+++ b/lib/system/gc_hooks.nim
@@ -0,0 +1,53 @@
+#
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2019 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+## Hooks for memory management. Can be used to implement custom garbage
+## collectors etc.
+
+type
+  GlobalMarkerProc = proc () {.nimcall, benign, raises: [], tags: [].}
+var
+  globalMarkersLen: int
+  globalMarkers: array[0..3499, GlobalMarkerProc]
+  threadLocalMarkersLen: int
+  threadLocalMarkers: array[0..3499, GlobalMarkerProc]
+
+proc nimRegisterGlobalMarker(markerProc: GlobalMarkerProc) {.compilerproc.} =
+  if globalMarkersLen <= high(globalMarkers):
+    globalMarkers[globalMarkersLen] = markerProc
+    inc globalMarkersLen
+  else:
+    cstderr.rawWrite("[GC] cannot register global variable; too many global variables")
+    rawQuit 1
+
+proc nimRegisterThreadLocalMarker(markerProc: GlobalMarkerProc) {.compilerproc.} =
+  if threadLocalMarkersLen <= high(threadLocalMarkers):
+    threadLocalMarkers[threadLocalMarkersLen] = markerProc
+    inc threadLocalMarkersLen
+  else:
+    cstderr.rawWrite("[GC] cannot register thread local variable; too many thread local variables")
+    rawQuit 1
+
+proc traverseGlobals*() =
+  for i in 0..globalMarkersLen-1:
+    globalMarkers[i]()
+
+proc traverseThreadLocals*() =
+  for i in 0..threadLocalMarkersLen-1:
+    threadLocalMarkers[i]()
+
+var
+  newObjHook*: proc (typ: PNimType, size: int): pointer {.nimcall, tags: [], raises: [], gcsafe.}
+  traverseObjHook*: proc (p: pointer, op: int) {.nimcall, tags: [], raises: [], gcsafe.}
+
+proc nimGCvisit(p: pointer, op: int) {.inl, compilerRtl.} =
+  traverseObjHook(p, op)
+
+proc newObj(typ: PNimType, size: int): pointer {.inl, compilerRtl.} =
+  result = newObjHook(typ, size)
diff --git a/lib/system/gc_interface.nim b/lib/system/gc_interface.nim
new file mode 100644
index 000000000..84145f33a
--- /dev/null
+++ b/lib/system/gc_interface.nim
@@ -0,0 +1,100 @@
+# ----------------- GC interface ---------------------------------------------
+const
+  usesDestructors = defined(gcDestructors) or defined(gcHooks)
+
+when not usesDestructors:
+  {.pragma: nodestroy.}
+
+when hasAlloc:
+  type
+    GC_Strategy* = enum  ## The strategy the GC should use for the application.
+      gcThroughput,      ## optimize for throughput
+      gcResponsiveness,  ## optimize for responsiveness (default)
+      gcOptimizeTime,    ## optimize for speed
+      gcOptimizeSpace    ## optimize for memory footprint
+
+when hasAlloc and not defined(js) and not usesDestructors:
+  proc GC_disable*() {.rtl, inl, benign, raises: [].}
+    ## Disables the GC. If called `n` times, `n` calls to `GC_enable`
+    ## are needed to reactivate the GC.
+    ##
+    ## Note that in most circumstances one should only disable
+    ## the mark and sweep phase with
+    ## `GC_disableMarkAndSweep <#GC_disableMarkAndSweep>`_.
+
+  proc GC_enable*() {.rtl, inl, benign, raises: [].}
+    ## Enables the GC again.
+
+  proc GC_fullCollect*() {.rtl, benign.}
+    ## Forces a full garbage collection pass.
+    ## Ordinary code does not need to call this (and should not).
+
+  proc GC_enableMarkAndSweep*() {.rtl, benign.}
+  proc GC_disableMarkAndSweep*() {.rtl, benign.}
+    ## The current implementation uses a reference counting garbage collector
+    ## with a rarely run mark and sweep phase to free cycles. The mark and
+    ## sweep phase may take a long time and is not needed if the application
+    ## does not create cycles. Thus the mark and sweep phase can be deactivated
+    ## and activated separately from the rest of the GC.
+
+  proc GC_getStatistics*(): string {.rtl, benign.}
+    ## Returns an informative string about the GC's activity. This may be useful
+    ## for tweaking.
+
+  proc GC_ref*[T](x: ref T) {.magic: "GCref", benign.}
+  proc GC_ref*[T](x: seq[T]) {.magic: "GCref", benign.}
+  proc GC_ref*(x: string) {.magic: "GCref", benign.}
+    ## Marks the object `x` as referenced, so that it will not be freed until
+    ## it is unmarked via `GC_unref`.
+    ## If called n-times for the same object `x`,
+    ## n calls to `GC_unref` are needed to unmark `x`.
+
+  proc GC_unref*[T](x: ref T) {.magic: "GCunref", benign.}
+  proc GC_unref*[T](x: seq[T]) {.magic: "GCunref", benign.}
+  proc GC_unref*(x: string) {.magic: "GCunref", benign.}
+    ## See the documentation of `GC_ref <#GC_ref,string>`_.
+
+  proc nimGC_setStackBottom*(theStackBottom: pointer) {.compilerRtl, noinline, benign, raises: [].}
+    ## Expands operating GC stack range to `theStackBottom`. Does nothing
+    ## if current stack bottom is already lower than `theStackBottom`.
+
+when hasAlloc and defined(js):
+  template GC_disable* =
+    {.warning: "GC_disable is a no-op in JavaScript".}
+
+  template GC_enable* =
+    {.warning: "GC_enable is a no-op in JavaScript".}
+
+  template GC_fullCollect* =
+    {.warning: "GC_fullCollect is a no-op in JavaScript".}
+
+  template GC_setStrategy* =
+    {.warning: "GC_setStrategy is a no-op in JavaScript".}
+
+  template GC_enableMarkAndSweep* =
+    {.warning: "GC_enableMarkAndSweep is a no-op in JavaScript".}
+
+  template GC_disableMarkAndSweep* =
+    {.warning: "GC_disableMarkAndSweep is a no-op in JavaScript".}
+
+  template GC_ref*[T](x: ref T) =
+    {.warning: "GC_ref is a no-op in JavaScript".}
+
+  template GC_ref*[T](x: seq[T]) =
+    {.warning: "GC_ref is a no-op in JavaScript".}
+
+  template GC_ref*(x: string) =
+    {.warning: "GC_ref is a no-op in JavaScript".}
+
+  template GC_unref*[T](x: ref T) =
+    {.warning: "GC_unref is a no-op in JavaScript".}
+
+  template GC_unref*[T](x: seq[T]) =
+    {.warning: "GC_unref is a no-op in JavaScript".}
+
+  template GC_unref*(x: string) =
+    {.warning: "GC_unref is a no-op in JavaScript".}
+
+  template GC_getStatistics*(): string =
+    {.warning: "GC_getStatistics is a no-op in JavaScript".}
+    ""
diff --git a/lib/system/gc_ms.nim b/lib/system/gc_ms.nim
index 9ebc27a9f..c885a6893 100644
--- a/lib/system/gc_ms.nim
+++ b/lib/system/gc_ms.nim
@@ -1,143 +1,204 @@
 #
 #
-#            Nimrod's Runtime Library
-#        (c) Copyright 2013 Andreas Rumpf
+#            Nim's Runtime Library
+#        (c) Copyright 2015 Andreas Rumpf
 #
 #    See the file "copying.txt", included in this
 #    distribution, for details about the copyright.
 #
 
-# A simple mark&sweep garbage collector for Nimrod. Define the 
+# A simple mark&sweep garbage collector for Nim. Define the
 # symbol ``gcUseBitvectors`` to generate a variant of this GC.
+
 {.push profiler:off.}
 
 const
   InitialThreshold = 4*1024*1024 # X MB because marking&sweeping is slow
-  withBitvectors = defined(gcUseBitvectors) 
+  withBitvectors = defined(gcUseBitvectors)
   # bitvectors are significantly faster for GC-bench, but slower for
   # bootstrapping and use more memory
   rcWhite = 0
   rcGrey = 1   # unused
   rcBlack = 2
 
-template mulThreshold(x): expr {.immediate.} = x * 2
+template mulThreshold(x): untyped = x * 2
 
 when defined(memProfiler):
   proc nimProfile(requestedSize: int)
 
+when hasThreadSupport:
+  import std/sharedlist
+
 type
-  TWalkOp = enum
+  WalkOp = enum
     waMarkGlobal,  # we need to mark conservatively for global marker procs
                    # as these may refer to a global var and not to a thread
-                   # local 
+                   # local
     waMarkPrecise  # fast precise marking
 
-  TFinalizer {.compilerproc.} = proc (self: pointer) {.nimcall.}
+  Finalizer {.compilerproc.} = proc (self: pointer) {.nimcall, benign, raises: [].}
     # A ref type can have a finalizer that is called before the object's
     # storage is freed.
-  
-  TGlobalMarkerProc = proc () {.nimcall.}
 
-  TGcStat = object
+  GcStat = object
     collections: int         # number of performed full collections
     maxThreshold: int        # max threshold that has been set
     maxStackSize: int        # max stack size
-    freedObjects: int        # max entries in cycle table  
-  
-  TGcHeap = object           # this contains the zero count and
+    freedObjects: int        # number of objects freed by the collector
+
+  GcStack {.final, pure.} = object
+    when nimCoroutines:
+      prev: ptr GcStack
+      next: ptr GcStack
+      maxStackSize: int      # Used to track statistics because we can not use
+                             # GcStat.maxStackSize when multiple stacks exist.
+    bottom: pointer
+
+    when nimCoroutines:
+      pos: pointer
+
+  GcHeap = object            # this contains the zero count and
                              # non-zero count table
-    stackBottom: pointer
+    stack: GcStack
+    when nimCoroutines:
+      activeStack: ptr GcStack    # currently executing coroutine stack.
     cycleThreshold: int
+    when useCellIds:
+      idGenerator: int
     when withBitvectors:
-      allocated, marked: TCellSet
-    tempStack: TCellSeq      # temporary stack for recursion elimination
+      allocated, marked: CellSet
+    tempStack: CellSeq       # temporary stack for recursion elimination
     recGcLock: int           # prevent recursion via finalizers; no thread lock
-    region: TMemRegion       # garbage collected region
-    stat: TGcStat
+    region: MemRegion        # garbage collected region
+    stat: GcStat
+    when hasThreadSupport:
+      toDispose: SharedList[pointer]
+    gcThreadId: int
+    additionalRoots: CellSeq # dummy roots for GC_ref/unref
+    when defined(nimTracing):
+      tracing: bool
+      indentation: int
 
 var
-  gch {.rtlThreadVar.}: TGcHeap
+  gch {.rtlThreadVar.}: GcHeap
 
 when not defined(useNimRtl):
-  InstantiateForRegion(gch.region)
-
-template acquire(gch: TGcHeap) = 
-  when hasThreadSupport and hasSharedHeap:
-    AcquireSys(HeapLock)
-
-template release(gch: TGcHeap) = 
-  when hasThreadSupport and hasSharedHeap:
-    releaseSys(HeapLock)
+  instantiateForRegion(gch.region)
 
 template gcAssert(cond: bool, msg: string) =
   when defined(useGcAssert):
     if not cond:
-      echo "[GCASSERT] ", msg
-      quit 1
+      cstderr.rawWrite "[GCASSERT] "
+      cstderr.rawWrite msg
+      rawQuit 1
 
 proc cellToUsr(cell: PCell): pointer {.inline.} =
   # convert object (=pointer to refcount) to pointer to userdata
-  result = cast[pointer](cast[TAddress](cell)+%TAddress(sizeof(TCell)))
+  result = cast[pointer](cast[int](cell)+%ByteAddress(sizeof(Cell)))
 
 proc usrToCell(usr: pointer): PCell {.inline.} =
   # convert pointer to userdata to object (=pointer to refcount)
-  result = cast[PCell](cast[TAddress](usr)-%TAddress(sizeof(TCell)))
-
-proc canbeCycleRoot(c: PCell): bool {.inline.} =
-  result = ntfAcyclic notin c.typ.flags
+  result = cast[PCell](cast[int](usr)-%ByteAddress(sizeof(Cell)))
 
 proc extGetCellType(c: pointer): PNimType {.compilerproc.} =
   # used for code generation concerning debugging
   result = usrToCell(c).typ
 
-proc unsureAsgnRef(dest: ppointer, src: pointer) {.inline.} =
+proc unsureAsgnRef(dest: PPointer, src: pointer) {.inline, compilerproc.} =
   dest[] = src
 
 proc internRefcount(p: pointer): int {.exportc: "getRefcount".} =
   result = 0
 
-var
-  globalMarkersLen: int
-  globalMarkers: array[0.. 7_000, TGlobalMarkerProc]
-
-proc nimRegisterGlobalMarker(markerProc: TGlobalMarkerProc) {.compilerProc.} =
-  if globalMarkersLen <= high(globalMarkers):
-    globalMarkers[globalMarkersLen] = markerProc
-    inc globalMarkersLen
-  else:
-    echo "[GC] cannot register global variable; too many global variables"
-    quit 1
-
 # this that has to equals zero, otherwise we have to round up UnitsPerPage:
 when BitsPerPage mod (sizeof(int)*8) != 0:
   {.error: "(BitsPerPage mod BitsPerUnit) should be zero!".}
 
 # forward declarations:
-proc collectCT(gch: var TGcHeap)
-proc IsOnStack*(p: pointer): bool {.noinline.}
-proc forAllChildren(cell: PCell, op: TWalkOp)
-proc doOperation(p: pointer, op: TWalkOp)
-proc forAllChildrenAux(dest: Pointer, mt: PNimType, op: TWalkOp)
+proc collectCT(gch: var GcHeap; size: int) {.benign, raises: [].}
+proc forAllChildren(cell: PCell, op: WalkOp) {.benign, raises: [].}
+proc doOperation(p: pointer, op: WalkOp) {.benign, raises: [].}
+proc forAllChildrenAux(dest: pointer, mt: PNimType, op: WalkOp) {.benign, raises: [].}
 # we need the prototype here for debugging purposes
 
-proc prepareDealloc(cell: PCell) =
-  if cell.typ.finalizer != nil:
-    # the finalizer could invoke something that
-    # allocates memory; this could trigger a garbage
-    # collection. Since we are already collecting we
-    # prevend recursive entering here by a lock.
-    # XXX: we should set the cell's children to nil!
-    inc(gch.recGcLock)
-    (cast[TFinalizer](cell.typ.finalizer))(cellToUsr(cell))
-    dec(gch.recGcLock)
+when defined(nimGcRefLeak):
+  const
+    MaxTraceLen = 20 # tracking the last 20 calls is enough
+
+  type
+    GcStackTrace = object
+      lines: array[0..MaxTraceLen-1, cstring]
+      files: array[0..MaxTraceLen-1, cstring]
 
-proc nimGCref(p: pointer) {.compilerProc, inline.} = 
+  proc captureStackTrace(f: PFrame, st: var GcStackTrace) =
+    const
+      firstCalls = 5
+    var
+      it = f
+      i = 0
+      total = 0
+    while it != nil and i <= high(st.lines)-(firstCalls-1):
+      # the (-1) is for the "..." entry
+      st.lines[i] = it.procname
+      st.files[i] = it.filename
+      inc(i)
+      inc(total)
+      it = it.prev
+    var b = it
+    while it != nil:
+      inc(total)
+      it = it.prev
+    for j in 1..total-i-(firstCalls-1):
+      if b != nil: b = b.prev
+    if total != i:
+      st.lines[i] = "..."
+      st.files[i] = "..."
+      inc(i)
+    while b != nil and i <= high(st.lines):
+      st.lines[i] = b.procname
+      st.files[i] = b.filename
+      inc(i)
+      b = b.prev
+
+  var ax: array[10_000, GcStackTrace]
+
+proc nimGCref(p: pointer) {.compilerproc.} =
   # we keep it from being collected by pretending it's not even allocated:
-  when withBitvectors: excl(gch.allocated, usrToCell(p))
-  else: usrToCell(p).refcount = rcBlack
-proc nimGCunref(p: pointer) {.compilerProc, inline.} = 
-  when withBitvectors: incl(gch.allocated, usrToCell(p))
-  else: usrToCell(p).refcount = rcWhite
+  when false:
+    when withBitvectors: excl(gch.allocated, usrToCell(p))
+    else: usrToCell(p).refcount = rcBlack
+  when defined(nimGcRefLeak):
+    captureStackTrace(framePtr, ax[gch.additionalRoots.len])
+  add(gch.additionalRoots, usrToCell(p))
+
+proc nimGCunref(p: pointer) {.compilerproc.} =
+  let cell = usrToCell(p)
+  var L = gch.additionalRoots.len-1
+  var i = L
+  let d = gch.additionalRoots.d
+  while i >= 0:
+    if d[i] == cell:
+      d[i] = d[L]
+      when defined(nimGcRefLeak):
+        ax[i] = ax[L]
+      dec gch.additionalRoots.len
+      break
+    dec(i)
+  when false:
+    when withBitvectors: incl(gch.allocated, usrToCell(p))
+    else: usrToCell(p).refcount = rcWhite
+
+when defined(nimGcRefLeak):
+  proc writeLeaks() =
+    for i in 0..gch.additionalRoots.len-1:
+      c_fprintf(stdout, "[Heap] NEW STACK TRACE\n")
+      for ii in 0..MaxTraceLen-1:
+        let line = ax[i].lines[ii]
+        let file = ax[i].files[ii]
+        if isNil(line): break
+        c_fprintf(stdout, "[Heap] %s(%s)\n", file, line)
+
+include gc_common
 
 proc initGC() =
   when not defined(useNimRtl):
@@ -146,12 +207,17 @@ proc initGC() =
     gch.stat.maxThreshold = 0
     gch.stat.maxStackSize = 0
     init(gch.tempStack)
+    init(gch.additionalRoots)
     when withBitvectors:
-      Init(gch.allocated)
+      init(gch.allocated)
       init(gch.marked)
+    when hasThreadSupport:
+      init(gch.toDispose)
+    gch.gcThreadId = atomicInc(gHeapidGenerator) - 1
+    gcAssert(gch.gcThreadId >= 0, "invalid computed thread ID")
 
-proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: TWalkOp) =
-  var d = cast[TAddress](dest)
+proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: WalkOp) {.benign.} =
+  var d = cast[int](dest)
   case n.kind
   of nkSlot: forAllChildrenAux(cast[pointer](d +% n.offset), n.typ, op)
   of nkList:
@@ -162,21 +228,21 @@ proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: TWalkOp) =
     if m != nil: forAllSlotsAux(dest, m, op)
   of nkNone: sysAssert(false, "forAllSlotsAux")
 
-proc forAllChildrenAux(dest: Pointer, mt: PNimType, op: TWalkOp) =
-  var d = cast[TAddress](dest)
+proc forAllChildrenAux(dest: pointer, mt: PNimType, op: WalkOp) =
+  var d = cast[int](dest)
   if dest == nil: return # nothing to do
   if ntfNoRefs notin mt.flags:
-    case mt.Kind
+    case mt.kind
     of tyRef, tyString, tySequence: # leaf:
-      doOperation(cast[ppointer](d)[], op)
+      doOperation(cast[PPointer](d)[], op)
     of tyObject, tyTuple:
       forAllSlotsAux(dest, mt.node, op)
     of tyArray, tyArrayConstr, tyOpenArray:
       for i in 0..(mt.size div mt.base.size)-1:
         forAllChildrenAux(cast[pointer](d +% i *% mt.base.size), mt.base, op)
-    else: nil
+    else: discard
 
-proc forAllChildren(cell: PCell, op: TWalkOp) =
+proc forAllChildren(cell: PCell, op: WalkOp) =
   gcAssert(cell != nil, "forAllChildren: 1")
   gcAssert(cell.typ != nil, "forAllChildren: 2")
   gcAssert cell.typ.kind in {tyRef, tySequence, tyString}, "forAllChildren: 3"
@@ -184,25 +250,25 @@ proc forAllChildren(cell: PCell, op: TWalkOp) =
   if marker != nil:
     marker(cellToUsr(cell), op.int)
   else:
-    case cell.typ.Kind
+    case cell.typ.kind
     of tyRef: # common case
       forAllChildrenAux(cellToUsr(cell), cell.typ.base, op)
     of tySequence:
-      var d = cast[TAddress](cellToUsr(cell))
-      var s = cast[PGenericSeq](d)
-      if s != nil:
-        for i in 0..s.len-1:
-          forAllChildrenAux(cast[pointer](d +% i *% cell.typ.base.size +%
-            GenericSeqSize), cell.typ.base, op)
-    else: nil
-
-proc rawNewObj(typ: PNimType, size: int, gch: var TGcHeap): pointer =
+      when not defined(nimSeqsV2):
+        var d = cast[int](cellToUsr(cell))
+        var s = cast[PGenericSeq](d)
+        if s != nil:
+          for i in 0..s.len-1:
+            forAllChildrenAux(cast[pointer](d +% align(GenericSeqSize, cell.typ.base.align) +% i *% cell.typ.base.size), cell.typ.base, op)
+    else: discard
+
+proc rawNewObj(typ: PNimType, size: int, gch: var GcHeap): pointer =
   # generates a new object and sets its reference counter to 0
-  acquire(gch)
+  incTypeSize typ, size
   gcAssert(typ.kind in {tyRef, tyString, tySequence}, "newObj: 1")
-  collectCT(gch)
-  var res = cast[PCell](rawAlloc(gch.region, size + sizeof(TCell)))
-  gcAssert((cast[TAddress](res) and (MemAlign-1)) == 0, "newObj: 2")
+  collectCT(gch, size + sizeof(Cell))
+  var res = cast[PCell](rawAlloc(gch.region, size + sizeof(Cell)))
+  gcAssert((cast[int](res) and (MemAlign-1)) == 0, "newObj: 2")
   # now it is buffered in the ZCT
   res.typ = typ
   when leakDetector and not hasThreadSupport:
@@ -210,10 +276,17 @@ proc rawNewObj(typ: PNimType, size: int, gch: var TGcHeap): pointer =
       res.filename = framePtr.prev.filename
       res.line = framePtr.prev.line
   res.refcount = 0
-  release(gch)
   when withBitvectors: incl(gch.allocated, res)
+  when useCellIds:
+    inc gch.idGenerator
+    res.id = gch.idGenerator
   result = cellToUsr(res)
 
+when useCellIds:
+  proc getCellId*[T](x: ref T): int =
+    let p = usrToCell(cast[pointer](x))
+    result = p.id
+
 {.pop.}
 
 proc newObj(typ: PNimType, size: int): pointer {.compilerRtl.} =
@@ -221,59 +294,69 @@ proc newObj(typ: PNimType, size: int): pointer {.compilerRtl.} =
   zeroMem(result, size)
   when defined(memProfiler): nimProfile(size)
 
-proc newSeq(typ: PNimType, len: int): pointer {.compilerRtl.} =
-  # `newObj` already uses locks, so no need for them here.
-  let size = addInt(mulInt(len, typ.base.size), GenericSeqSize)
-  result = newObj(typ, size)
-  cast[PGenericSeq](result).len = len
-  cast[PGenericSeq](result).reserved = len
+proc newObjNoInit(typ: PNimType, size: int): pointer {.compilerRtl.} =
+  result = rawNewObj(typ, size, gch)
   when defined(memProfiler): nimProfile(size)
 
 proc newObjRC1(typ: PNimType, size: int): pointer {.compilerRtl.} =
   result = rawNewObj(typ, size, gch)
   zeroMem(result, size)
   when defined(memProfiler): nimProfile(size)
-  
-proc newSeqRC1(typ: PNimType, len: int): pointer {.compilerRtl.} =
-  let size = addInt(mulInt(len, typ.base.size), GenericSeqSize)
-  result = newObj(typ, size)
-  cast[PGenericSeq](result).len = len
-  cast[PGenericSeq](result).reserved = len
-  when defined(memProfiler): nimProfile(size)
-  
-proc growObj(old: pointer, newsize: int, gch: var TGcHeap): pointer =
-  acquire(gch)
-  collectCT(gch)
-  var ol = usrToCell(old)
-  sysAssert(ol.typ != nil, "growObj: 1")
-  gcAssert(ol.typ.kind in {tyString, tySequence}, "growObj: 2")
-  
-  var res = cast[PCell](rawAlloc(gch.region, newsize + sizeof(TCell)))
-  var elemSize = 1
-  if ol.typ.kind != tyString: elemSize = ol.typ.base.size
-  
-  var oldsize = cast[PGenericSeq](old).len*elemSize + GenericSeqSize
-  copyMem(res, ol, oldsize + sizeof(TCell))
-  zeroMem(cast[pointer](cast[TAddress](res)+% oldsize +% sizeof(TCell)),
-          newsize-oldsize)
-  sysAssert((cast[TAddress](res) and (MemAlign-1)) == 0, "growObj: 3")
-  when withBitvectors: excl(gch.allocated, ol)
-  when reallyDealloc: rawDealloc(gch.region, ol)
-  else:
-    zeroMem(ol, sizeof(TCell))
-  when withBitvectors: incl(gch.allocated, res)
-  release(gch)
-  result = cellToUsr(res)
-  when defined(memProfiler): nimProfile(newsize-oldsize)
 
-proc growObj(old: pointer, newsize: int): pointer {.rtl.} =
-  result = growObj(old, newsize, gch)
+when not defined(nimSeqsV2):
+  {.push overflowChecks: on.}
+  proc newSeq(typ: PNimType, len: int): pointer {.compilerRtl.} =
+    # No lock is taken here; `newObj` performs the actual allocation.
+    let size = align(GenericSeqSize, typ.base.align) + len * typ.base.size
+    result = newObj(typ, size)
+    cast[PGenericSeq](result).len = len
+    cast[PGenericSeq](result).reserved = len
+    when defined(memProfiler): nimProfile(size)
+
+  proc newSeqRC1(typ: PNimType, len: int): pointer {.compilerRtl.} =
+    let size = align(GenericSeqSize, typ.base.align) + len * typ.base.size
+    result = newObj(typ, size)
+    cast[PGenericSeq](result).len = len
+    cast[PGenericSeq](result).reserved = len
+    when defined(memProfiler): nimProfile(size)
+  {.pop.}
+
+  proc growObj(old: pointer, newsize: int, gch: var GcHeap): pointer =
+    collectCT(gch, newsize + sizeof(Cell))
+    var ol = usrToCell(old)
+    sysAssert(ol.typ != nil, "growObj: 1")
+    gcAssert(ol.typ.kind in {tyString, tySequence}, "growObj: 2")
+
+    var res = cast[PCell](rawAlloc(gch.region, newsize + sizeof(Cell)))
+    var elemSize, elemAlign = 1
+    if ol.typ.kind != tyString:
+      elemSize = ol.typ.base.size
+      elemAlign = ol.typ.base.align
+    incTypeSize ol.typ, newsize
+
+    var oldsize = align(GenericSeqSize, elemAlign) + cast[PGenericSeq](old).len*elemSize
+    copyMem(res, ol, oldsize + sizeof(Cell))
+    zeroMem(cast[pointer](cast[int](res)+% oldsize +% sizeof(Cell)),
+            newsize-oldsize)
+    sysAssert((cast[int](res) and (MemAlign-1)) == 0, "growObj: 3")
+    when withBitvectors: incl(gch.allocated, res)
+    when useCellIds:
+      inc gch.idGenerator
+      res.id = gch.idGenerator
+    result = cellToUsr(res)
+    when defined(memProfiler): nimProfile(newsize-oldsize)
+
+  proc growObj(old: pointer, newsize: int): pointer {.rtl.} =
+    result = growObj(old, newsize, gch)
 
 {.push profiler:off.}
 
 # ----------------- collector -----------------------------------------------
 
-proc mark(gch: var TGcHeap, c: PCell) =
+proc mark(gch: var GcHeap, c: PCell) =
+  when hasThreadSupport:
+    for c in gch.toDispose:
+      nimGCunref(c)
   when withBitvectors:
     incl(gch.marked, c)
     gcAssert gch.tempStack.len == 0, "stack not empty!"
@@ -284,42 +367,55 @@ proc mark(gch: var TGcHeap, c: PCell) =
       if not containsOrIncl(gch.marked, d):
         forAllChildren(d, waMarkPrecise)
   else:
-    c.refCount = rcBlack
+    # XXX no 'if c.refCount != rcBlack' here?
+    when defined(nimTracing):
+      if gch.tracing:
+        for i in 1..gch.indentation: c_fprintf(stdout, " ")
+        c_fprintf(stdout, "start marking %p of type %s ((\n",
+                  c, c.typ.name)
+        inc gch.indentation, 2
+
+    c.refcount = rcBlack
     gcAssert gch.tempStack.len == 0, "stack not empty!"
     forAllChildren(c, waMarkPrecise)
     while gch.tempStack.len > 0:
       dec gch.tempStack.len
       var d = gch.tempStack.d[gch.tempStack.len]
       if d.refcount == rcWhite:
-        d.refCount = rcBlack
+        d.refcount = rcBlack
         forAllChildren(d, waMarkPrecise)
 
-proc doOperation(p: pointer, op: TWalkOp) =
+    when defined(nimTracing):
+      if gch.tracing:
+        dec gch.indentation, 2
+        for i in 1..gch.indentation: c_fprintf(stdout, " ")
+        c_fprintf(stdout, "finished marking %p of type %s))\n",
+                  c, c.typ.name)
+
+proc doOperation(p: pointer, op: WalkOp) =
   if p == nil: return
   var c: PCell = usrToCell(p)
   gcAssert(c != nil, "doOperation: 1")
   case op
-  of waMarkGlobal:
-    when hasThreadSupport:
-      # could point to a cell which we don't own and don't want to touch/trace
-      if isAllocatedPtr(gch.region, c):
-        mark(gch, c)
+  of waMarkGlobal: mark(gch, c)
+  of waMarkPrecise:
+    when defined(nimTracing):
+      if c.refcount == rcWhite: mark(gch, c)
     else:
-      mark(gch, c)
-  of waMarkPrecise: add(gch.tempStack, c)
+      add(gch.tempStack, c)
 
 proc nimGCvisit(d: pointer, op: int) {.compilerRtl.} =
-  doOperation(d, TWalkOp(op))
+  doOperation(d, WalkOp(op))
 
-proc freeCyclicCell(gch: var TGcHeap, c: PCell) =
+proc freeCyclicCell(gch: var GcHeap, c: PCell) =
   inc gch.stat.freedObjects
   prepareDealloc(c)
   when reallyDealloc: rawDealloc(gch.region, c)
   else:
     gcAssert(c.typ != nil, "freeCyclicCell")
-    zeroMem(c, sizeof(TCell))
+    zeroMem(c, sizeof(Cell))
 
-proc sweep(gch: var TGcHeap) =
+proc sweep(gch: var GcHeap) =
   when withBitvectors:
     for c in gch.allocated.elementsExcept(gch.marked):
       gch.allocated.excl(c)
@@ -332,154 +428,45 @@ proc sweep(gch: var TGcHeap) =
         if c.refcount == rcBlack: c.refcount = rcWhite
         else: freeCyclicCell(gch, c)
 
-proc markGlobals(gch: var TGcHeap) =
-  for i in 0 .. < globalMarkersLen: globalMarkers[i]()
-
-proc gcMark(gch: var TGcHeap, p: pointer) {.inline.} =
+proc markGlobals(gch: var GcHeap) =
+  if gch.gcThreadId == 0:
+    when defined(nimTracing):
+      if gch.tracing:
+        c_fprintf(stdout, "------- globals marking phase:\n")
+    for i in 0 .. globalMarkersLen-1: globalMarkers[i]()
+  when defined(nimTracing):
+    if gch.tracing:
+      c_fprintf(stdout, "------- thread locals marking phase:\n")
+  for i in 0 .. threadLocalMarkersLen-1: threadLocalMarkers[i]()
+  when defined(nimTracing):
+    if gch.tracing:
+      c_fprintf(stdout, "------- additional roots marking phase:\n")
+  let d = gch.additionalRoots.d
+  for i in 0 .. gch.additionalRoots.len-1: mark(gch, d[i])
+
+proc gcMark(gch: var GcHeap, p: pointer) {.inline.} =
   # the addresses are not as cells on the stack, so turn them to cells:
-  var cell = usrToCell(p)
-  var c = cast[TAddress](cell)
+  var c = cast[int](p)
   if c >% PageSize:
     # fast check: does it look like a cell?
-    var objStart = cast[PCell](interiorAllocatedPtr(gch.region, cell))
+    var objStart = cast[PCell](interiorAllocatedPtr(gch.region, p))
     if objStart != nil:
       mark(gch, objStart)
-  
-# ----------------- stack management --------------------------------------
-#  inspired from Smart Eiffel
-
-when defined(sparc):
-  const stackIncreases = false
-elif defined(hppa) or defined(hp9000) or defined(hp9000s300) or
-     defined(hp9000s700) or defined(hp9000s800) or defined(hp9000s820):
-  const stackIncreases = true
-else:
-  const stackIncreases = false
 
-when not defined(useNimRtl):
-  {.push stack_trace: off.}
-  proc setStackBottom(theStackBottom: pointer) =
-    #c_fprintf(c_stdout, "stack bottom: %p;\n", theStackBottom)
-    # the first init must be the one that defines the stack bottom:
-    if gch.stackBottom == nil: gch.stackBottom = theStackBottom
-    else:
-      var a = cast[TAddress](theStackBottom) # and not PageMask - PageSize*2
-      var b = cast[TAddress](gch.stackBottom)
-      #c_fprintf(c_stdout, "old: %p new: %p;\n",gch.stackBottom,theStackBottom)
-      when stackIncreases:
-        gch.stackBottom = cast[pointer](min(a, b))
-      else:
-        gch.stackBottom = cast[pointer](max(a, b))
-  {.pop.}
+proc markStackAndRegisters(gch: var GcHeap) {.noinline, cdecl.} =
+  forEachStackSlot(gch, gcMark)
 
-proc stackSize(): int {.noinline.} =
-  var stackTop {.volatile.}: pointer
-  result = abs(cast[int](addr(stackTop)) - cast[int](gch.stackBottom))
-
-when defined(sparc): # For SPARC architecture.
-  proc isOnStack(p: pointer): bool =
-    var stackTop {.volatile.}: pointer
-    stackTop = addr(stackTop)
-    var b = cast[TAddress](gch.stackBottom)
-    var a = cast[TAddress](stackTop)
-    var x = cast[TAddress](p)
-    result = a <=% x and x <=% b
-
-  proc markStackAndRegisters(gch: var TGcHeap) {.noinline, cdecl.} =
-    when defined(sparcv9):
-      asm  """"flushw \n" """
-    else:
-      asm  """"ta      0x3   ! ST_FLUSH_WINDOWS\n" """
-
-    var
-      max = gch.stackBottom
-      sp: PPointer
-      stackTop: array[0..1, pointer]
-    sp = addr(stackTop[0])
-    # Addresses decrease as the stack grows.
-    while sp <= max:
-      gcMark(gch, sp[])
-      sp = cast[ppointer](cast[TAddress](sp) +% sizeof(pointer))
-
-elif defined(ELATE):
-  {.error: "stack marking code is to be written for this architecture".}
-
-elif stackIncreases:
-  # ---------------------------------------------------------------------------
-  # Generic code for architectures where addresses increase as the stack grows.
-  # ---------------------------------------------------------------------------
-  proc isOnStack(p: pointer): bool =
-    var stackTop {.volatile.}: pointer
-    stackTop = addr(stackTop)
-    var a = cast[TAddress](gch.stackBottom)
-    var b = cast[TAddress](stackTop)
-    var x = cast[TAddress](p)
-    result = a <=% x and x <=% b
-
-  var
-    jmpbufSize {.importc: "sizeof(jmp_buf)", nodecl.}: int
-      # a little hack to get the size of a TJmpBuf in the generated C code
-      # in a platform independant way
-
-  proc markStackAndRegisters(gch: var TGcHeap) {.noinline, cdecl.} =
-    var registers: C_JmpBuf
-    if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
-      var max = cast[TAddress](gch.stackBottom)
-      var sp = cast[TAddress](addr(registers)) +% jmpbufSize -% sizeof(pointer)
-      # sp will traverse the JMP_BUF as well (jmp_buf size is added,
-      # otherwise sp would be below the registers structure).
-      while sp >=% max:
-        gcMark(gch, cast[ppointer](sp)[])
-        sp = sp -% sizeof(pointer)
-
-else:
-  # ---------------------------------------------------------------------------
-  # Generic code for architectures where addresses decrease as the stack grows.
-  # ---------------------------------------------------------------------------
-  proc isOnStack(p: pointer): bool =
-    var stackTop {.volatile.}: pointer
-    stackTop = addr(stackTop)
-    var b = cast[TAddress](gch.stackBottom)
-    var a = cast[TAddress](stackTop)
-    var x = cast[TAddress](p)
-    result = a <=% x and x <=% b
-
-  proc markStackAndRegisters(gch: var TGcHeap) {.noinline, cdecl.} =
-    # We use a jmp_buf buffer that is in the C stack.
-    # Used to traverse the stack and registers assuming
-    # that 'setjmp' will save registers in the C stack.
-    type PStackSlice = ptr array [0..7, pointer]
-    var registers: C_JmpBuf
-    if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
-      var max = cast[TAddress](gch.stackBottom)
-      var sp = cast[TAddress](addr(registers))
-      # loop unrolled:
-      while sp <% max - 8*sizeof(pointer):
-        gcMark(gch, cast[PStackSlice](sp)[0])
-        gcMark(gch, cast[PStackSlice](sp)[1])
-        gcMark(gch, cast[PStackSlice](sp)[2])
-        gcMark(gch, cast[PStackSlice](sp)[3])
-        gcMark(gch, cast[PStackSlice](sp)[4])
-        gcMark(gch, cast[PStackSlice](sp)[5])
-        gcMark(gch, cast[PStackSlice](sp)[6])
-        gcMark(gch, cast[PStackSlice](sp)[7])
-        sp = sp +% sizeof(pointer)*8
-      # last few entries:
-      while sp <=% max:
-        gcMark(gch, cast[ppointer](sp)[])
-        sp = sp +% sizeof(pointer)
-
-# ----------------------------------------------------------------------------
-# end of non-portable code
-# ----------------------------------------------------------------------------
-
-proc collectCTBody(gch: var TGcHeap) =
-  gch.stat.maxStackSize = max(gch.stat.maxStackSize, stackSize())
+proc collectCTBody(gch: var GcHeap) =
+  when not nimCoroutines:
+    gch.stat.maxStackSize = max(gch.stat.maxStackSize, stackSize())
+  when defined(nimTracing):
+    if gch.tracing:
+      c_fprintf(stdout, "------- stack marking phase:\n")
   prepareForInteriorPointerChecking(gch.region)
   markStackAndRegisters(gch)
   markGlobals(gch)
   sweep(gch)
-  
+
   inc(gch.stat.collections)
   when withBitvectors:
     deinit(gch.marked)
@@ -487,49 +474,53 @@ proc collectCTBody(gch: var TGcHeap) =
   gch.cycleThreshold = max(InitialThreshold, getOccupiedMem().mulThreshold)
   gch.stat.maxThreshold = max(gch.stat.maxThreshold, gch.cycleThreshold)
   sysAssert(allocInv(gch.region), "collectCT: end")
-  
-proc collectCT(gch: var TGcHeap) =
-  if getOccupiedMem(gch.region) >= gch.cycleThreshold and gch.recGcLock == 0:
+
+proc collectCT(gch: var GcHeap; size: int) =
+  let fmem = getFreeMem(gch.region)
+  if (getOccupiedMem(gch.region) >= gch.cycleThreshold or
+      size > fmem and fmem > InitialThreshold) and gch.recGcLock == 0:
     collectCTBody(gch)
 
 when not defined(useNimRtl):
-  proc GC_disable() = 
-    when hasThreadSupport and hasSharedHeap:
-      atomicInc(gch.recGcLock, 1)
-    else:
-      inc(gch.recGcLock)
+  proc GC_disable() =
+    inc(gch.recGcLock)
   proc GC_enable() =
-    if gch.recGcLock > 0: 
-      when hasThreadSupport and hasSharedHeap:
-        atomicDec(gch.recGcLock, 1)
-      else:
-        dec(gch.recGcLock)
+    when defined(nimDoesntTrackDefects):
+      if gch.recGcLock <= 0:
+        raise newException(AssertionDefect,
+            "API usage error: GC_enable called but GC is already enabled")
+    dec(gch.recGcLock)
 
-  proc GC_setStrategy(strategy: TGC_Strategy) = nil
+  proc GC_setStrategy(strategy: GC_Strategy) = discard
 
   proc GC_enableMarkAndSweep() =
     gch.cycleThreshold = InitialThreshold
 
   proc GC_disableMarkAndSweep() =
-    gch.cycleThreshold = high(gch.cycleThreshold)-1
+    gch.cycleThreshold = high(typeof(gch.cycleThreshold))-1
     # set to the max value to suppress the cycle detector
 
+  when defined(nimTracing):
+    proc GC_logTrace*() =
+      gch.tracing = true
+
   proc GC_fullCollect() =
-    acquire(gch)
-    var oldThreshold = gch.cycleThreshold
+    let oldThreshold = gch.cycleThreshold
     gch.cycleThreshold = 0 # forces cycle collection
-    collectCT(gch)
+    collectCT(gch, 0)
     gch.cycleThreshold = oldThreshold
-    release(gch)
 
   proc GC_getStatistics(): string =
-    GC_disable()
     result = "[GC] total memory: " & $getTotalMem() & "\n" &
              "[GC] occupied memory: " & $getOccupiedMem() & "\n" &
              "[GC] collections: " & $gch.stat.collections & "\n" &
              "[GC] max threshold: " & $gch.stat.maxThreshold & "\n" &
-             "[GC] freed objects: " & $gch.stat.freedObjects & "\n" &
-             "[GC] max stack size: " & $gch.stat.maxStackSize & "\n"
-    GC_enable()
+             "[GC] freed objects: " & $gch.stat.freedObjects & "\n"
+    when nimCoroutines:
+      result.add "[GC] number of stacks: " & $gch.stack.len & "\n"
+      for stack in items(gch.stack):
+        result.add "[GC]   stack " & stack.bottom.repr & "[GC]     max stack size " & $stack.maxStackSize & "\n"
+    else:
+      result.add "[GC] max stack size: " & $gch.stat.maxStackSize & "\n"
 
 {.pop.}
diff --git a/lib/system/gc_regions.nim b/lib/system/gc_regions.nim
new file mode 100644
index 000000000..d96de7eac
--- /dev/null
+++ b/lib/system/gc_regions.nim
@@ -0,0 +1,442 @@
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2016 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+# "Stack GC" for embedded devices or ultra performance requirements.
+import std/private/syslocks
+
+when defined(memProfiler):
+  proc nimProfile(requestedSize: int) {.benign.}
+
+when defined(useMalloc):
+  proc roundup(x, v: int): int {.inline.} =  # rounds `x` up to a multiple of `v`
+    result = (x + (v-1)) and not (v-1)  # mask trick: only valid when `v` is a power of two
+  proc emalloc(size: int): pointer {.importc: "malloc", header: "<stdlib.h>".}  # C malloc
+  proc efree(mem: pointer) {.importc: "free", header: "<stdlib.h>".}  # C free
+
+  proc osAllocPages(size: int): pointer {.inline.} =
+    # Must-succeed variant: callers dereference the result unchecked.
+    result = emalloc(size)
+    if result == nil: raiseOutOfMem()
+  proc osTryAllocPages(size: int): pointer {.inline.} =
+    emalloc(size) # may yield nil; the caller then retries with a smaller size
+  proc osDeallocPages(p: pointer, size: int) {.inline.} =
+    efree(p) # `size` is not needed by free()
+
+else:
+  include osalloc
+
+# We manage memory as a thread local stack. Since the allocation pointer
+# is detached from the control flow pointer, this model is vastly more
+# useful than the traditional programming model while almost as safe.
+# Individual objects can also be deleted but no coalescing is performed.
+# Stacks can also be moved from one thread to another.
+
+# We also support 'finalizers'.
+
+type
+  Finalizer {.compilerproc.} = proc (self: pointer) {.nimcall, benign.}
+    # A ref type can have a finalizer that is called before the object's
+    # storage is freed.
+
+  AlignType = BiggestFloat
+  ObjHeader = object
+    typ: PNimType
+    nextFinal: ptr ObjHeader # next object with finalizer
+
+  Chunk = ptr BaseChunk
+  BaseChunk = object
+    next: Chunk
+    size: int
+    head, tail: ptr ObjHeader # first and last object in chunk that
+                              # has a finalizer attached to it
+
+const
+  MaxSmallObject = 128
+
+type
+  FreeEntry = ptr object  # node of a per-size free list (only used by disabled code)
+    next: FreeEntry
+  SizedFreeEntry = ptr object  # free hole that records its own size (disabled code)
+    next: SizedFreeEntry
+    size: int
+  StackPtr = object  # snapshot of a region's allocation position
+    bump: pointer
+    remaining: int
+    current: Chunk
+
+  MemRegion* = object
+    remaining: int  # bytes still free in the tail chunk
+    bump: pointer  # next address handed out by the bump allocator
+    head, tail: Chunk  # first and last chunk of the chunk list
+    nextChunkSize, totalSize: int  # totalSize sums chunk sizes; bit 0 is a flag
+    when false:
+      freeLists: array[MaxSmallObject div MemAlign, FreeEntry]
+      holes: SizedFreeEntry
+    when hasThreadSupport:
+      lock: SysLock
+
+  SeqHeader = object # minor hack ahead: Since we know that seqs
+                     # and strings cannot have finalizers, we use the field
+                     # instead for a 'region' field so that they can grow
+                     # and shrink safely.
+    typ: PNimType
+    region: ptr MemRegion  # region the seq/string was allocated from
+
+var
+  tlRegion {.threadvar.}: MemRegion
+#  tempStrRegion {.threadvar.}: MemRegion  # not yet used
+
+template withRegion*(r: var MemRegion; body: untyped) =  # runs `body` with `r` installed as the thread-local region
+  let oldRegion = tlRegion  # saved so the previous region can be reinstated
+  tlRegion = r
+  try:
+    body
+  finally:
+    r = tlRegion  # write the (possibly changed) region state back to the caller
+    tlRegion = oldRegion
+
+template inc(p: pointer, s: int) =  # advances `p` in place by `s` bytes
+  p = cast[pointer](cast[int](p) +% s)
+
+template dec(p: pointer, s: int) =  # moves `p` back in place by `s` bytes
+  p = cast[pointer](cast[int](p) -% s)
+
+template `+!`(p: pointer, s: int): pointer =  # yields the address `s` bytes after `p`
+  cast[pointer](cast[int](p) +% s)
+
+template `-!`(p: pointer, s: int): pointer =  # yields the address `s` bytes before `p`
+  cast[pointer](cast[int](p) -% s)
+
+const nimMinHeapPages {.intdefine.} = 4
+
+proc allocSlowPath(r: var MemRegion; size: int) =
+  # Appends a fresh chunk that can hold `size` bytes to `r` and points the
+  # bump allocator at it. We need to keep the underlying linked list
+  # small, so chunk sizes double: e.g. 16KB, 32KB, 64KB, 128KB, 256KB,
+  # 512KB, 1MB, 2MB, 4MB, 8MB, 16MB, 32MB, 64MB, 128MB, 256MB, 512MB,
+  # 1GB, 2GB, 4GB, 8GB, 16GB --> even 16GB of RAM needs only about
+  # 20 list elements. That's reasonable.
+  if (r.totalSize and 1) == 0:
+    r.nextChunkSize = if r.totalSize < 64 * 1024: PageSize*nimMinHeapPages
+                      else: r.nextChunkSize*2
+  var s = roundup(size+sizeof(BaseChunk), PageSize)  # request plus chunk header, page aligned
+  var fresh: Chunk
+  if s > r.nextChunkSize:  # oversized request: allocate exactly what is needed
+    fresh = cast[Chunk](osAllocPages(s))
+  else:
+    fresh = cast[Chunk](osTryAllocPages(r.nextChunkSize))
+    if fresh == nil:  # the bigger chunk was refused: fall back to the minimum
+      fresh = cast[Chunk](osAllocPages(s))
+      # lowest bit in totalSize is the "don't increase nextChunkSize"
+      inc r.totalSize
+    else:
+      s = r.nextChunkSize
+  fresh.size = s
+  fresh.head = nil
+  fresh.tail = nil
+  fresh.next = nil
+  inc r.totalSize, s
+  let old = r.tail
+  if old == nil:  # region had no chunk yet
+    r.head = fresh
+  else:
+    r.tail.next = fresh
+  r.bump = fresh +! sizeof(BaseChunk)  # usable memory starts right after the header
+  r.tail = fresh
+  r.remaining = s - sizeof(BaseChunk)
+
+proc allocFast(r: var MemRegion; size: int): pointer =  # bump-allocates `size` bytes (rounded up to MemAlign) from `r`
+  when false:  # disabled free-list / hole lookup
+    if size <= MaxSmallObject:
+      var it = r.freeLists[size div MemAlign]
+      if it != nil:
+        r.freeLists[size div MemAlign] = it.next
+        return pointer(it)
+    else:
+      var it = r.holes
+      var prev: SizedFreeEntry = nil
+      while it != nil:
+        if it.size >= size:
+          if prev != nil: prev.next = it.next
+          else: r.holes = it.next
+          return pointer(it)
+        prev = it
+        it = it.next
+  let size = roundup(size, MemAlign)  # shadows the parameter with the aligned size
+  if size > r.remaining:  # tail chunk too small: append a new chunk
+    allocSlowPath(r, size)
+  sysAssert(size <= r.remaining, "size <= r.remaining")
+  dec(r.remaining, size)
+  result = r.bump
+  inc r.bump, size
+
+proc runFinalizers(c: Chunk) =  # runs the finalizer of every live object on `c`'s finalizer list
+  var it = c.head
+  while it != nil:
+    # individually freed objects with finalizer stay in the list, but
+    # their typ is nil then:
+    if it.typ != nil and it.typ.finalizer != nil:
+      (cast[Finalizer](it.typ.finalizer))(it+!sizeof(ObjHeader))  # finalizer receives the payload address
+    it = it.nextFinal
+
+proc runFinalizers(c: Chunk; newbump: pointer) =  # finalizes listed objects located at or above `newbump`
+  var it = c.head
+  var prev: ptr ObjHeader = nil
+  while it != nil:
+    let nxt = it.nextFinal  # fetch the successor before the finalizer runs
+    if it >= newbump:
+      if it.typ != nil and it.typ.finalizer != nil:
+        (cast[Finalizer](it.typ.finalizer))(it+!sizeof(ObjHeader))
+    elif prev != nil:  # `it` lies below `newbump`: cut the predecessor's link
+      prev.nextFinal = nil
+    prev = it
+    it = nxt
+
+proc dealloc(r: var MemRegion; p: pointer; size: int) =  # finalizes the object at `p`; reclaims it only if it was allocated last
+  let it = cast[ptr ObjHeader](p-!sizeof(ObjHeader))
+  if it.typ != nil and it.typ.finalizer != nil:
+    (cast[Finalizer](it.typ.finalizer))(p)
+  it.typ = nil  # nil typ makes runFinalizers skip this object
+  # it is beneficial to not use the free lists here:
+  if r.bump -! size == p:  # `p` ends exactly at the bump pointer
+    dec r.bump, size  # NOTE(review): r.remaining is not increased here - confirm intended
+  when false:
+    if size <= MaxSmallObject:
+      let it = cast[FreeEntry](p)
+      it.next = r.freeLists[size div MemAlign]
+      r.freeLists[size div MemAlign] = it
+    else:
+      let it = cast[SizedFreeEntry](p)
+      it.size = size
+      it.next = r.holes
+      r.holes = it
+
+proc deallocAll(r: var MemRegion; head: Chunk) =  # finalizes and frees every chunk from `head` onwards
+  var it = head
+  while it != nil:
+    let nxt = it.next  # read the link before the chunk is released
+    runFinalizers(it)
+    dec r.totalSize, it.size
+    osDeallocPages(it, it.size)
+    it = nxt
+
+proc deallocAll*(r: var MemRegion) =  # frees all chunks of `r` and resets it to the empty state
+  deallocAll(r, r.head)
+  zeroMem(addr r, sizeof r)
+
+proc obstackPtr*(r: MemRegion): StackPtr =  # captures `r`'s current allocation position
+  result.bump = r.bump
+  result.remaining = r.remaining
+  result.current = r.tail  # chunk the bump pointer currently lives in
+
+template computeRemaining(r): untyped =  # tail chunk size minus the bump pointer's offset into it
+  r.tail.size -% (cast[int](r.bump) -% cast[int](r.tail))
+
+proc setObstackPtr*(r: var MemRegion; sp: StackPtr) =  # rewinds `r` to the position captured in `sp`
+  # free everything after 'sp':
+  if sp.current != nil and sp.current.next != nil:
+    deallocAll(r, sp.current.next)  # drop the chunks appended after the snapshot
+    sp.current.next = nil
+    when false:
+      # better leak this memory than be sorry:
+      for i in 0..high(r.freeLists): r.freeLists[i] = nil
+      r.holes = nil
+  if r.tail != nil: runFinalizers(r.tail, sp.bump)  # NOTE(review): r.tail may already be freed by the deallocAll above - confirm
+
+  r.bump = sp.bump
+  r.tail = sp.current
+  r.remaining = sp.remaining
+
+proc obstackPtr*(): StackPtr = tlRegion.obstackPtr()  # snapshot of the thread-local region
+proc setObstackPtr*(sp: StackPtr) = tlRegion.setObstackPtr(sp)  # rewinds the thread-local region
+proc deallocAll*() = tlRegion.deallocAll()  # frees the whole thread-local region
+
+proc deallocOsPages(r: var MemRegion) = r.deallocAll()  # alias that forwards to deallocAll
+
+when false:
+  let obs = obstackPtr()
+  try:
+    body
+  finally:
+    setObstackPtr(obs)
+
+template withScratchRegion*(body: untyped) =  # runs `body` in a fresh region that is freed afterwards
+  let oldRegion = tlRegion
+  tlRegion = MemRegion()  # start from an empty region
+  try:
+    body
+  finally:
+    deallocAll()  # releases everything `body` allocated in the scratch region
+    tlRegion = oldRegion
+
+when false:
+  proc joinRegion*(dest: var MemRegion; src: MemRegion) =
+    # merging is not hard.
+    if dest.head.isNil:
+      dest.head = src.head
+    else:
+      dest.tail.next = src.head
+    dest.tail = src.tail
+    dest.bump = src.bump
+    dest.remaining = src.remaining
+    dest.nextChunkSize = max(dest.nextChunkSize, src.nextChunkSize)
+    inc dest.totalSize, src.totalSize
+
+proc isOnHeap*(r: MemRegion; p: pointer): bool =
+  # True if `p` lies inside one of `r`'s chunks. The tail chunk is the
+  # largest, so check it first; only its part below `bump` is in use:
+  if p >= r.tail and p < r.bump:
+    return true
+  var it = r.head
+  while it != r.tail:
+    if p >= it and p < it+!it.size: return true  # chunk spans [it, it+size)
+    it = it.next
+
+proc rawNewObj(r: var MemRegion, typ: PNimType, size: int): pointer =  # allocates header + `size` bytes; returns the payload address
+  var res = cast[ptr ObjHeader](allocFast(r, size + sizeof(ObjHeader)))
+  res.typ = typ
+  if typ.finalizer != nil:  # objects with a finalizer are pushed onto a finalizer list
+    res.nextFinal = r.head.head  # NOTE(review): list of the FIRST chunk, not the chunk holding `res` - confirm
+    r.head.head = res
+  result = res +! sizeof(ObjHeader)
+
+proc rawNewSeq(r: var MemRegion, typ: PNimType, size: int): pointer =  # allocates SeqHeader + `size` bytes; returns the payload address
+  var res = cast[ptr SeqHeader](allocFast(r, size + sizeof(SeqHeader)))
+  res.typ = typ
+  res.region = addr(r)  # remembered so growObj can reallocate in the same region
+  result = res +! sizeof(SeqHeader)
+
+proc newObj(typ: PNimType, size: int): pointer {.compilerRtl.} =  # zero-initialized object in the thread-local region
+  sysAssert typ.kind notin {tySequence, tyString}, "newObj cannot be used to construct seqs"
+  result = rawNewObj(tlRegion, typ, size)
+  zeroMem(result, size)
+  when defined(memProfiler): nimProfile(size)
+
+proc newObjNoInit(typ: PNimType, size: int): pointer {.compilerRtl.} =  # like newObj but the payload is left uninitialized
+  sysAssert typ.kind notin {tySequence, tyString}, "newObj cannot be used to construct seqs"
+  result = rawNewObj(tlRegion, typ, size)
+  when defined(memProfiler): nimProfile(size)
+
+{.push overflowChecks: on.}
+proc newSeq(typ: PNimType, len: int): pointer {.compilerRtl.} =  # zeroed seq with `len` elements in the thread-local region
+  let size = roundup(align(GenericSeqSize, typ.base.align) + len * typ.base.size, MemAlign)
+  result = rawNewSeq(tlRegion, typ, size)
+  zeroMem(result, size)
+  cast[PGenericSeq](result).len = len
+  cast[PGenericSeq](result).reserved = len
+
+proc newStr(typ: PNimType, len: int; init: bool): pointer {.compilerRtl.} =  # empty string with capacity `len`; zeroed only if `init`
+  let size = roundup(len + GenericSeqSize, MemAlign)
+  result = rawNewSeq(tlRegion, typ, size)
+  if init: zeroMem(result, size)
+  cast[PGenericSeq](result).len = 0  # length starts at 0; only `reserved` reflects `len`
+  cast[PGenericSeq](result).reserved = len
+{.pop.}
+
+proc newObjRC1(typ: PNimType, size: int): pointer {.compilerRtl.} =  # zeroed object; unlike newObj, no seq/string assert and no profiling
+  result = rawNewObj(tlRegion, typ, size)
+  zeroMem(result, size)
+
+proc newSeqRC1(typ: PNimType, len: int): pointer {.compilerRtl.} =  # forwards to newSeq
+  result = newSeq(typ, len)
+
+proc growObj(regionUnused: var MemRegion; old: pointer, newsize: int): pointer =  # reallocates a seq/string in the region recorded in its header
+  let sh = cast[ptr SeqHeader](old -! sizeof(SeqHeader))  # header sits directly before the payload
+  let typ = sh.typ
+  result = rawNewSeq(sh.region[], typ,
+                     roundup(newsize, MemAlign))
+  let elemSize = if typ.kind == tyString: 1 else: typ.base.size
+  let elemAlign = if typ.kind == tyString: 1 else: typ.base.align
+  let oldsize = align(GenericSeqSize, elemAlign) + cast[PGenericSeq](old).len*elemSize
+  zeroMem(result +! oldsize, newsize-oldsize)  # clear the newly gained tail
+  copyMem(result, old, oldsize)
+  dealloc(sh.region[], old, roundup(oldsize, MemAlign))
+
+proc growObj(old: pointer, newsize: int): pointer {.rtl.} =  # `tlRegion` is passed but ignored by the overload above
+  result = growObj(tlRegion, old, newsize)
+
+proc unsureAsgnRef(dest: PPointer, src: pointer) {.compilerproc, inline.} =  # plain pointer store, no bookkeeping
+  dest[] = src
+proc asgnRef(dest: PPointer, src: pointer) {.compilerproc, inline.} =  # plain pointer store, no bookkeeping
+  dest[] = src
+proc asgnRefNoCycle(dest: PPointer, src: pointer) {.compilerproc, inline,
+  deprecated: "old compiler compat".} = asgnRef(dest, src)
+
+proc allocImpl(size: Natural): pointer =  # malloc-backed; raises out-of-memory on nil
+  result = c_malloc(cast[csize_t](size))
+  if result == nil: raiseOutOfMem()
+proc alloc0Impl(size: Natural): pointer =  # like alloc, but the memory is zeroed
+  result = alloc(size)
+  zeroMem(result, size)
+proc reallocImpl(p: pointer, newsize: Natural): pointer =  # realloc-backed; raises out-of-memory on nil
+  result = c_realloc(p, cast[csize_t](newsize))
+  if result == nil: raiseOutOfMem()
+proc realloc0Impl(p: pointer, oldsize, newsize: Natural): pointer =  # realloc that zeroes any newly gained bytes
+  result = c_realloc(p, cast[csize_t](newsize))
+  if result == nil: raiseOutOfMem()
+  if newsize > oldsize:  # only the grown part needs clearing
+    zeroMem(cast[pointer](cast[int](result) + oldsize), newsize - oldsize)
+proc deallocImpl(p: pointer) = c_free(p)  # releases memory obtained from the procs above
+
+proc alloc0(r: var MemRegion; size: Natural): pointer =
+  # ignores the region. That is correct for the channels module
+  # but incorrect in general. XXX
+  result = alloc0(size)  # forwards to the region-less alloc0
+
+proc alloc(r: var MemRegion; size: Natural): pointer =
+  # ignores the region. That is correct for the channels module
+  # but incorrect in general. XXX
+  result = alloc(size)  # forwards to the region-less alloc
+
+proc dealloc(r: var MemRegion; p: pointer) = dealloc(p)  # `r` is ignored here as well
+
+proc allocSharedImpl(size: Natural): pointer =  # malloc-backed; raises out-of-memory on nil
+  result = c_malloc(cast[csize_t](size))
+  if result == nil: raiseOutOfMem()
+proc allocShared0Impl(size: Natural): pointer =  # zeroed variant; allocates through alloc
+  result = alloc(size)
+  zeroMem(result, size)
+proc reallocSharedImpl(p: pointer, newsize: Natural): pointer =  # realloc-backed; raises out-of-memory on nil
+  result = c_realloc(p, cast[csize_t](newsize))
+  if result == nil: raiseOutOfMem()
+proc reallocShared0Impl(p: pointer, oldsize, newsize: Natural): pointer =  # realloc that zeroes any newly gained bytes
+  result = c_realloc(p, cast[csize_t](newsize))
+  if result == nil: raiseOutOfMem()
+  if newsize > oldsize:  # only the grown part needs clearing
+    zeroMem(cast[pointer](cast[int](result) + oldsize), newsize - oldsize)
+proc deallocSharedImpl(p: pointer) = c_free(p)  # releases memory obtained from the procs above
+
+when hasThreadSupport:
+  proc getFreeSharedMem(): int = 0
+  proc getTotalSharedMem(): int = 0
+  proc getOccupiedSharedMem(): int = 0
+
+proc GC_disable() = discard  # no-op in this allocator
+proc GC_enable() = discard  # no-op in this allocator
+proc GC_fullCollect() = discard  # no-op in this allocator
+proc GC_setStrategy(strategy: GC_Strategy) = discard  # `strategy` is ignored
+proc GC_enableMarkAndSweep() = discard  # no-op in this allocator
+proc GC_disableMarkAndSweep() = discard  # no-op in this allocator
+proc GC_getStatistics(): string = return ""  # no statistics are collected
+
+proc getOccupiedMem(): int =  # thread-local region: total chunk bytes minus free bytes of the tail chunk
+  result = tlRegion.totalSize - tlRegion.remaining
+proc getFreeMem(): int = tlRegion.remaining  # free bytes in the thread-local tail chunk
+proc getTotalMem(): int =  # NOTE(review): bit 0 of totalSize doubles as a flag, so this may be off by one
+  result = tlRegion.totalSize
+
+proc getOccupiedMem*(r: MemRegion): int =  # same computation for an explicit region
+  result = r.totalSize - r.remaining
+proc getFreeMem*(r: MemRegion): int = r.remaining  # free bytes in `r`'s tail chunk
+proc getTotalMem*(r: MemRegion): int =  # sum of `r`'s chunk sizes
+  result = r.totalSize
+
+proc nimGC_setStackBottom(theStackBottom: pointer) = discard
+
+proc nimGCref(x: pointer) {.compilerproc.} = discard
+proc nimGCunref(x: pointer) {.compilerproc.} = discard
diff --git a/lib/system/hti.nim b/lib/system/hti.nim
index a2d132dbf..a26aff982 100755..100644
--- a/lib/system/hti.nim
+++ b/lib/system/hti.nim
@@ -1,20 +1,14 @@
 #
 #
-#            Nimrod's Runtime Library
+#            Nim's Runtime Library
 #        (c) Copyright 2012 Andreas Rumpf
 #
 #    See the file "copying.txt", included in this
 #    distribution, for details about the copyright.
 #
 
-when defined(NimString): 
-  # we are in system module:
-  {.pragma: codegenType, compilerproc.}
-else:
-  {.pragma: codegenType.}
-
-type 
-  # This should be he same as ast.TTypeKind
+type
+  # This should be the same as ast.TTypeKind
   # many enum fields are not used at runtime
   TNimKind = enum
     tyNone,
@@ -23,10 +17,10 @@ type
     tyEmpty,
     tyArrayConstr,
     tyNil,
-    tyExpr,
-    tyStmt,
+    tyUntyped,
+    tyTyped,
     tyTypeDesc,
-    tyGenericInvokation, # ``T[a, b]`` for types to invoke
+    tyGenericInvocation, # ``T[a, b]`` for types to invoke
     tyGenericBody,       # ``T[a, b, body]`` last parameter is the body
     tyGenericInst,       # ``T[a, b, realInstance]`` instantiated generic type
     tyGenericParam,      # ``a`` in the example
@@ -46,7 +40,7 @@ type
     tyPointer,
     tyOpenArray,
     tyString,
-    tyCString,
+    tyCstring,
     tyForward,
     tyInt,
     tyInt8,
@@ -62,30 +56,68 @@ type
     tyUInt16,
     tyUInt32,
     tyUInt64,
-    tyBigNum,
+    tyOwned, tyUnused1, tyUnused2,
+    tyVarargsHidden,
+    tyUncheckedArray,
+    tyErrorHidden,
+    tyBuiltInTypeClassHidden,
+    tyUserTypeClassHidden,
+    tyUserTypeClassInstHidden,
+    tyCompositeTypeClassHidden,
+    tyInferredHidden,
+    tyAndHidden, tyOrHidden, tyNotHidden,
+    tyAnythingHidden,
+    tyStaticHidden,
+    tyFromExprHidden,
+    tyOptDeprecated,
+    tyVoidHidden
 
   TNimNodeKind = enum nkNone, nkSlot, nkList, nkCase
-  TNimNode {.codegenType, final.} = object
+  TNimNode {.compilerproc.} = object
     kind: TNimNodeKind
     offset: int
     typ: ptr TNimType
-    name: Cstring
+    name: cstring
     len: int
-    sons: ptr array [0..0x7fff, ptr TNimNode]
+    sons: ptr array[0x7fff, ptr TNimNode]
 
-  TNimTypeFlag = enum 
+  TNimTypeFlag = enum
     ntfNoRefs = 0,     # type contains no tyRef, tySequence, tyString
     ntfAcyclic = 1,    # type cannot form a cycle
     ntfEnumHole = 2    # enum has holes and thus `$` for them needs the slow
                        # version
-  TNimType {.codegenType, final.} = object
-    size: int
+  TNimType {.compilerproc.} = object
+    when defined(gcHooks):
+      head*: pointer
+    size*: int
+    align*: int
     kind: TNimKind
     flags: set[TNimTypeFlag]
-    base: ptr TNimType
+    base*: ptr TNimType
     node: ptr TNimNode # valid for tyRecord, tyObject, tyTuple, tyEnum
-    finalizer: pointer # the finalizer for the type
-    marker: proc (p: pointer, op: int) {.nimcall.} # marker proc for GC
-  PNimType = ptr TNimType
-  
+    finalizer*: pointer # the finalizer for the type
+    marker*: proc (p: pointer, op: int) {.nimcall, benign, tags: [], raises: [].} # marker proc for GC
+    deepcopy: proc (p: pointer): pointer {.nimcall, benign, tags: [], raises: [].}
+    when defined(nimSeqsV2):
+      typeInfoV2*: pointer
+    when defined(nimTypeNames):
+      name: cstring
+      nextType: ptr TNimType
+      instances: int # count the number of instances
+      sizes: int # sizes of all instances in bytes
+
+when defined(gcHooks):
+  type
+    PNimType* = ptr TNimType
+else:
+  type
+    PNimType = ptr TNimType
+
+when defined(nimTypeNames):
+  # Declare this variable only once in system.nim
+  when declared(ThisIsSystem):
+    var nimTypeRoot {.compilerproc.}: PNimType
+  else:
+    var nimTypeRoot {.importc.}: PNimType
+
 # node.len may be the ``first`` element of a set
diff --git a/lib/system/inclrtl.nim b/lib/system/inclrtl.nim
index d9dfd3aa2..3bf0b9893 100755..100644
--- a/lib/system/inclrtl.nim
+++ b/lib/system/inclrtl.nim
@@ -1,7 +1,7 @@
 #
 #
-#            Nimrod's Runtime Library
-#        (c) Copyright 2012 Andreas Rumpf
+#            Nim's Runtime Library
+#        (c) Copyright 2015 Andreas Rumpf
 #
 #    See the file "copying.txt", included in this
 #    distribution, for details about the copyright.
@@ -16,29 +16,35 @@
 #    -> defined(useNimRtl) or appType == "lib" and not defined(createNimRtl)
 # 3) Exported into nimrtl.
 #    -> appType == "lib" and defined(createNimRtl)
+when not defined(nimNewShared):
+  {.pragma: gcsafe.}
 
 when defined(createNimRtl):
-  when defined(useNimRtl): 
+  when defined(useNimRtl):
     {.error: "Cannot create and use nimrtl at the same time!".}
   elif appType != "lib":
     {.error: "nimrtl must be built as a library!".}
 
-when defined(createNimRtl): 
-  {.pragma: rtl, exportc: "nimrtl_$1", dynlib.}
+when defined(createNimRtl):
+  {.pragma: rtl, exportc: "nimrtl_$1", dynlib, gcsafe.}
   {.pragma: inl.}
   {.pragma: compilerRtl, compilerproc, exportc: "nimrtl_$1", dynlib.}
 elif defined(useNimRtl):
-  when hostOS == "windows": 
-    const nimrtl* = "nimrtl.dll"
-  elif hostOS == "macosx":
-    const nimrtl* = "nimrtl.dylib"
-  else: 
-    const nimrtl* = "libnimrtl.so"
-  {.pragma: rtl, importc: "nimrtl_$1", dynlib: nimrtl.}
+  #[
+  `{.rtl.}` should only be used for non-generic procs.
+  ]#
+  const nimrtl* =
+    when defined(windows): "nimrtl.dll"
+    elif defined(macosx): "libnimrtl.dylib"
+    else: "libnimrtl.so"
+  {.pragma: rtl, importc: "nimrtl_$1", dynlib: nimrtl, gcsafe.}
   {.pragma: inl.}
   {.pragma: compilerRtl, compilerproc, importc: "nimrtl_$1", dynlib: nimrtl.}
 else:
-  {.pragma: rtl.}
+  {.pragma: rtl, gcsafe.}
   {.pragma: inl, inline.}
   {.pragma: compilerRtl, compilerproc.}
 
+{.pragma: benign, gcsafe.}
+
+{.push sinkInference: on.}
diff --git a/lib/system/indexerrors.nim b/lib/system/indexerrors.nim
new file mode 100644
index 000000000..6a8cb8a0a
--- /dev/null
+++ b/lib/system/indexerrors.nim
@@ -0,0 +1,15 @@
+# imported by other modules, unlike helpers.nim which is included
+# xxx this is now included instead of imported, we should import instead
+
+template formatErrorIndexBound*[T](i, a, b: T): string =  # message for index `i` outside the bounds `a .. b`
+  when defined(standalone):  # standalone builds get a fixed message without formatting
+    "indexOutOfBounds"
+  else:
+    if b < a: "index out of bounds, the container is empty"
+    else: "index " & $i & " not in " & $a & " .. " & $b
+
+template formatErrorIndexBound*[T](i, n: T): string =  # shorthand for the bounds `0 .. n`
+  formatErrorIndexBound(i, 0, n)
+
+template formatFieldDefect*(f, discVal): string =  # concatenates `f`, `discVal` and a closing quote
+  f & discVal & "'"  # presumably `f` already ends with the opening quote - verify against callers
diff --git a/lib/system/indices.nim b/lib/system/indices.nim
new file mode 100644
index 000000000..f2bad2528
--- /dev/null
+++ b/lib/system/indices.nim
@@ -0,0 +1,164 @@
+when not defined(nimHasSystemRaisesDefect):
+  {.pragma: systemRaisesDefect.}
+
+type
+  BackwardsIndex* = distinct int ## Type that is constructed by `^` for
+                                 ## reversed array accesses.
+                                 ## (See `^ template <#^.t,int>`_)
+
+template `^`*(x: int): BackwardsIndex = BackwardsIndex(x)
+  ## Builtin `roof`:idx: operator that can be used for convenient array access.
+  ## `a[^x]` is a shortcut for `a[a.len-x]`.
+  ##
+  ##   ```nim
+  ##   let
+  ##     a = [1, 3, 5, 7, 9]
+  ##     b = "abcdefgh"
+  ##
+  ##   echo a[^1] # => 9
+  ##   echo b[^2] # => g
+  ##   ```
+
+proc `[]`*[T](s: openArray[T]; i: BackwardsIndex): T {.inline, systemRaisesDefect.} =
+  system.`[]`(s, s.len - int(i))  # `^i` addresses position len - i
+
+proc `[]`*[Idx, T](a: array[Idx, T]; i: BackwardsIndex): T {.inline, systemRaisesDefect.} =
+  a[Idx(a.len - int(i) + int low(a))]  # offset by low(a) for arrays not starting at 0
+proc `[]`*(s: string; i: BackwardsIndex): char {.inline, systemRaisesDefect.} = s[s.len - int(i)]
+
+proc `[]`*[T](s: var openArray[T]; i: BackwardsIndex): var T {.inline, systemRaisesDefect.} =
+  system.`[]`(s, s.len - int(i))  # mutable view of the same element
+proc `[]`*[Idx, T](a: var array[Idx, T]; i: BackwardsIndex): var T {.inline, systemRaisesDefect.} =
+  a[Idx(a.len - int(i) + int low(a))]
+proc `[]`*(s: var string; i: BackwardsIndex): var char {.inline, systemRaisesDefect.} = s[s.len - int(i)]
+
+proc `[]=`*[T](s: var openArray[T]; i: BackwardsIndex; x: T) {.inline, systemRaisesDefect.} =
+  system.`[]=`(s, s.len - int(i), x)  # stores `x` at position len - i
+proc `[]=`*[Idx, T](a: var array[Idx, T]; i: BackwardsIndex; x: T) {.inline, systemRaisesDefect.} =
+  a[Idx(a.len - int(i) + int low(a))] = x
+proc `[]=`*(s: var string; i: BackwardsIndex; x: char) {.inline, systemRaisesDefect.} =
+  s[s.len - int(i)] = x
+
+template `..^`*(a, b: untyped): untyped =
+  ## A shortcut for `.. ^` to avoid the common gotcha that a space between
+  ## '..' and '^' is required.
+  a .. ^b
+
+template `..<`*(a, b: untyped): untyped =
+  ## A shortcut for `a .. pred(b)`.
+  ##   ```nim
+  ##   for i in 5 ..< 9:
+  ##     echo i # => 5; 6; 7; 8
+  ##   ```
+  a .. (when b is BackwardsIndex: succ(b) else: pred(b))  # a backwards index counts from the end, so excluding it means succ
+
+template `[]`*(s: string; i: int): char = arrGet(s, i)
+template `[]=`*(s: string; i: int; val: char) = arrPut(s, i, val)
+
+template `^^`(s, i: untyped): untyped =  # resolves `i` to a plain int index into `s`
+  (when i is BackwardsIndex: s.len - int(i) else: int(i))
+
+template spliceImpl(s, a, L, b: typed): untyped =  # replaces the `L` elements of `s` starting at `a` with `b`
+  # make room for additional elements or cut:
+  var shift = b.len - max(0,L)  # ignore negative slice size
+  var newLen = s.len + shift
+  if shift > 0:
+    # enlarge:
+    setLen(s, newLen)
+    for i in countdown(newLen-1, a+b.len): movingCopy(s[i], s[i-shift])  # back to front so sources are not overwritten
+  else:
+    for i in countup(a+b.len, newLen-1): movingCopy(s[i], s[i-shift])  # front to back while shrinking
+    # cut down:
+    setLen(s, newLen)
+  # fill the hole:
+  for i in 0 ..< b.len: s[a+i] = b[i]
+
+proc `[]`*[T, U: Ordinal](s: string, x: HSlice[T, U]): string {.inline, systemRaisesDefect.} =
+  ## Slice operation for strings.
+  ## Returns the inclusive range `[s[x.a], s[x.b]]`:
+  ##   ```nim
+  ##   var s = "abcdef"
+  ##   assert s[1..3] == "bcd"
+  ##   ```
+  let a = s ^^ x.a  # start index, with a `^` bound already resolved
+  let L = (s ^^ x.b) - a + 1  # inclusive length
+  result = newString(L)
+  for i in 0 ..< L: result[i] = s[i + a]
+
+proc `[]=`*[T, U: Ordinal](s: var string, x: HSlice[T, U], b: string) {.systemRaisesDefect.} =
+  ## Slice assignment for strings.
+  ##
+  ## If `b.len` is not exactly the number of elements that are referred to
+  ## by `x`, a `splice`:idx: is performed:
+  ##
+  runnableExamples:
+    var s = "abcdefgh"
+    s[1 .. ^2] = "xyz"
+    assert s == "axyzh"
+
+  let a = s ^^ x.a  # start index, with a `^` bound already resolved
+  let L = (s ^^ x.b) - a + 1  # inclusive length of the target range
+  if L == b.len:  # same length: overwrite in place
+    for i in 0..<L: s[i+a] = b[i]
+  else:
+    spliceImpl(s, a, L, b)
+
+proc `[]`*[Idx, T; U, V: Ordinal](a: array[Idx, T], x: HSlice[U, V]): seq[T] {.systemRaisesDefect.} =
+  ## Slice operation for arrays.
+  ## Returns the inclusive range `[a[x.a], a[x.b]]`:
+  ##   ```nim
+  ##   var a = [1, 2, 3, 4]
+  ##   assert a[0..2] == @[1, 2, 3]
+  ##   ```
+  ##
+  ## See also:
+  ## * `toOpenArray(array[I, T];I,I) <#toOpenArray,array[I,T],I,I>`_
+  let xa = a ^^ x.a  # start index as a plain int
+  let L = (a ^^ x.b) - xa + 1  # inclusive length
+  result = newSeq[T](L)
+  for i in 0..<L: result[i] = a[Idx(i + xa)]  # convert back to the array's index type
+
+proc `[]=`*[Idx, T; U, V: Ordinal](a: var array[Idx, T], x: HSlice[U, V], b: openArray[T]) {.systemRaisesDefect.} =
+  ## Slice assignment for arrays.
+  ##   ```nim
+  ##   var a = [10, 20, 30, 40, 50]
+  ##   a[1..2] = @[99, 88]
+  ##   assert a == [10, 99, 88, 40, 50]
+  ##   ```
+  let xa = a ^^ x.a  # start index as a plain int
+  let L = (a ^^ x.b) - xa + 1  # inclusive length
+  if L == b.len:
+    for i in 0..<L: a[Idx(i + xa)] = b[i]
+  else:  # the code splices seqs and strings, but raises for arrays
+    sysFatal(RangeDefect, "different lengths for slice assignment")
+
+proc `[]`*[T; U, V: Ordinal](s: openArray[T], x: HSlice[U, V]): seq[T] {.systemRaisesDefect.} =
+  ## Slice operation for sequences.
+  ## Returns the inclusive range `[s[x.a], s[x.b]]`:
+  ##   ```nim
+  ##   var s = @[1, 2, 3, 4]
+  ##   assert s[0..2] == @[1, 2, 3]
+  ##   ```
+  ##
+  ## See also:
+  ## * `toOpenArray(openArray[T];int,int) <#toOpenArray,openArray[T],int,int>`_
+  let a = s ^^ x.a  # start index, with a `^` bound already resolved
+  let L = (s ^^ x.b) - a + 1  # inclusive length
+  newSeq(result, L)
+  for i in 0 ..< L: result[i] = s[i + a]
+
+proc `[]=`*[T; U, V: Ordinal](s: var seq[T], x: HSlice[U, V], b: openArray[T]) {.systemRaisesDefect.} =
+  ## Slice assignment for sequences.
+  ##
+  ## If `b.len` is not exactly the number of elements that are referred to
+  ## by `x`, a `splice`:idx: is performed.
+  runnableExamples:
+    var s = @"abcdefgh"
+    s[1 .. ^2] = @"xyz"
+    assert s == @"axyzh"
+  let a = s ^^ x.a  # start index, with a `^` bound already resolved
+  let L = (s ^^ x.b) - a + 1  # inclusive length of the target range
+  if L == b.len:  # same length: overwrite in place
+    for i in 0 ..< L: s[i+a] = b[i]
+  else:
+    spliceImpl(s, a, L, b)
diff --git a/lib/system/integerops.nim b/lib/system/integerops.nim
new file mode 100644
index 000000000..4ef3594f1
--- /dev/null
+++ b/lib/system/integerops.nim
@@ -0,0 +1,132 @@
+#
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2020 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+# Integer arithmetic with overflow checking. Uses
+# intrinsics or inline assembler.
+
+proc raiseOverflow {.compilerproc, noinline.} =  # raises OverflowDefect via sysFatal
+  # a single proc to reduce code size to a minimum
+  sysFatal(OverflowDefect, "over- or underflow")
+
+proc raiseDivByZero {.compilerproc, noinline.} =  # raises DivByZeroDefect via sysFatal
+  sysFatal(DivByZeroDefect, "division by zero")
+
+{.pragma: nimbaseH, importc, nodecl, noSideEffect, compilerproc.}
+
+when not defined(nimEmulateOverflowChecks):
+  # take the #define from nimbase.h
+
+  proc nimAddInt(a, b: int, res: ptr int): bool {.nimbaseH.}
+  proc nimSubInt(a, b: int, res: ptr int): bool {.nimbaseH.}
+  proc nimMulInt(a, b: int, res: ptr int): bool {.nimbaseH.}
+
+  proc nimAddInt64(a, b: int64; res: ptr int64): bool {.nimbaseH.}
+  proc nimSubInt64(a, b: int64; res: ptr int64): bool {.nimbaseH.}
+  proc nimMulInt64(a, b: int64; res: ptr int64): bool {.nimbaseH.}
+
+# unary minus and 'abs' are not required here anymore; they are handled
+# directly in the code generator.
+# 'nimModInt' does exist in nimbase.h without a check, as we moved the
+# check for 0 to the codegen.
+proc nimModInt(a, b: int; res: ptr int): bool {.nimbaseH.}
+
+proc nimModInt64(a, b: int64; res: ptr int64): bool {.nimbaseH.}
+
+# Platform independent versions.
+
+template addImplFallback(name, T, U) {.dirty.} =  # defines `name` as a checked add unless it is already declared
+  when not declared(name):
+    proc name(a, b: T; res: ptr T): bool {.compilerproc, inline.} =
+      let r = cast[T](cast[U](a) + cast[U](b))  # wrap-around sum computed in the unsigned type
+      if (r xor a) >= T(0) or (r xor b) >= T(0):  # sign of r matches at least one operand
+        res[] = r
+      else:
+        result = true  # true signals overflow; `res` is left untouched
+
+addImplFallback(nimAddInt, int, uint)
+addImplFallback(nimAddInt64, int64, uint64)
+
+template subImplFallback(name, T, U) {.dirty.} =  # defines `name` as a checked subtract unless it is already declared
+  when not declared(name):
+    proc name(a, b: T; res: ptr T): bool {.compilerproc, inline.} =
+      let r = cast[T](cast[U](a) - cast[U](b))  # wrap-around difference computed in the unsigned type
+      if (r xor a) >= 0 or (r xor not b) >= 0:  # r has a's sign, or the opposite of b's
+        res[] = r
+      else:
+        result = true  # true signals overflow; `res` is left untouched
+
+subImplFallback(nimSubInt, int, uint)
+subImplFallback(nimSubInt64, int64, uint64)
+
+template mulImplFallback(name, T, U, conv) {.dirty.} =  # `conv` converts T to the float type used for the cross-check
+  #
+  # This code has been inspired by Python's source code.
+  # The native int product x*y is either exactly right or *way* off, being
+  # just the last n bits of the true product, where n is the number of bits
+  # in an int (the delivered product is the true product plus i*2**n for
+  # some integer i).
+  #
+  # The native float64 product x*y is subject to three
+  # rounding errors: on a sizeof(int)==8 box, each cast to double can lose
+  # info, and even on a sizeof(int)==4 box, the multiplication can lose info.
+  # But, unlike the native int product, it's not in *range* trouble:  even
+  # if sizeof(int)==32 (256-bit ints), the product easily fits in the
+  # dynamic range of a float64. So the leading 50 (or so) bits of the float64
+  # product are correct.
+  #
+  # We check these two ways against each other, and declare victory if
+  # they're approximately the same. Else, because the native int product is
+  # the only one that can lose catastrophic amounts of information, it's the
+  # native int product that must have overflowed.
+  #
+  when not declared(name):
+    proc name(a, b: T; res: ptr T): bool {.compilerproc, inline.} =
+      let r = cast[T](cast[U](a) * cast[U](b))  # wrap-around product computed in the unsigned type
+      let floatProd = conv(a) * conv(b)
+      let resAsFloat = conv(r)
+      # Fast path for normal case: small multiplicands, and no info
+      # is lost in either method.
+      if resAsFloat == floatProd:
+        res[] = r
+      else:
+        # Somebody somewhere lost info. Close enough, or way off? Note
+        # that a != 0 and b != 0 (else resAsFloat == floatProd == 0).
+        # The difference either is or isn't significant compared to the
+        # true value (of which floatProd is a good approximation).
+
+        # abs(diff)/abs(prod) <= 1/32 iff
+        #   32 * abs(diff) <= abs(prod) -- 5 good bits is "close enough"
+        if 32.0 * abs(resAsFloat - floatProd) <= abs(floatProd):
+          res[] = r
+        else:
+          result = true  # true signals overflow; `res` is left untouched
+
+mulImplFallback(nimMulInt, int, uint, toFloat)
+mulImplFallback(nimMulInt64, int64, uint64, toBiggestFloat)
+
+
+template divImplFallback(name, T) {.dirty.} =  # defines `name` as a checked signed division
+  proc name(a, b: T; res: ptr T): bool {.compilerproc, inline.} =
+    # we moved the b == 0 case out into the codegen.
+    if a == low(T) and b == T(-1):  # low(T) div -1 does not fit in T
+      result = true
+    else:
+      res[] = a div b
+
+divImplFallback(nimDivInt, int)
+divImplFallback(nimDivInt64, int64)
+
+proc raiseFloatInvalidOp {.compilerproc, noinline.} =  # raises FloatInvalidOpDefect via sysFatal
+  sysFatal(FloatInvalidOpDefect, "FPU operation caused a NaN result")
+
+proc raiseFloatOverflow(x: float64) {.compilerproc, noinline.} =  # picks the defect from the sign of `x`
+  if x > 0.0:
+    sysFatal(FloatOverflowDefect, "FPU operation caused an overflow")
+  else:  # zero and negative values are reported as underflow
+    sysFatal(FloatUnderflowDefect, "FPU operations caused an underflow")
diff --git a/lib/system/iterators.nim b/lib/system/iterators.nim
new file mode 100644
index 000000000..125bee98f
--- /dev/null
+++ b/lib/system/iterators.nim
@@ -0,0 +1,353 @@
+## Default iterators for some Nim types.
+
+when defined(nimPreviewSlimSystem):
+  import std/assertions
+
+when not defined(nimNoLentIterators):
+  template lent2(T): untyped = lent T
+else:
+  template lent2(T): untyped = T
+
+template unCheckedInc(x) =
+  {.push overflowChecks: off.}
+  inc(x)
+  {.pop.}
+
+iterator items*[T: not char](a: openArray[T]): lent2 T {.inline.} =
+  ## Yields every element of `a` in index order.
+  var idx = 0
+  while idx < a.len:
+    yield a[idx]
+    unCheckedInc(idx)
+
+iterator items*[T: char](a: openArray[T]): T {.inline.} =
+  ## Iterates over each item of `a`.
+  # A VM bug currently prevents taking the address of openArray[char]
+  # elements converted from a string (would fail in `tests/misc/thallo.nim`);
+  # in any case there is no performance advantage in returning char by address.
+  var i = 0
+  while i < len(a):
+    yield a[i]
+    unCheckedInc(i)
+
+iterator mitems*[T](a: var openArray[T]): var T {.inline.} =
+  ## Yields every element of `a` as a mutable location, in index order.
+  var idx = 0
+  while idx < a.len:
+    yield a[idx]
+    unCheckedInc(idx)
+
+iterator items*[IX, T](a: array[IX, T]): T {.inline.} =
+  ## Yields every element of `a`, walking the index type `IX` upwards.
+  when len(a) > 0:
+    var idx = IX.low
+    while true:
+      yield a[idx]
+      if idx >= IX.high: break
+      unCheckedInc(idx)
+
+iterator mitems*[IX, T](a: var array[IX, T]): var T {.inline.} =
+  ## Yields every element of `a` as a mutable location, from first to last.
+  when len(a) > 0:
+    var idx = IX.low
+    while true:
+      yield a[idx]
+      if idx >= IX.high: break
+      unCheckedInc(idx)
+
+iterator items*[T](a: set[T]): T {.inline.} =
+  ## Yields the members of `a`, probing candidates from `low(T)` upwards.
+  ## Only values that are actually contained in the set are produced, not
+  ## every value the set would be able to hold.
+  var code = int(low(T))
+  while code <= int(high(T)):
+    when T is enum and not defined(js):
+      if cast[T](code) in a: yield cast[T](code)
+    else:
+      if T(code) in a: yield T(code)
+    unCheckedInc(code)
+
+iterator items*(a: cstring): char {.inline.} =
+  ## Iterates over each item of `a`.
+  runnableExamples:
+    from std/sequtils import toSeq
+    assert toSeq("abc\0def".cstring) == @['a', 'b', 'c']
+    assert toSeq("abc".cstring) == @['a', 'b', 'c']
+  #[
+  xxx `assert toSeq(nil.cstring) == @[]` currently fails with SIGSEGV.
+  It is unclear whether we want to yield nothing for `nil` instead, paying
+  a small price for the `nil` check; a benchmark is needed. Note that
+  other procs support `nil`.
+  ]#
+  template impl() =
+    var i = 0
+    let n = len(a)
+    while i < n:
+      yield a[i]
+      unCheckedInc(i)
+  when defined(js): impl()
+  else:
+    when nimvm:
+      # xxx `cstring` should behave like the C backend here instead.
+      impl()
+    else:
+      var i = 0
+      while a[i] != '\0':
+        yield a[i]
+        unCheckedInc(i)
+
+iterator mitems*(a: var cstring): var char {.inline.} =
+  ## Yields every char of `a` as a mutable location.
+  # xxx this should give CT error in js RT.
+  runnableExamples:
+    from std/sugar import collect
+    var a = "abc\0def"
+    prepareMutation(a)
+    var b = a.cstring
+    let s = collect:
+      for bi in mitems(b):
+        if bi == 'b': bi = 'B'
+        bi
+    assert s == @['a', 'B', 'c']
+    assert b == "aBc"
+    assert a == "aBc\0def"
+
+  template impl() =
+    let total = len(a)
+    var idx = 0
+    while idx < total:
+      yield a[idx]
+      unCheckedInc(idx)
+  when defined(js): impl()
+  else:
+    when nimvm: impl()
+    else:
+      var idx = 0
+      while a[idx] != '\0':
+        yield a[idx]
+        unCheckedInc(idx)
+
+iterator items*[T: enum and Ordinal](E: typedesc[T]): T =
+  ## Yields every value of the enum type `E`, from `low(E)` to `high(E)`.
+  ## For enums with holes see `enumutils.items`.
+  runnableExamples:
+    type Goo = enum g0 = 2, g1, g2
+    from std/sequtils import toSeq
+    assert Goo.toSeq == [g0, g1, g2]
+  for value in E.low .. E.high:
+    yield value
+
+iterator items*[T: Ordinal](s: Slice[T]): T =
+  ## Yields each value of the slice `s`, starting at `s.a` and ending at
+  ## `s.b` (both bounds included).
+  for value in s.a .. s.b:
+    yield value
+
+iterator pairs*[T](a: openArray[T]): tuple[key: int, val: T] {.inline.} =
+  ## Yields `(index, a[index])` for every element of `a`.
+  var idx = 0
+  while idx < a.len:
+    yield (idx, a[idx])
+    unCheckedInc(idx)
+
+iterator mpairs*[T](a: var openArray[T]): tuple[key: int, val: var T]{.inline.} =
+  ## Yields `(index, a[index])` for every element of `a`.
+  ## The yielded `a[index]` is mutable.
+  var idx = 0
+  while idx < a.len:
+    yield (idx, a[idx])
+    unCheckedInc(idx)
+
+iterator pairs*[IX, T](a: array[IX, T]): tuple[key: IX, val: T] {.inline.} =
+  ## Yields `(index, a[index])` for every element of `a`, first to last.
+  when len(a) > 0:
+    var idx = IX.low
+    while true:
+      yield (idx, a[idx])
+      if idx >= IX.high: break
+      unCheckedInc(idx)
+
+iterator mpairs*[IX, T](a: var array[IX, T]): tuple[key: IX, val: var T] {.inline.} =
+  ## Yields `(index, a[index])` for every element of `a`, first to last.
+  ## The yielded `a[index]` is mutable.
+  when len(a) > 0:
+    var idx = IX.low
+    while true:
+      yield (idx, a[idx])
+      if idx >= IX.high: break
+      unCheckedInc(idx)
+
+iterator pairs*[T](a: seq[T]): tuple[key: int, val: T] {.inline.} =
+  ## Yields `(index, a[index])` for every element of `a`.
+  let total = a.len
+  var idx = 0
+  while idx < total:
+    yield (idx, a[idx])
+    unCheckedInc(idx)
+    assert(a.len == total, "the length of the seq changed while iterating over it")
+
+iterator mpairs*[T](a: var seq[T]): tuple[key: int, val: var T] {.inline.} =
+  ## Yields `(index, a[index])` for every element of `a`.
+  ## The yielded `a[index]` is mutable.
+  let total = a.len
+  var idx = 0
+  while idx < total:
+    yield (idx, a[idx])
+    unCheckedInc(idx)
+    assert(a.len == total, "the length of the seq changed while iterating over it")
+
+iterator pairs*(a: string): tuple[key: int, val: char] {.inline.} =
+  ## Yields `(index, a[index])` for every char of `a`.
+  let total = a.len
+  var idx = 0
+  while idx < total:
+    yield (idx, a[idx])
+    unCheckedInc(idx)
+    assert(a.len == total, "the length of the string changed while iterating over it")
+
+iterator mpairs*(a: var string): tuple[key: int, val: var char] {.inline.} =
+  ## Yields `(index, a[index])` for every char of `a`.
+  ## The yielded `a[index]` is mutable.
+  let total = a.len
+  var idx = 0
+  while idx < total:
+    yield (idx, a[idx])
+    unCheckedInc(idx)
+    assert(a.len == total, "the length of the string changed while iterating over it")
+
+iterator pairs*(a: cstring): tuple[key: int, val: char] {.inline.} =
+  ## Yields `(index, a[index])` for every char of `a`.
+  when not defined(js):
+    var idx = 0
+    while a[idx] != '\0':
+      yield (idx, a[idx])
+      unCheckedInc(idx)
+  else:
+    let total = len(a)
+    var idx = 0
+    while idx < total:
+      yield (idx, a[idx])
+      unCheckedInc(idx)
+
+iterator mpairs*(a: var cstring): tuple[key: int, val: var char] {.inline.} =
+  ## Yields `(index, a[index])` for every char of `a`.
+  ## The yielded `a[index]` is mutable.
+  when not defined(js):
+    var idx = 0
+    while a[idx] != '\0':
+      yield (idx, a[idx])
+      unCheckedInc(idx)
+  else:
+    let total = len(a)
+    var idx = 0
+    while idx < total:
+      yield (idx, a[idx])
+      unCheckedInc(idx)
+
+iterator items*[T](a: seq[T]): lent2 T {.inline.} =
+  ## Yields every element of `a` in index order.
+  let total = a.len
+  var idx = 0
+  while idx < total:
+    yield a[idx]
+    unCheckedInc(idx)
+    assert(a.len == total, "the length of the seq changed while iterating over it")
+
+iterator mitems*[T](a: var seq[T]): var T {.inline.} =
+  ## Yields every element of `a` as a mutable location, in index order.
+  let total = a.len
+  var idx = 0
+  while idx < total:
+    yield a[idx]
+    unCheckedInc(idx)
+    assert(a.len == total, "the length of the seq changed while iterating over it")
+
+iterator items*(a: string): char {.inline.} =
+  ## Yields every char of `a` in index order.
+  let total = a.len
+  var idx = 0
+  while idx < total:
+    yield a[idx]
+    unCheckedInc(idx)
+    assert(a.len == total, "the length of the string changed while iterating over it")
+
+iterator mitems*(a: var string): var char {.inline.} =
+  ## Yields every char of `a` as a mutable location, in index order.
+  let total = a.len
+  var idx = 0
+  while idx < total:
+    yield a[idx]
+    unCheckedInc(idx)
+    assert(a.len == total, "the length of the string changed while iterating over it")
+
+
+iterator fields*[T: tuple|object](x: T): RootObj {.
+  magic: "Fields", noSideEffect.} =
+  ## Iterates over every field of `x`.
+  ##
+  ## .. warning:: This really transforms the 'for' and unrolls the loop.
+  ##   The current implementation also has a bug
+  ##   that affects symbol binding in the loop body.
+  runnableExamples:
+    var t = (1, "foo")
+    for v in fields(t): v = default(typeof(v))
+    doAssert t == (0, "")
+
+iterator fields*[S:tuple|object, T:tuple|object](x: S, y: T): tuple[key: string, val: RootObj] {.
+  magic: "Fields", noSideEffect.} =
+  ## Iterates over every field of `x` and `y`.
+  ##
+  ## .. warning:: This really transforms the 'for' and unrolls the loop.
+  ##   The current implementation also has a bug that affects symbol binding
+  ##   in the loop body.
+  runnableExamples:
+    var t1 = (1, "foo")
+    var t2 = default(typeof(t1))
+    for v1, v2 in fields(t1, t2): v2 = v1
+    doAssert t1 == t2
+
+iterator fieldPairs*[T: tuple|object](x: T): tuple[key: string, val: RootObj] {.
+  magic: "FieldPairs", noSideEffect.} =
+  ## Iterates over every field of `x` returning their name and value.
+  ##
+  ## When you iterate over objects with different field types you have to use
+  ## the compile time `when` instead of a runtime `if` to select the code
+  ## you want to run for each type. To perform the comparison use the `is
+  ## operator <manual.html#generics-is-operator>`_.
+  ## Another way to do the same without `when` is to leave the task of
+  ## picking the appropriate code to a secondary proc which you overload for
+  ## each field type and pass the `value` to.
+  ##
+  ## .. warning:: This really transforms the 'for' and unrolls the loop. The
+  ##   current implementation also has a bug that affects symbol binding in the
+  ##   loop body.
+  runnableExamples:
+    type
+      Custom = object
+        foo: string
+        bar: bool
+    proc `$`(x: Custom): string =
+      result = "Custom:"
+      for name, value in x.fieldPairs:
+        when value is bool:
+          result.add("\n\t" & name & " is " & $value)
+        else:
+          result.add("\n\t" & name & " '" & value & "'")
+
+iterator fieldPairs*[S: tuple|object, T: tuple|object](x: S, y: T): tuple[
+  key: string, a, b: RootObj] {.
+  magic: "FieldPairs", noSideEffect.} =
+  ## Iterates over every field of `x` and `y`.
+  ##
+  ## .. warning:: This really transforms the 'for' and unrolls the loop.
+  ##   The current implementation also has a bug that affects symbol binding
+  ##   in the loop body.
+  runnableExamples:
+    type Foo = object
+      x1: int
+      x2: string
+    var a1 = Foo(x1: 12, x2: "abc")
+    var a2: Foo
+    for name, v1, v2 in fieldPairs(a1, a2):
+      when name == "x2": v2 = v1
+    doAssert a2 == Foo(x1: 0, x2: "abc")
diff --git a/lib/system/iterators_1.nim b/lib/system/iterators_1.nim
new file mode 100644
index 000000000..d00e3f823
--- /dev/null
+++ b/lib/system/iterators_1.nim
@@ -0,0 +1,180 @@
+when sizeof(int) <= 2:
+  type IntLikeForCount = int|int8|int16|char|bool|uint8|enum
+else:
+  type IntLikeForCount = int|int8|int16|int32|char|bool|uint8|uint16|enum
+
+iterator countdown*[T](a, b: T, step: Positive = 1): T {.inline.} =
+  ## Counts from ordinal value `a` down to `b` (inclusive) with the given
+  ## step count.
+  ##
+  ## `T` may be any ordinal type, `step` may only be positive.
+  ##
+  ## **Note**: This fails to count to `low(int)` if T = int for
+  ## efficiency reasons.
+  runnableExamples:
+    import std/sugar
+    let x = collect(newSeq):
+      for i in countdown(7, 3):
+        i
+    assert x == @[7, 6, 5, 4, 3]
+
+    let y = collect(newSeq):
+      for i in countdown(9, 2, 3):
+        i
+    assert y == @[9, 6, 3]
+  when T is (uint|uint64):
+    var res = a
+    while res >= b:
+      yield res
+      # unsigned: stop before `dec` would step past `b` and wrap around.
+      if res - b < T(step): break
+      dec(res, step)
+  elif T is IntLikeForCount and T is Ordinal:
+    var res = int(a)
+    while res >= int(b):
+      when defined(nimHasCastExtendedVm):
+        yield cast[T](res)
+      else:
+        yield T(res)
+      dec(res, step)
+  else:
+    var res = a
+    while res >= b:
+      yield res
+      dec(res, step)
+
+iterator countup*[T](a, b: T, step: Positive = 1): T {.inline.} =
+  ## Counts from ordinal value `a` to `b` (inclusive) with the given
+  ## step count.
+  ##
+  ## `T` may be any ordinal type, `step` may only be positive.
+  ##
+  ## **Note**: This fails to count to `high(int)` if T = int for
+  ## efficiency reasons.
+  runnableExamples:
+    import std/sugar
+    let x = collect(newSeq):
+      for i in countup(3, 7):
+        i
+    assert x == @[3, 4, 5, 6, 7]
+
+    let y = collect(newSeq):
+      for i in countup(2, 9, 3):
+        i
+    assert y == @[2, 5, 8]
+  # `mixin`: resolve `inc` at instantiation so overloads for custom `T` are found.
+  mixin inc
+  when T is IntLikeForCount and T is Ordinal:
+    var res = int(a)
+    while res <= int(b):
+      when defined(nimHasCastExtendedVm):
+        yield cast[T](res)
+      else:
+        yield T(res)
+      inc(res, step)
+  else:
+    var res = a
+    while res <= b:
+      yield res
+      inc(res, step)
+
+iterator `..`*[T](a, b: T): T {.inline.} =
+  ## Counts from `a` up to `b` (inclusive) in steps of one, i.e. the same
+  ## as `countup(a, b, 1)`.
+  ## See also:
+  ## * [..<](#..<.i,T,T)
+  runnableExamples:
+    import std/sugar
+
+    let x = collect(newSeq):
+      for i in 3 .. 7:
+        i
+
+    assert x == @[3, 4, 5, 6, 7]
+  mixin inc
+  when T is IntLikeForCount and T is Ordinal:
+    var cur = a.int
+    while cur <= b.int:
+      when defined(nimHasCastExtendedVm):
+        yield cast[T](cur)
+      else:
+        yield T(cur)
+      inc(cur)
+  else:
+    var cur = a
+    while cur <= b:
+      yield cur
+      inc(cur)
+
+template dotdotImpl(t) {.dirty.} =
+  iterator `..`*(a, b: t): t {.inline.} =
+    ## Variant of `..` specialized for one concrete integer type, provided
+    ## so that mixing integer types works better.
+    ##
+    ## See also:
+    ## * [..<](#..<.i,T,T)
+    var cur = a
+    while cur <= b:
+      yield cur
+      inc(cur)
+
+dotdotImpl(int64)
+dotdotImpl(int32)
+dotdotImpl(uint64)
+dotdotImpl(uint32)
+
+iterator `..<`*[T](a, b: T): T {.inline.} =
+  mixin inc
+  var cur = a
+  while cur < b:
+    yield cur
+    inc(cur)
+
+template dotdotLessImpl(t) {.dirty.} =
+  iterator `..<`*(a, b: t): t {.inline.} =
+    ## Variant of `..<` specialized for one concrete integer type, provided
+    ## so that mixing integer types works better.
+    var cur = a
+    while cur < b:
+      yield cur
+      inc(cur)
+
+dotdotLessImpl(int64)
+dotdotLessImpl(int32)
+dotdotLessImpl(uint64)
+dotdotLessImpl(uint32)
+
+iterator `||`*[S, T](a: S, b: T, annotation: static string = "parallel for"): T {.
+  inline, magic: "OmpParFor", sideEffect.} =
+  ## OpenMP parallel loop iterator. Same as `..` but the loop may run in parallel.
+  ##
+  ## `annotation` is an additional annotation for the code generator to use.
+  ## The default annotation is `parallel for`.
+  ## Please refer to the `OpenMP Syntax Reference
+  ## <https://www.openmp.org/wp-content/uploads/OpenMP-4.5-1115-CPP-web.pdf>`_
+  ## for further information.
+  ##
+  ## Note that the compiler maps that to
+  ## the `#pragma omp parallel for` construct of `OpenMP`:idx: and as
+  ## such isn't aware of the parallelism in your code! Be careful! Later
+  ## versions of `||` will get proper support by Nim's code generator
+  ## and GC.
+  discard
+
+iterator `||`*[S, T](a: S, b: T, step: Positive, annotation: static string = "parallel for"): T {.
+  inline, magic: "OmpParFor", sideEffect.} =
+  ## OpenMP parallel loop iterator with stepping.
+  ## Same as `countup` but the loop may run in parallel.
+  ##
+  ## `annotation` is an additional annotation for the code generator to use.
+  ## The default annotation is `parallel for`.
+  ## Please refer to the `OpenMP Syntax Reference
+  ## <https://www.openmp.org/wp-content/uploads/OpenMP-4.5-1115-CPP-web.pdf>`_
+  ## for further information.
+  ##
+  ## Note that the compiler maps that to
+  ## the `#pragma omp parallel for` construct of `OpenMP`:idx: and as
+  ## such isn't aware of the parallelism in your code! Be careful! Later
+  ## versions of `||` will get proper support by Nim's code generator
+  ## and GC.
+  discard
diff --git a/lib/system/jssys.nim b/lib/system/jssys.nim
index 789e39d6d..5599240fd 100755..100644
--- a/lib/system/jssys.nim
+++ b/lib/system/jssys.nim
@@ -1,37 +1,43 @@
 #
 #
-#            Nimrod's Runtime Library
-#        (c) Copyright 2012 Andreas Rumpf
+#            Nim's Runtime Library
+#        (c) Copyright 2015 Andreas Rumpf
 #
 #    See the file "copying.txt", included in this
 #    distribution, for details about the copyright.
 #
 
-when defined(nodejs):
-  proc alert*(s: cstring) {.importc: "console.log", nodecl.}
-else:
-  proc alert*(s: cstring) {.importc, nodecl.}
+include system/indexerrors
+import std/private/miscdollars
 
-proc log*(s: cstring) {.importc: "console.log", nodecl.}
+proc log*(s: cstring) {.importc: "console.log", varargs, nodecl.}
 
 type
-  PSafePoint = ptr TSafePoint
-  TSafePoint {.compilerproc, final.} = object
+  PSafePoint = ptr SafePoint
+  SafePoint {.compilerproc, final.} = object
     prev: PSafePoint # points to next safe point
-    exc: ref E_Base
+    exc: ref Exception
 
-  PCallFrame = ptr TCallFrame
-  TCallFrame {.importc, nodecl, final.} = object
+  PCallFrame = ptr CallFrame
+  CallFrame {.importc, nodecl, final.} = object
     prev: PCallFrame
-    procname: CString
+    procname: cstring
     line: int # current line number
-    filename: CString
+    filename: cstring
+
+  PJSError = ref object
+    columnNumber {.importc.}: int
+    fileName {.importc.}: cstring
+    lineNumber {.importc.}: int
+    message {.importc.}: cstring
+    stack {.importc.}: cstring
+
+  JSRef = ref RootObj # Fake type.
 
 var
   framePtr {.importc, nodecl, volatile.}: PCallFrame
-  excHandler {.importc, nodecl, volatile.}: PSafePoint = nil
-    # list of exception handlers
-    # a global variable for the root of all try blocks
+  excHandler {.importc, nodecl, volatile.}: int = 0
+  lastJSError {.importc, nodecl, volatile.}: PJSError = nil
 
 {.push stacktrace: off, profiler:off.}
 proc nimBoolToStr(x: bool): string {.compilerproc.} =
@@ -42,21 +48,46 @@ proc nimCharToStr(x: char): string {.compilerproc.} =
   result = newString(1)
   result[0] = x
 
+proc isNimException(): bool {.asmNoStackFrame.} =
+  {.emit: "return `lastJSError` && `lastJSError`.m_type;".}
+
+proc getCurrentException*(): ref Exception {.compilerRtl, benign.} =
+  if isNimException(): result = cast[ref Exception](lastJSError)
+
 proc getCurrentExceptionMsg*(): string =
-  if excHandler != nil: return $excHandler.exc.msg
+  if lastJSError != nil:
+    if isNimException():
+      return cast[Exception](lastJSError).msg
+    else:
+      var msg: cstring
+      {.emit: """
+      if (`lastJSError`.message !== undefined) {
+        `msg` = `lastJSError`.message;
+      }
+      """.}
+      if not msg.isNil:
+        return $msg
   return ""
 
+proc setCurrentException*(exc: ref Exception) =
+  lastJSError = cast[PJSError](exc)
+
+proc closureIterSetupExc(e: ref Exception) {.compilerproc, inline.} =
+  ## Used to set up exception handling for closure iterators
+  setCurrentException(e)
+
 proc auxWriteStackTrace(f: PCallFrame): string =
   type
-    TTempFrame = tuple[procname: CString, line: int]
+    TempFrame = tuple[procname: cstring, line: int, filename: cstring]
   var
     it = f
     i = 0
     total = 0
-    tempFrames: array [0..63, TTempFrame]
+    tempFrames: array[0..63, TempFrame]
   while it != nil and i <= high(tempFrames):
     tempFrames[i].procname = it.procname
     tempFrames[i].line = it.line
+    tempFrames[i].filename = it.filename
     inc(i)
     inc(total)
     it = it.prev
@@ -70,63 +101,86 @@ proc auxWriteStackTrace(f: PCallFrame): string =
     add(result, $(total-i))
     add(result, " calls omitted) ...\n")
   for j in countdown(i-1, 0):
+    result.toLocation($tempFrames[j].filename, tempFrames[j].line, 0)
+    add(result, " at ")
     add(result, tempFrames[j].procname)
-    if tempFrames[j].line > 0:
-      add(result, ", line: ")
-      add(result, $tempFrames[j].line)
     add(result, "\n")
 
 proc rawWriteStackTrace(): string =
-  if framePtr == nil:
+  if framePtr != nil:
+    result = "Traceback (most recent call last)\n" & auxWriteStackTrace(framePtr)
+  else:
     result = "No stack traceback available\n"
+
+proc writeStackTrace() =
+  var trace = rawWriteStackTrace()
+  trace.setLen(trace.len - 1)
+  echo trace
+
+proc getStackTrace*(): string = rawWriteStackTrace()
+proc getStackTrace*(e: ref Exception): string = e.trace
+
+proc unhandledException(e: ref Exception) {.
+    compilerproc, asmNoStackFrame.} =
+  var buf = ""
+  if e.msg.len != 0:
+    add(buf, "Error: unhandled exception: ")
+    add(buf, e.msg)
   else:
-    result = "Traceback (most recent call last)\n"& auxWriteStackTrace(framePtr)
+    add(buf, "Error: unhandled exception")
+  add(buf, " [")
+  add(buf, e.name)
+  add(buf, "]\n")
+  when NimStackTrace:
+    add(buf, rawWriteStackTrace())
+  let cbuf = cstring(buf)
+  when NimStackTrace:
     framePtr = nil
-
-proc raiseException(e: ref E_Base, ename: cstring) {.
-    compilerproc, noStackFrame.} =
+  {.emit: """
+  if (typeof(Error) !== "undefined") {
+    throw new Error(`cbuf`);
+  }
+  else {
+    throw `cbuf`;
+  }
+  """.}
+
+proc raiseException(e: ref Exception, ename: cstring) {.
+    compilerproc, asmNoStackFrame.} =
   e.name = ename
-  if excHandler != nil:
-    excHandler.exc = e
-  else:
-    when nimrodStackTrace:
-      var buf = rawWriteStackTrace()
-    else:
-      var buf = ""
-    if e.msg != nil and e.msg[0] != '\0':
-      add(buf, "Error: unhandled exception: ")
-      add(buf, e.msg)
-    else:
-      add(buf, "Error: unhandled exception")
-    add(buf, " [")
-    add(buf, ename)
-    add(buf, "]\n")
-    alert(buf)
-  asm """throw `e`;"""
-
-proc reraiseException() {.compilerproc, noStackFrame.} =
-  if excHandler == nil:
-    raise newException(ENoExceptionToReraise, "no exception to reraise")
+  if excHandler == 0:
+    unhandledException(e)
+  when NimStackTrace:
+    e.trace = rawWriteStackTrace()
+  {.emit: "throw `e`;".}
+
+proc reraiseException() {.compilerproc, asmNoStackFrame.} =
+  if lastJSError == nil:
+    raise newException(ReraiseDefect, "no exception to reraise")
   else:
-    asm """throw excHandler.exc;"""
+    if excHandler == 0:
+      if isNimException():
+        unhandledException(cast[ref Exception](lastJSError))
 
-proc raiseOverflow {.exportc: "raiseOverflow", noreturn.} =
-  raise newException(EOverflow, "over- or underflow")
+    {.emit: "throw lastJSError;".}
 
-proc raiseDivByZero {.exportc: "raiseDivByZero", noreturn.} =
-  raise newException(EDivByZero, "divison by zero")
+proc raiseOverflow {.exportc: "raiseOverflow", noreturn, compilerproc.} =
+  raise newException(OverflowDefect, "over- or underflow")
+
+proc raiseDivByZero {.exportc: "raiseDivByZero", noreturn, compilerproc.} =
+  raise newException(DivByZeroDefect, "division by zero")
 
 proc raiseRangeError() {.compilerproc, noreturn.} =
-  raise newException(EOutOfRange, "value out of range")
+  raise newException(RangeDefect, "value out of range")
 
-proc raiseIndexError() {.compilerproc, noreturn.} =
-  raise newException(EInvalidIndex, "index out of bounds")
+proc raiseIndexError(i, a, b: int) {.compilerproc, noreturn.} =
+  raise newException(IndexDefect, formatErrorIndexBound(int(i), int(a), int(b)))
 
-proc raiseFieldError(f: string) {.compilerproc, noreturn.} =
-  raise newException(EInvalidField, f & " is not accessible")
+proc raiseFieldError2(f: string, discVal: string) {.compilerproc, noreturn.} =
+  raise newException(FieldDefect, formatFieldDefect(f, discVal))
 
-proc SetConstr() {.varargs, noStackFrame, compilerproc.} =
-  asm """
+proc setConstr() {.varargs, asmNoStackFrame, compilerproc.} =
+  {.emit: """
     var result = {};
     for (var i = 0; i < arguments.length; ++i) {
       var x = arguments[i];
@@ -139,358 +193,329 @@ proc SetConstr() {.varargs, noStackFrame, compilerproc.} =
       }
     }
     return result;
-  """
-
-proc cstrToNimstr(c: cstring): string {.noStackFrame, compilerproc.} =
-  asm """
-    var result = [];
-    for (var i = 0; i < `c`.length; ++i) {
-      result[i] = `c`.charCodeAt(i);
+  """.}
+
+proc makeNimstrLit(c: cstring): string {.asmNoStackFrame, compilerproc.} =
+  {.emit: """
+  var result = [];
+  for (var i = 0; i < `c`.length; ++i) {
+    result[i] = `c`.charCodeAt(i);
+  }
+  return result;
+  """.}
+
+proc cstrToNimstr(c: cstring): string {.asmNoStackFrame, compilerproc.} =
+  {.emit: """
+  var ln = `c`.length;
+  var result = new Array(ln);
+  var r = 0;
+  for (var i = 0; i < ln; ++i) {
+    var ch = `c`.charCodeAt(i);
+
+    if (ch < 128) {
+      result[r] = ch;
     }
-    result[result.length] = 0; // terminating zero
-    return result;
-  """
-
-proc toJSStr(s: string): cstring {.noStackFrame, compilerproc.} =
-  asm """
-    var len = `s`.length-1;
-    var result = new Array(len);
-    var fcc = String.fromCharCode;
-    for (var i = 0; i < len; ++i) {
-      result[i] = fcc(`s`[i]);
+    else {
+      if (ch < 2048) {
+        result[r] = (ch >> 6) | 192;
+      }
+      else {
+        if (ch < 55296 || ch >= 57344) {
+          result[r] = (ch >> 12) | 224;
+        }
+        else {
+            ++i;
+            ch = 65536 + (((ch & 1023) << 10) | (`c`.charCodeAt(i) & 1023));
+            result[r] = (ch >> 18) | 240;
+            ++r;
+            result[r] = ((ch >> 12) & 63) | 128;
+        }
+        ++r;
+        result[r] = ((ch >> 6) & 63) | 128;
+      }
+      ++r;
+      result[r] = (ch & 63) | 128;
     }
-    return result.join("");
-  """
-
-proc mnewString(len: int): string {.noStackFrame, compilerproc.} =
-  asm """
-    var result = new Array(`len`+1);
-    result[0] = 0;
-    result[`len`] = 0;
+    ++r;
+  }
+  return result;
+  """.}
+
+proc toJSStr(s: string): cstring {.compilerproc.} =
+  ## Builds a JavaScript string from `s`: chars below '\128' are converted one
+  ## by one, runs of other chars are percent-encoded and URI-decoded as a unit.
+  proc fromCharCode(c: char): cstring {.importc: "String.fromCharCode".}
+  proc join(x: openArray[cstring]; d = cstring""): cstring {.
+    importcpp: "#.join(@)".}
+  proc decodeURIComponent(x: cstring): cstring {.
+    importc: "decodeURIComponent".}
+
+  proc toHexString(c: char; d = 16): cstring {.importcpp: "#.toString(@)".}
+
+  var res = newSeq[cstring](s.len)
+  var i = 0
+  var j = 0
+  while i < s.len:
+    var c = s[i]
+    if c < '\128':
+      res[j] = fromCharCode(c)
+      inc i
+    else:
+      var helper = newSeq[cstring]()
+      while true:
+        let code = toHexString(c)
+        if code.len == 1:
+          helper.add cstring"%0"
+        else:
+          helper.add cstring"%"
+        helper.add code
+        inc i
+        if i >= s.len or s[i] < '\128': break
+        c = s[i]
+      try:
+        res[j] = decodeURIComponent join(helper)
+      except: # decoding failed: keep the percent-encoded text as is
+        res[j] = join(helper)
+    inc j
+  setLen(res, j)
+  result = join(res)
+
+proc mnewString(len: int): string {.asmNoStackFrame, compilerproc.} =
+  {.emit: """
+    var result = new Array(`len`);
+    for (var i = 0; i < `len`; i++) {result[i] = 0;}
     return result;
-  """
+  """.}
 
-proc SetCard(a: int): int {.compilerproc, noStackFrame.} =
+proc SetCard(a: int): int {.compilerproc, asmNoStackFrame.} =
   # argument type is a fake
-  asm """
+  {.emit: """
     var result = 0;
     for (var elem in `a`) { ++result; }
     return result;
-  """
+  """.}
 
-proc SetEq(a, b: int): bool {.compilerproc, noStackFrame.} =
-  asm """
+proc SetEq(a, b: int): bool {.compilerproc, asmNoStackFrame.} =
+  {.emit: """
     for (var elem in `a`) { if (!`b`[elem]) return false; }
     for (var elem in `b`) { if (!`a`[elem]) return false; }
     return true;
-  """
+  """.}
 
-proc SetLe(a, b: int): bool {.compilerproc, noStackFrame.} =
-  asm """
+proc SetLe(a, b: int): bool {.compilerproc, asmNoStackFrame.} =
+  {.emit: """
     for (var elem in `a`) { if (!`b`[elem]) return false; }
     return true;
-  """
+  """.}
 
 proc SetLt(a, b: int): bool {.compilerproc.} =
   result = SetLe(a, b) and not SetEq(a, b)
 
-proc SetMul(a, b: int): int {.compilerproc, noStackFrame.} =
-  asm """
+proc SetMul(a, b: int): int {.compilerproc, asmNoStackFrame.} =
+  {.emit: """
     var result = {};
     for (var elem in `a`) {
       if (`b`[elem]) { result[elem] = true; }
     }
     return result;
-  """
+  """.}
 
-proc SetPlus(a, b: int): int {.compilerproc, noStackFrame.} =
-  asm """
+proc SetPlus(a, b: int): int {.compilerproc, asmNoStackFrame.} =
+  {.emit: """
     var result = {};
     for (var elem in `a`) { result[elem] = true; }
     for (var elem in `b`) { result[elem] = true; }
     return result;
-  """
+  """.}
 
-proc SetMinus(a, b: int): int {.compilerproc, noStackFrame.} =
-  asm """
+proc SetMinus(a, b: int): int {.compilerproc, asmNoStackFrame.} =
+  {.emit: """
     var result = {};
     for (var elem in `a`) {
       if (!`b`[elem]) { result[elem] = true; }
     }
     return result;
-  """
+  """.}
 
-proc cmpStrings(a, b: string): int {.noStackFrame, compilerProc.} =
-  asm """
+proc cmpStrings(a, b: string): int {.asmNoStackFrame, compilerproc.} =
+  {.emit: """
     if (`a` == `b`) return 0;
     if (!`a`) return -1;
     if (!`b`) return 1;
-    for (var i = 0; i < `a`.length-1; ++i) {
+    for (var i = 0; i < `a`.length && i < `b`.length; i++) {
       var result = `a`[i] - `b`[i];
       if (result != 0) return result;
     }
-    return 0;
-  """
-
-proc cmp(x, y: string): int = return cmpStrings(x, y)
+    return `a`.length - `b`.length;
+  """.}
+
+proc cmp(x, y: string): int =
+  when nimvm:
+    if x == y: result = 0
+    elif x < y: result = -1
+    else: result = 1
+  else:
+    result = cmpStrings(x, y)
 
-proc eqStrings(a, b: string): bool {.noStackFrame, compilerProc.} =
-  asm """
+proc eqStrings(a, b: string): bool {.asmNoStackFrame, compilerproc.} =
+  {.emit: """
     if (`a` == `b`) return true;
+    if (`a` === null && `b`.length == 0) return true;
+    if (`b` === null && `a`.length == 0) return true;
     if ((!`a`) || (!`b`)) return false;
     var alen = `a`.length;
     if (alen != `b`.length) return false;
     for (var i = 0; i < alen; ++i)
       if (`a`[i] != `b`[i]) return false;
     return true;
-  """
-
-type
-  TDocument {.importc.} = object of TObject
-    write: proc (text: cstring) {.nimcall.}
-    writeln: proc (text: cstring) {.nimcall.}
-    createAttribute: proc (identifier: cstring): ref TNode {.nimcall.}
-    createElement: proc (identifier: cstring): ref TNode {.nimcall.}
-    createTextNode: proc (identifier: cstring): ref TNode {.nimcall.}
-    getElementById: proc (id: cstring): ref TNode {.nimcall.}
-    getElementsByName: proc (name: cstring): seq[ref TNode] {.nimcall.}
-    getElementsByTagName: proc (name: cstring): seq[ref TNode] {.nimcall.}
-
-  TNodeType* = enum
-    ElementNode = 1,
-    AttributeNode,
-    TextNode,
-    CDATANode,
-    EntityRefNode,
-    EntityNode,
-    ProcessingInstructionNode,
-    CommentNode,
-    DocumentNode,
-    DocumentTypeNode,
-    DocumentFragmentNode,
-    NotationNode
-  TNode* {.importc.} = object of TObject
-    attributes*: seq[ref TNode]
-    childNodes*: seq[ref TNode]
-    data*: cstring
-    firstChild*: ref TNode
-    lastChild*: ref TNode
-    nextSibling*: ref TNode
-    nodeName*: cstring
-    nodeType*: TNodeType
-    nodeValue*: cstring
-    parentNode*: ref TNode
-    previousSibling*: ref TNode
-    appendChild*: proc (child: ref TNode) {.nimcall.}
-    appendData*: proc (data: cstring) {.nimcall.}
-    cloneNode*: proc (copyContent: bool) {.nimcall.}
-    deleteData*: proc (start, len: int) {.nimcall.}
-    getAttribute*: proc (attr: cstring): cstring {.nimcall.}
-    getAttributeNode*: proc (attr: cstring): ref TNode {.nimcall.}
-    getElementsByTagName*: proc (): seq[ref TNode] {.nimcall.}
-    hasChildNodes*: proc (): bool {.nimcall.}
-    insertBefore*: proc (newNode, before: ref TNode) {.nimcall.}
-    insertData*: proc (position: int, data: cstring) {.nimcall.}
-    removeAttribute*: proc (attr: cstring) {.nimcall.}
-    removeAttributeNode*: proc (attr: ref TNode) {.nimcall.}
-    removeChild*: proc (child: ref TNode) {.nimcall.}
-    replaceChild*: proc (newNode, oldNode: ref TNode) {.nimcall.}
-    replaceData*: proc (start, len: int, text: cstring) {.nimcall.}
-    setAttribute*: proc (name, value: cstring) {.nimcall.}
-    setAttributeNode*: proc (attr: ref TNode) {.nimcall.}
+  """.}
 
 when defined(kwin):
-  proc rawEcho {.compilerproc, nostackframe.} =
-    asm """
+  proc rawEcho {.compilerproc, asmNoStackFrame.} =
+    {.emit: """
       var buf = "";
       for (var i = 0; i < arguments.length; ++i) {
         buf += `toJSStr`(arguments[i]);
       }
       print(buf);
-    """
-    
-elif defined(nodejs):
+    """.}
+
+elif not defined(nimOldEcho):
   proc ewriteln(x: cstring) = log(x)
-  
-  proc rawEcho {.compilerproc, nostackframe.} =
-    asm """
+
+  proc rawEcho {.compilerproc, asmNoStackFrame.} =
+    {.emit: """
       var buf = "";
       for (var i = 0; i < arguments.length; ++i) {
         buf += `toJSStr`(arguments[i]);
       }
       console.log(buf);
-    """
+    """.}
 
 else:
-  var
-    document {.importc, nodecl.}: ref TDocument
-
-  proc ewriteln(x: cstring) = 
-    var node = document.getElementsByTagName("body")[0]
-    if node != nil: 
-      node.appendChild(document.createTextNode(x))
-      node.appendChild(document.createElement("br"))
-    else: 
-      raise newException(EInvalidValue, "<body> element does not exist yet!")
+  proc ewriteln(x: cstring) =
+    var node : JSRef
+    {.emit: "`node` = document.getElementsByTagName('body')[0];".}
+    if node.isNil:
+      raise newException(ValueError, "<body> element does not exist yet!")
+    {.emit: """
+    `node`.appendChild(document.createTextNode(`x`));
+    `node`.appendChild(document.createElement("br"));
+    """.}
 
   proc rawEcho {.compilerproc.} =
-    var node = document.getElementsByTagName("body")[0]
-    if node == nil: raise newException(EIO, "<body> element does not exist yet!")
-    asm """
-      for (var i = 0; i < arguments.length; ++i) {
-        var x = `toJSStr`(arguments[i]);
-        `node`.appendChild(document.createTextNode(x))
-      }
-    """
-    node.appendChild(document.createElement("br"))
+    var node : JSRef
+    {.emit: "`node` = document.getElementsByTagName('body')[0];".}
+    if node.isNil:
+      raise newException(IOError, "<body> element does not exist yet!")
+    {.emit: """
+    for (var i = 0; i < arguments.length; ++i) {
+      var x = `toJSStr`(arguments[i]);
+      `node`.appendChild(document.createTextNode(x));
+    }
+    `node`.appendChild(document.createElement("br"));
+    """.}
 
 # Arithmetic:
-proc addInt(a, b: int): int {.noStackFrame, compilerproc.} =
-  asm """
+proc checkOverflowInt(a: int) {.asmNoStackFrame, compilerproc.} =
+  {.emit: """
+    if (`a` > 2147483647 || `a` < -2147483648) `raiseOverflow`();
+  """.}
+
+proc addInt(a, b: int): int {.asmNoStackFrame, compilerproc.} =
+  {.emit: """
     var result = `a` + `b`;
-    if (result > 2147483647 || result < -2147483648) `raiseOverflow`();
+    `checkOverflowInt`(result);
     return result;
-  """
+  """.}
 
-proc subInt(a, b: int): int {.noStackFrame, compilerproc.} =
-  asm """
+proc subInt(a, b: int): int {.asmNoStackFrame, compilerproc.} =
+  {.emit: """
     var result = `a` - `b`;
-    if (result > 2147483647 || result < -2147483648) `raiseOverflow`();
+    `checkOverflowInt`(result);
     return result;
-  """
+  """.}
 
-proc mulInt(a, b: int): int {.noStackFrame, compilerproc.} =
-  asm """
+proc mulInt(a, b: int): int {.asmNoStackFrame, compilerproc.} =
+  {.emit: """
     var result = `a` * `b`;
-    if (result > 2147483647 || result < -2147483648) `raiseOverflow`();
+    `checkOverflowInt`(result);
     return result;
-  """
+  """.}
 
-proc divInt(a, b: int): int {.noStackFrame, compilerproc.} =
-  asm """
+proc divInt(a, b: int): int {.asmNoStackFrame, compilerproc.} =
+  {.emit: """
     if (`b` == 0) `raiseDivByZero`();
     if (`b` == -1 && `a` == 2147483647) `raiseOverflow`();
-    return Math.floor(`a` / `b`);
-  """
+    return Math.trunc(`a` / `b`);
+  """.}
 
-proc modInt(a, b: int): int {.noStackFrame, compilerproc.} =
-  asm """
+proc modInt(a, b: int): int {.asmNoStackFrame, compilerproc.} =
+  {.emit: """
     if (`b` == 0) `raiseDivByZero`();
     if (`b` == -1 && `a` == 2147483647) `raiseOverflow`();
-    return Math.floor(`a` % `b`);
-  """
+    return Math.trunc(`a` % `b`);
+  """.}
+
+proc checkOverflowInt64(a: int64) {.asmNoStackFrame, compilerproc.} =
+  {.emit: """
+    if (`a` > 9223372036854775807n || `a` < -9223372036854775808n) `raiseOverflow`();
+  """.}
 
-proc addInt64(a, b: int): int {.noStackFrame, compilerproc.} =
-  asm """
+proc addInt64(a, b: int64): int64 {.asmNoStackFrame, compilerproc.} =
+  {.emit: """
     var result = `a` + `b`;
-    if (result > 9223372036854775807
-    || result < -9223372036854775808) `raiseOverflow`();
+    `checkOverflowInt64`(result);
     return result;
-  """
+  """.}
 
-proc subInt64(a, b: int): int {.noStackFrame, compilerproc.} =
-  asm """
+proc subInt64(a, b: int64): int64 {.asmNoStackFrame, compilerproc.} =
+  {.emit: """
     var result = `a` - `b`;
-    if (result > 9223372036854775807
-    || result < -9223372036854775808) `raiseOverflow`();
+    `checkOverflowInt64`(result);
     return result;
-  """
+  """.}
 
-proc mulInt64(a, b: int): int {.noStackFrame, compilerproc.} =
-  asm """
+proc mulInt64(a, b: int64): int64 {.asmNoStackFrame, compilerproc.} =
+  {.emit: """
     var result = `a` * `b`;
-    if (result > 9223372036854775807
-    || result < -9223372036854775808) `raiseOverflow`();
+    `checkOverflowInt64`(result);
     return result;
-  """
-
-proc divInt64(a, b: int): int {.noStackFrame, compilerproc.} =
-  asm """
-    if (`b` == 0) `raiseDivByZero`();
-    if (`b` == -1 && `a` == 9223372036854775807) `raiseOverflow`();
-    return Math.floor(`a` / `b`);
-  """
-
-proc modInt64(a, b: int): int {.noStackFrame, compilerproc.} =
-  asm """
-    if (`b` == 0) `raiseDivByZero`();
-    if (`b` == -1 && `a` == 9223372036854775807) `raiseOverflow`();
-    return Math.floor(`a` % `b`);
-  """
-
-proc NegInt(a: int): int {.compilerproc.} =
+  """.}
+
+proc divInt64(a, b: int64): int64 {.asmNoStackFrame, compilerproc.} =
+  {.emit: """
+    if (`b` == 0n) `raiseDivByZero`();
+    if (`b` == -1n && `a` == -9223372036854775808n) `raiseOverflow`(); // only low(int64) / -1 leaves the int64 range
+    return `a` / `b`;
+  """.}
+
+proc modInt64(a, b: int64): int64 {.asmNoStackFrame, compilerproc.} =
+  {.emit: """
+    if (`b` == 0n) `raiseDivByZero`();
+    if (`b` == -1n && `a` == 9223372036854775807n) `raiseOverflow`();
+    return `a` % `b`;
+  """.}
+
+proc negInt(a: int): int {.compilerproc.} =
   result = a*(-1)
 
-proc NegInt64(a: int64): int64 {.compilerproc.} =
+proc negInt64(a: int64): int64 {.compilerproc.} =
   result = a*(-1)
 
-proc AbsInt(a: int): int {.compilerproc.} =
+proc absInt(a: int): int {.compilerproc.} =
   result = if a < 0: a*(-1) else: a
 
-proc AbsInt64(a: int64): int64 {.compilerproc.} =
+proc absInt64(a: int64): int64 {.compilerproc.} =
   result = if a < 0: a*(-1) else: a
 
-proc LeU(a, b: int): bool {.compilerproc.} =
-  result = abs(a) <= abs(b)
-
-proc LtU(a, b: int): bool {.compilerproc.} =
-  result = abs(a) < abs(b)
-
-proc LeU64(a, b: int64): bool {.compilerproc.} =
-  result = abs(a) <= abs(b)
-
-proc LtU64(a, b: int64): bool {.compilerproc.} =
-  result = abs(a) < abs(b)
-
-proc AddU(a, b: int): int {.compilerproc.} =
-  result = abs(a) + abs(b)
-proc AddU64(a, b: int64): int64 {.compilerproc.} =
-  result = abs(a) + abs(b)
-
-proc SubU(a, b: int): int {.compilerproc.} =
-  result = abs(a) - abs(b)
-proc SubU64(a, b: int64): int64 {.compilerproc.} =
-  result = abs(a) - abs(b)
-
-proc MulU(a, b: int): int {.compilerproc.} =
-  result = abs(a) * abs(b)
-proc MulU64(a, b: int64): int64 {.compilerproc.} =
-  result = abs(a) * abs(b)
-
-proc DivU(a, b: int): int {.compilerproc.} =
-  result = abs(a) div abs(b)
-proc DivU64(a, b: int64): int64 {.compilerproc.} =
-  result = abs(a) div abs(b)
-
-proc ModU(a, b: int): int {.compilerproc.} =
-  result = abs(a) mod abs(b)
-proc ModU64(a, b: int64): int64 {.compilerproc.} =
-  result = abs(a) mod abs(b)
-
-proc Ze(a: int): int {.compilerproc.} =
-  result = a
-proc Ze64(a: int64): int64 {.compilerproc.} =
-  result = a
-
-proc toU8(a: int): int8 {.noStackFrame, compilerproc.} =
-  asm """
-    return `a`;
-  """
-
-proc toU16(a: int): int16 {.noStackFrame, compilerproc.} =
-  asm """
-    return `a`;
-  """
-
-proc toU32(a: int): int32 {.noStackFrame, compilerproc.} =
-  asm """
-    return `a`;
-  """
-
-
 proc nimMin(a, b: int): int {.compilerproc.} = return if a <= b: a else: b
 proc nimMax(a, b: int): int {.compilerproc.} = return if a >= b: a else: b
 
+proc chckNilDisp(p: JSRef) {.compilerproc.} =
+  if p == nil:
+    sysFatal(NilAccessDefect, "cannot dispatch; dispatcher is nil")
+
 include "system/hti"
 
 proc isFatPointer(ti: PNimType): bool =
@@ -499,103 +524,116 @@ proc isFatPointer(ti: PNimType): bool =
     tyArray, tyArrayConstr, tyTuple,
     tyOpenArray, tySet, tyVar, tyRef, tyPtr}
 
-proc NimCopy(x: pointer, ti: PNimType): pointer {.compilerproc.}
+proc nimCopy(dest, src: JSRef, ti: PNimType): JSRef {.compilerproc.}
 
-proc NimCopyAux(dest, src: Pointer, n: ptr TNimNode) {.compilerproc.} =
+proc nimCopyAux(dest, src: JSRef, n: ptr TNimNode) {.compilerproc.} =
   case n.kind
-  of nkNone: sysAssert(false, "NimCopyAux")
+  of nkNone: sysAssert(false, "nimCopyAux")
   of nkSlot:
-    asm "`dest`[`n`.offset] = NimCopy(`src`[`n`.offset], `n`.typ);"
+    {.emit: """
+      `dest`[`n`.offset] = nimCopy(`dest`[`n`.offset], `src`[`n`.offset], `n`.typ);
+    """.}
   of nkList:
-    for i in 0..n.len-1:
-      NimCopyAux(dest, src, n.sons[i])
+    {.emit: """
+    for (var i = 0; i < `n`.sons.length; i++) {
+      nimCopyAux(`dest`, `src`, `n`.sons[i]);
+    }
+    """.}
   of nkCase:
-    asm """
-      `dest`[`n`.offset] = NimCopy(`src`[`n`.offset], `n`.typ);
+    {.emit: """
+      `dest`[`n`.offset] = nimCopy(`dest`[`n`.offset], `src`[`n`.offset], `n`.typ);
       for (var i = 0; i < `n`.sons.length; ++i) {
-        NimCopyAux(`dest`, `src`, `n`.sons[i][1]);
+        nimCopyAux(`dest`, `src`, `n`.sons[i][1]);
       }
-    """
+    """.}
 
-proc NimCopy(x: pointer, ti: PNimType): pointer =
+proc nimCopy(dest, src: JSRef, ti: PNimType): JSRef =
   case ti.kind
   of tyPtr, tyRef, tyVar, tyNil:
     if not isFatPointer(ti):
-      result = x
+      result = src
     else:
-      asm """
-        `result` = [null, 0];
-        `result`[0] = `x`[0];
-        `result`[1] = `x`[1];
-      """
+      {.emit: "`result` = [`src`[0], `src`[1]];".}
   of tySet:
-    asm """
-      `result` = {};
-      for (var key in `x`) { `result`[key] = `x`[key]; }
-    """
+    {.emit: """
+      if (`dest` === null || `dest` === undefined) {
+        `dest` = {};
+      }
+      else {
+        for (var key in `dest`) { delete `dest`[key]; }
+      }
+      for (var key in `src`) { `dest`[key] = `src`[key]; }
+      `result` = `dest`;
+    """.}
   of tyTuple, tyObject:
-    if ti.base != nil: result = NimCopy(x, ti.base)
+    if ti.base != nil: result = nimCopy(dest, src, ti.base)
     elif ti.kind == tyObject:
-      asm "`result` = {m_type: `ti`};"
+      {.emit: "`result` = (`dest` === null || `dest` === undefined) ? {m_type: `ti`} : `dest`;".}
     else:
-      asm "`result` = {};"
-    NimCopyAux(result, x, ti.node)
-  of tySequence, tyArrayConstr, tyOpenArray, tyArray:
-    asm """
-      `result` = new Array(`x`.length);
-      for (var i = 0; i < `x`.length; ++i) {
-        `result`[i] = NimCopy(`x`[i], `ti`.base);
+      {.emit: "`result` = (`dest` === null || `dest` === undefined) ? {} : `dest`;".}
+    nimCopyAux(result, src, ti.node)
+  of tyArrayConstr, tyArray:
+    # To avoid changing the type (TypedArray -> Array) and to copy faster,
+    # array constructors are handled separately from sequences.
+    {.emit: """
+      if(ArrayBuffer.isView(`src`)) { 
+        if(`dest` === null || `dest` === undefined || `dest`.length != `src`.length) {
+          `dest` = new `src`.constructor(`src`);
+        } else {
+          `dest`.set(`src`, 0);
+        }
+        `result` = `dest`;
+      } else {
+        if (`src` === null) {
+          `result` = null;
+        }
+        else {
+          if (`dest` === null || `dest` === undefined || `dest`.length != `src`.length) {
+            `dest` = new Array(`src`.length);
+          }
+          `result` = `dest`;
+          for (var i = 0; i < `src`.length; ++i) {
+            `result`[i] = nimCopy(`result`[i], `src`[i], `ti`.base);
+          }
+        }
       }
-    """
-  of tyString:
-    asm "`result` = `x`.slice(0);"
-  else:
-    result = x
-
-proc genericReset(x: Pointer, ti: PNimType): pointer {.compilerproc.} =
-  case ti.kind
-  of tyPtr, tyRef, tyVar, tyNil:
-    if not isFatPointer(ti):
-      result = nil
-    else:
-      asm """
-        `result` = [null, 0];
-      """
-  of tySet:
-    asm """
-      `result` = {};
-    """
-  of tyTuple, tyObject:
-    if ti.kind == tyObject:
-      asm "`result` = {m_type: `ti`};"
-    else:
-      asm "`result` = {};"
+    """.}
   of tySequence, tyOpenArray:
-    asm """
-      `result` = [];
-    """
-  of tyArrayConstr, tyArray:
-    asm """
-      `result` = new Array(`x`.length);
-      for (var i = 0; i < `x`.length; ++i) {
-        `result`[i] = genericReset(`x`[i], `ti`.base);
+    {.emit: """
+      if (`src` === null) {
+        `result` = null;
+      }
+      else {
+        if (`dest` === null || `dest` === undefined || `dest`.length != `src`.length) {
+          `dest` = new Array(`src`.length);
+        }
+        `result` = `dest`;
+        for (var i = 0; i < `src`.length; ++i) {
+          `result`[i] = nimCopy(`result`[i], `src`[i], `ti`.base);
+        }
       }
-    """
+    """.}
+  of tyString:
+    {.emit: """
+      if (`src` !== null) {
+        `result` = `src`.slice(0);
+      }
+    """.}
   else:
-    result = nil
+    result = src
 
-proc ArrayConstr(len: int, value: pointer, typ: PNimType): pointer {.
-                 noStackFrame, compilerproc.} =
+proc arrayConstr(len: int, value: JSRef, typ: PNimType): JSRef {.
+                asmNoStackFrame, compilerproc.} =
   # types are fake
-  asm """
+  {.emit: """
     var result = new Array(`len`);
-    for (var i = 0; i < `len`; ++i) result[i] = NimCopy(`value`, `typ`);
+    for (var i = 0; i < `len`; ++i) result[i] = nimCopy(null, `value`, `typ`);
     return result;
-  """
+  """.}
 
 proc chckIndx(i, a, b: int): int {.compilerproc.} =
   if i >= a and i <= b: return i
-  else: raiseIndexError()
+  else: raiseIndexError(i, a, b)
 
 proc chckRange(i, a, b: int): int {.compilerproc.} =
   if i >= a and i <= b: return i
@@ -607,7 +645,7 @@ proc chckObj(obj, subclass: PNimType) {.compilerproc.} =
   if x == subclass: return # optimized fast path
   while x != subclass:
     if x == nil:
-      raise newException(EInvalidObjectConversion, "invalid object conversion")
+      raise newException(ObjectConversionDefect, "invalid object conversion")
     x = x.base
 
 proc isObj(obj, subclass: PNimType): bool {.compilerproc.} =
@@ -619,4 +657,112 @@ proc isObj(obj, subclass: PNimType): bool {.compilerproc.} =
     x = x.base
   return true
 
+proc addChar(x: string, c: char) {.compilerproc, asmNoStackFrame.} =
+  {.emit: "`x`.push(`c`);".}
+
 {.pop.}
+
+proc tenToThePowerOf(b: int): BiggestFloat =
+  # xxx deadcode
+  var b = b
+  var a = 10.0
+  result = 1.0
+  while true:
+    if (b and 1) == 1:
+      result = result * a
+    b = b shr 1
+    if b == 0: break
+    a = a * a
+
+const
+  IdentChars = {'a'..'z', 'A'..'Z', '0'..'9', '_'}
+
+
+proc parseFloatNative(a: openarray[char]): float =
+  var str = ""
+  for x in a:
+    str.add x
+
+  let cstr = cstring str
+
+  {.emit: """
+  `result` = Number(`cstr`);
+  """.}
+
+proc nimParseBiggestFloat(s: openarray[char], number: var BiggestFloat): int {.compilerproc.} =
+  var sign: bool
+  var i = 0
+  if s[i] == '+': inc(i)
+  elif s[i] == '-':
+    sign = true
+    inc(i)
+  if s[i] == 'N' or s[i] == 'n':
+    if s[i+1] == 'A' or s[i+1] == 'a':
+      if s[i+2] == 'N' or s[i+2] == 'n':
+        if s[i+3] notin IdentChars:
+          number = NaN
+          return i+3
+    return 0
+  if s[i] == 'I' or s[i] == 'i':
+    if s[i+1] == 'N' or s[i+1] == 'n':
+      if s[i+2] == 'F' or s[i+2] == 'f':
+        if s[i+3] notin IdentChars:
+          number = if sign: -Inf else: Inf
+          return i+3
+    return 0
+
+  var buf: string
+    # we could also use an `array[char, N]` buffer to avoid reallocs, or
+    # use a 2-pass algorithm that first computes the length.
+  if sign: buf.add '-'
+  template addInc =
+    buf.add s[i]
+    inc(i)
+  template eatUnderscores =
+    while s[i] == '_': inc(i)
+  while s[i] in {'0'..'9'}: # Read integer part
+    buf.add s[i]
+    inc(i)
+    eatUnderscores()
+  if s[i] == '.': # Decimal?
+    addInc()
+    while s[i] in {'0'..'9'}: # Read fractional part
+      addInc()
+      eatUnderscores()
+  # Nothing was buffered beyond an optional '-' sign: not a number
+  if buf.len == ord(sign): return 0
+  if s[i] in {'e', 'E'}: # Exponent?
+    addInc()
+    if s[i] == '+': inc(i)
+    elif s[i] == '-': addInc()
+    if s[i] notin {'0'..'9'}: return 0
+    while s[i] in {'0'..'9'}:
+      addInc()
+      eatUnderscores()
+  number = parseFloatNative(buf)
+  result = i
+
+# Workaround for IE, IE up to version 11 lacks 'Math.trunc'. We produce
+# 'Math.trunc' for Nim's ``div`` and ``mod`` operators:
+when defined(nimJsMathTruncPolyfill):
+  {.emit: """
+if (!Math.trunc) {
+  Math.trunc = function(v) {
+    v = +v;
+    if (!isFinite(v)) return v;
+    return (v - v % 1) || (v < 0 ? -0 : v === 0 ? v : 0);
+  };
+}
+""".}
+
+proc cmpClosures(a, b: JSRef): bool {.compilerproc, asmNoStackFrame.} =
+  # Both `a` and `b` need to be a closure
+  {.emit: """
+    if (`a` !== null && `a`.ClP_0 !== undefined &&
+        `b` !== null && `b`.ClP_0 !== undefined) {
+      return `a`.ClP_0 == `b`.ClP_0 && `a`.ClE_0 == `b`.ClE_0;
+    } else {
+      return `a` == `b`;
+    }
+  """
+  .}
diff --git a/lib/system/memalloc.nim b/lib/system/memalloc.nim
new file mode 100644
index 000000000..a94d0995c
--- /dev/null
+++ b/lib/system/memalloc.nim
@@ -0,0 +1,449 @@
+when notJSnotNims:
+  proc zeroMem*(p: pointer, size: Natural) {.inline, noSideEffect,
+    tags: [], raises: [].}
+    ## Overwrites the contents of the memory at `p` with the value 0.
+    ##
+    ## Exactly `size` bytes will be overwritten. Like any procedure
+    ## dealing with raw memory this is **unsafe**.
+
+  proc copyMem*(dest, source: pointer, size: Natural) {.inline, benign,
+    tags: [], raises: [].}
+    ## Copies the contents from the memory at `source` to the memory
+    ## at `dest`.
+    ## Exactly `size` bytes will be copied. The memory
+    ## regions may not overlap. Like any procedure dealing with raw
+    ## memory this is **unsafe**.
+
+  proc moveMem*(dest, source: pointer, size: Natural) {.inline, benign,
+    tags: [], raises: [].}
+    ## Copies the contents from the memory at `source` to the memory
+    ## at `dest`.
+    ##
+    ## Exactly `size` bytes will be copied. The memory
+    ## regions may overlap, `moveMem` handles this case appropriately
+    ## and is thus somewhat more safe than `copyMem`. Like any procedure
+    ## dealing with raw memory this is still **unsafe**, though.
+
+  proc equalMem*(a, b: pointer, size: Natural): bool {.inline, noSideEffect,
+    tags: [], raises: [].}
+    ## Compares the memory blocks `a` and `b`. `size` bytes will
+    ## be compared.
+    ##
+    ## If the blocks are equal, `true` is returned, `false`
+    ## otherwise. Like any procedure dealing with raw memory this is
+    ## **unsafe**.
+
+  proc cmpMem*(a, b: pointer, size: Natural): int {.inline, noSideEffect,
+    tags: [], raises: [].}
+    ## Compares the memory blocks `a` and `b`. `size` bytes will
+    ## be compared.
+    ##
+    ## Returns:
+    ## * a value less than zero, if `a < b`
+    ## * a value greater than zero, if `a > b`
+    ## * zero, if `a == b`
+    ##
+    ## Like any procedure dealing with raw memory this is
+    ## **unsafe**.
+
+when hasAlloc and not defined(js):
+
+  proc allocImpl*(size: Natural): pointer {.noconv, rtl, tags: [], benign, raises: [].}
+  proc alloc0Impl*(size: Natural): pointer {.noconv, rtl, tags: [], benign, raises: [].}
+  proc deallocImpl*(p: pointer) {.noconv, rtl, tags: [], benign, raises: [].}
+  proc reallocImpl*(p: pointer, newSize: Natural): pointer {.noconv, rtl, tags: [], benign, raises: [].}
+  proc realloc0Impl*(p: pointer, oldSize, newSize: Natural): pointer {.noconv, rtl, tags: [], benign, raises: [].}
+
+  proc allocSharedImpl*(size: Natural): pointer {.noconv, compilerproc, rtl, benign, raises: [], tags: [].}
+  proc allocShared0Impl*(size: Natural): pointer {.noconv, rtl, benign, raises: [], tags: [].}
+  proc deallocSharedImpl*(p: pointer) {.noconv, rtl, benign, raises: [], tags: [].}
+  proc reallocSharedImpl*(p: pointer, newSize: Natural): pointer {.noconv, rtl, tags: [], benign, raises: [].}
+  proc reallocShared0Impl*(p: pointer, oldSize, newSize: Natural): pointer {.noconv, rtl, tags: [], benign, raises: [].}
+
+  # Allocator statistics for memory leak tests
+
+  {.push stackTrace: off.}
+
+  type AllocStats* = object
+    allocCount: int
+    deallocCount: int
+
+  proc `-`*(a, b: AllocStats): AllocStats =
+    result.allocCount = a.allocCount - b.allocCount
+    result.deallocCount = a.deallocCount - b.deallocCount
+
+  template dumpAllocstats*(code: untyped) =
+    let stats1 = getAllocStats()
+    code
+    let stats2 = getAllocStats()
+    echo $(stats2 - stats1)
+
+  when defined(nimAllocStats):
+    var stats: AllocStats
+    template incStat(what: untyped) = atomicInc stats.what
+    proc getAllocStats*(): AllocStats = stats
+
+  else:
+    template incStat(what: untyped) = discard
+    proc getAllocStats*(): AllocStats = discard
+
+  template alloc*(size: Natural): pointer =
+    ## Allocates a new memory block with at least `size` bytes.
+    ##
+    ## The block has to be freed with `realloc(block, 0) <#realloc.t,pointer,Natural>`_
+    ## or `dealloc(block) <#dealloc,pointer>`_.
+    ## The block is not initialized, so reading
+    ## from it before writing to it is undefined behaviour!
+    ##
+    ## The allocated memory belongs to its allocating thread!
+    ## Use `allocShared <#allocShared.t,Natural>`_ to allocate from a shared heap.
+    ##
+    ## See also:
+    ## * `alloc0 <#alloc0.t,Natural>`_
+    incStat(allocCount)
+    allocImpl(size)
+
+  proc createU*(T: typedesc, size = 1.Positive): ptr T {.inline, benign, raises: [].} =
+    ## Allocates a new memory block with at least `T.sizeof * size` bytes.
+    ##
+    ## The block has to be freed with `resize(block, 0) <#resize,ptr.T,Natural>`_
+    ## or `dealloc(block) <#dealloc,pointer>`_.
+    ## The block is not initialized, so reading
+    ## from it before writing to it is undefined behaviour!
+    ##
+    ## The allocated memory belongs to its allocating thread!
+    ## Use `createSharedU <#createSharedU,typedesc>`_ to allocate from a shared heap.
+    ##
+    ## See also:
+    ## * `create <#create,typedesc>`_
+    cast[ptr T](alloc(T.sizeof * size))
+
+  template alloc0*(size: Natural): pointer =
+    ## Allocates a new memory block with at least `size` bytes.
+    ##
+    ## The block has to be freed with `realloc(block, 0) <#realloc.t,pointer,Natural>`_
+    ## or `dealloc(block) <#dealloc,pointer>`_.
+    ## The block is initialized with all bytes containing zero, so it is
+    ## somewhat safer than  `alloc <#alloc.t,Natural>`_.
+    ##
+    ## The allocated memory belongs to its allocating thread!
+    ## Use `allocShared0 <#allocShared0.t,Natural>`_ to allocate from a shared heap.
+    incStat(allocCount)
+    alloc0Impl(size)
+
+  proc create*(T: typedesc, size = 1.Positive): ptr T {.inline, benign, raises: [].} =
+    ## Allocates a new memory block with at least `T.sizeof * size` bytes.
+    ##
+    ## The block has to be freed with `resize(block, 0) <#resize,ptr.T,Natural>`_
+    ## or `dealloc(block) <#dealloc,pointer>`_.
+    ## The block is initialized with all bytes containing zero, so it is
+    ## somewhat safer than `createU <#createU,typedesc>`_.
+    ##
+    ## The allocated memory belongs to its allocating thread!
+    ## Use `createShared <#createShared,typedesc>`_ to allocate from a shared heap.
+    cast[ptr T](alloc0(sizeof(T) * size))
+
+  template realloc*(p: pointer, newSize: Natural): pointer =
+    ## Grows or shrinks a given memory block.
+    ##
+    ## If `p` is **nil** then a new memory block is returned.
+    ## In either way the block has at least `newSize` bytes.
+    ## If `newSize == 0` and `p` is not **nil** `realloc` calls `dealloc(p)`.
+    ## In other cases the block has to be freed with
+    ## `dealloc(block) <#dealloc,pointer>`_.
+    ##
+    ## The allocated memory belongs to its allocating thread!
+    ## Use `reallocShared <#reallocShared.t,pointer,Natural>`_ to reallocate
+    ## from a shared heap.
+    reallocImpl(p, newSize)
+
+  template realloc0*(p: pointer, oldSize, newSize: Natural): pointer =
+    ## Grows or shrinks a given memory block.
+    ##
+    ## If `p` is **nil** then a new memory block is returned.
+    ## In either way the block has at least `newSize` bytes.
+    ## If `newSize == 0` and `p` is not **nil** `realloc` calls `dealloc(p)`.
+    ## In other cases the block has to be freed with
+    ## `dealloc(block) <#dealloc,pointer>`_.
+    ##
+    ## The block is initialized with all bytes containing zero, so it is
+    ## somewhat safer than `realloc`.
+    ##
+    ## The allocated memory belongs to its allocating thread!
+    ## Use `reallocShared <#reallocShared.t,pointer,Natural>`_ to reallocate
+    ## from a shared heap.
+    realloc0Impl(p, oldSize, newSize)
+
+  proc resize*[T](p: ptr T, newSize: Natural): ptr T {.inline, benign, raises: [].} =
+    ## Grows or shrinks a given memory block.
+    ##
+    ## If `p` is **nil** then a new memory block is returned.
+    ## In either way the block has at least `T.sizeof * newSize` bytes.
+    ## If `newSize == 0` and `p` is not **nil** `resize` calls `dealloc(p)`.
+    ## In other cases the block has to be freed with `free`.
+    ##
+    ## The allocated memory belongs to its allocating thread!
+    ## Use `resizeShared <#resizeShared,ptr.T,Natural>`_ to reallocate
+    ## from a shared heap.
+    cast[ptr T](realloc(p, T.sizeof * newSize))
+
+  proc dealloc*(p: pointer) {.noconv, compilerproc, rtl, benign, raises: [], tags: [].} =
+    ## Frees the memory allocated with `alloc`, `alloc0`,
+    ## `realloc`, `create` or `createU`.
+    ##
+    ## **This procedure is dangerous!**
+    ## If one forgets to free the memory, a leak occurs; if one tries to
+    ## access freed memory (or just frees it twice!), a core dump may happen
+    ## or other memory may be corrupted.
+    ##
+    ## The freed memory must belong to its allocating thread!
+    ## Use `deallocShared <#deallocShared,pointer>`_ to deallocate from a shared heap.
+    incStat(deallocCount)
+    deallocImpl(p)
+
+  template allocShared*(size: Natural): pointer =
+    ## Allocates a new memory block on the shared heap with at
+    ## least `size` bytes.
+    ##
+    ## The block has to be freed with
+    ## `reallocShared(block, 0) <#reallocShared.t,pointer,Natural>`_
+    ## or `deallocShared(block) <#deallocShared,pointer>`_.
+    ##
+    ## The block is not initialized, so reading from it before writing
+    ## to it is undefined behaviour!
+    ##
+    ## See also:
+    ## * `allocShared0 <#allocShared0.t,Natural>`_.
+    incStat(allocCount)
+    allocSharedImpl(size)
+
+  proc createSharedU*(T: typedesc, size = 1.Positive): ptr T {.inline, tags: [],
+                                                               benign, raises: [].} =
+    ## Allocates a new memory block on the shared heap with at
+    ## least `T.sizeof * size` bytes.
+    ##
+    ## The block has to be freed with
+    ## `resizeShared(block, 0) <#resizeShared,ptr.T,Natural>`_ or
+    ## `freeShared(block) <#freeShared,ptr.T>`_.
+    ##
+    ## The block is not initialized, so reading from it before writing
+    ## to it is undefined behaviour!
+    ##
+    ## See also:
+    ## * `createShared <#createShared,typedesc>`_
+    cast[ptr T](allocShared(T.sizeof * size))
+
+  template allocShared0*(size: Natural): pointer =
+    ## Allocates a new memory block on the shared heap with at
+    ## least `size` bytes.
+    ##
+    ## The block has to be freed with
+    ## `reallocShared(block, 0) <#reallocShared.t,pointer,Natural>`_
+    ## or `deallocShared(block) <#deallocShared,pointer>`_.
+    ##
+    ## The block is initialized with all bytes
+    ## containing zero, so it is somewhat safer than
+    ## `allocShared <#allocShared.t,Natural>`_.
+    incStat(allocCount)
+    allocShared0Impl(size)
+
+  proc createShared*(T: typedesc, size = 1.Positive): ptr T {.inline.} =
+    ## Allocates a new memory block on the shared heap with at
+    ## least `T.sizeof * size` bytes.
+    ##
+    ## The block has to be freed with
+    ## `resizeShared(block, 0) <#resizeShared,ptr.T,Natural>`_ or
+    ## `freeShared(block) <#freeShared,ptr.T>`_.
+    ##
+    ## The block is initialized with all bytes
+    ## containing zero, so it is somewhat safer than
+    ## `createSharedU <#createSharedU,typedesc>`_.
+    cast[ptr T](allocShared0(T.sizeof * size))
+
+  template reallocShared*(p: pointer, newSize: Natural): pointer =
+    ## Grows or shrinks a given memory block on the heap.
+    ##
+    ## If `p` is **nil** then a new memory block is returned.
+    ## In either way the block has at least `newSize` bytes.
+    ## If `newSize == 0` and `p` is not **nil** `reallocShared` calls
+    ## `deallocShared(p)`.
+    ## In other cases the block has to be freed with
+    ## `deallocShared <#deallocShared,pointer>`_.
+    reallocSharedImpl(p, newSize)
+
+  template reallocShared0*(p: pointer, oldSize, newSize: Natural): pointer =
+    ## Grows or shrinks a given memory block on the heap.
+    ##
+    ## When growing, the new bytes of the block are initialized with all bytes
+    ## containing zero, so it is somewhat safer than `reallocShared`.
+    ##
+    ## If `p` is **nil** then a new memory block is returned.
+    ## In either way the block has at least `newSize` bytes.
+    ## If `newSize == 0` and `p` is not **nil** `reallocShared` calls
+    ## `deallocShared(p)`.
+    ## In other cases the block has to be freed with
+    ## `deallocShared <#deallocShared,pointer>`_.
+    reallocShared0Impl(p, oldSize, newSize)
+
+  proc resizeShared*[T](p: ptr T, newSize: Natural): ptr T {.inline, raises: [].} =
+    ## Grows or shrinks a given memory block on the heap.
+    ##
+    ## If `p` is **nil** then a new memory block is returned.
+    ## In either way the block has at least `T.sizeof * newSize` bytes.
+    ## If `newSize == 0` and `p` is not **nil** `resizeShared` calls
+    ## `freeShared(p)`.
+    ## In other cases the block has to be freed with
+    ## `freeShared <#freeShared,ptr.T>`_.
+    cast[ptr T](reallocShared(p, T.sizeof * newSize))
+
+  proc deallocShared*(p: pointer) {.noconv, compilerproc, rtl, benign, raises: [], tags: [].} =
+    ## Frees the memory allocated with `allocShared`, `allocShared0` or
+    ## `reallocShared`.
+    ##
+    ## **This procedure is dangerous!**
+    ## If one forgets to free the memory a leak occurs; if one tries to
+    ## access freed memory (or just freeing it twice!) a core dump may happen
+    ## or other memory may be corrupted.
+    incStat(deallocCount)
+    deallocSharedImpl(p)
+
+  proc freeShared*[T](p: ptr T) {.inline, benign, raises: [].} =
+    ## Frees the memory allocated with `createShared`, `createSharedU` or
+    ## `resizeShared`.
+    ##
+    ## **This procedure is dangerous!**
+    ## If one forgets to free the memory a leak occurs; if one tries to
+    ## access freed memory (or just freeing it twice!) a core dump may happen
+    ## or other memory may be corrupted.
+    deallocShared(p)
+
+  include bitmasks
+
+  template `+!`(p: pointer, s: SomeInteger): pointer =
+    cast[pointer](cast[int](p) +% int(s))
+
+  template `-!`(p: pointer, s: SomeInteger): pointer =
+    cast[pointer](cast[int](p) -% int(s))
+
+  proc alignedAlloc(size, align: Natural): pointer =
+    if align <= MemAlign: # plain allocation, no offset header is written
+      when compileOption("threads"):
+        result = allocShared(size)
+      else:
+        result = alloc(size)
+    else:
+      # over-allocate by (align - 1) bytes so an aligned address always fits,
+      # plus 2 bytes (one uint16) to store the offset back to `base`
+      when compileOption("threads"):
+        let base = allocShared(size + align - 1 + sizeof(uint16))
+      else:
+        let base = alloc(size + align - 1 + sizeof(uint16))
+      # memory layout: padding + offset (2 bytes) + user_data
+      # in order to deallocate: read offset at user_data - 2 bytes,
+      # then deallocate user_data - offset
+      let offset = align - (cast[int](base) and (align - 1)) # mask assumes align is a power of two — TODO confirm
+      cast[ptr uint16](base +! (offset - sizeof(uint16)))[] = uint16(offset) # NOTE(review): presumably relies on 2 <= offset <= high(uint16) — confirm
+      result = base +! offset
+
+  proc alignedAlloc0(size, align: Natural): pointer =
+    if align <= MemAlign:
+      when compileOption("threads"):
+        result = allocShared0(size)
+      else:
+        result = alloc0(size)
+    else:
+      # see comments for alignedAlloc
+      when compileOption("threads"):
+        let base = allocShared0(size + align - 1 + sizeof(uint16))
+      else:
+        let base = alloc0(size + align - 1 + sizeof(uint16))
+      let offset = align - (cast[int](base) and (align - 1))
+      cast[ptr uint16](base +! (offset - sizeof(uint16)))[] = uint16(offset)
+      result = base +! offset
+
+  proc alignedDealloc(p: pointer, align: int) {.compilerproc.} =
+    if align <= MemAlign: # alignedAlloc wrote no offset header for this alignment
+      when compileOption("threads"):
+        deallocShared(p)
+      else:
+        dealloc(p)
+    else:
+      # read the uint16 offset stored at p - 2 bytes, then free the original base (p - offset)
+      let offset = cast[ptr uint16](p -! sizeof(uint16))[]
+      when compileOption("threads"):
+        deallocShared(p -! offset)
+      else:
+        dealloc(p -! offset)
+
+  proc alignedRealloc(p: pointer, oldSize, newSize, align: Natural): pointer =
+    ## Resizes a block obtained from `alignedAlloc`, keeping the first
+    ## `min(oldSize, newSize)` bytes; `align` must match the original call.
+    if align <= MemAlign:
+      when compileOption("threads"): result = reallocShared(p, newSize)
+      else: result = realloc(p, newSize)
+    else:
+      result = alignedAlloc(newSize, align) # over-aligned: allocate, copy, free
+      copyMem(result, p, (if newSize < oldSize: newSize else: oldSize)) # never past a shrunk block
+      alignedDealloc(p, align)
+
+  proc alignedRealloc0(p: pointer, oldSize, newSize, align: Natural): pointer =
+    ## Resizes a block obtained from `alignedAlloc`, keeping the first
+    ## `min(oldSize, newSize)` bytes and zeroing any bytes added by growth.
+    if align <= MemAlign:
+      when compileOption("threads"): result = reallocShared0(p, oldSize, newSize)
+      else: result = realloc0(p, oldSize, newSize)
+    else:
+      result = alignedAlloc(newSize, align) # over-aligned: allocate, copy, free
+      copyMem(result, p, (if newSize < oldSize: newSize else: oldSize)) # never past a shrunk block
+      if newSize > oldSize: zeroMem(result +! oldSize, newSize - oldSize) # only the grown tail
+      alignedDealloc(p, align)
+
+  {.pop.}
+
+# GC interface:
+
+when hasAlloc:
+  proc getOccupiedMem*(): int {.rtl.}
+    ## Returns the number of bytes that are owned by the process and hold data.
+
+  proc getFreeMem*(): int {.rtl.}
+    ## Returns the number of bytes that are owned by the process, but do not
+    ## hold any meaningful data.
+
+  proc getTotalMem*(): int {.rtl.}
+    ## Returns the number of bytes that are owned by the process.
+
+
+when defined(js):
+  # Stubs:
+  proc getOccupiedMem(): int = return -1
+  proc getFreeMem(): int = return -1
+  proc getTotalMem(): int = return -1
+
+  proc dealloc(p: pointer) = discard
+  proc alloc(size: Natural): pointer = discard
+  proc alloc0(size: Natural): pointer = discard
+  proc realloc(p: pointer, newsize: Natural): pointer = discard
+  proc realloc0(p: pointer, oldsize, newsize: Natural): pointer = discard
+
+  proc allocShared(size: Natural): pointer = discard
+  proc allocShared0(size: Natural): pointer = discard
+  proc deallocShared(p: pointer) = discard
+  proc reallocShared(p: pointer, newsize: Natural): pointer = discard
+  proc reallocShared0(p: pointer, oldsize, newsize: Natural): pointer = discard
+
+
+when hasAlloc and hasThreadSupport and not defined(useMalloc):
+  proc getOccupiedSharedMem*(): int {.rtl.}
+    ## Returns the number of bytes that are owned by the process
+    ## on the shared heap and hold data. This is only available when
+    ## threads are enabled.
+
+  proc getFreeSharedMem*(): int {.rtl.}
+    ## Returns the number of bytes that are owned by the
+    ## process on the shared heap, but do not hold any meaningful data.
+    ## This is only available when threads are enabled.
+
+  proc getTotalSharedMem*(): int {.rtl.}
+    ## Returns the number of bytes on the shared heap that are owned by the
+    ## process. This is only available when threads are enabled.
diff --git a/lib/system/memory.nim b/lib/system/memory.nim
new file mode 100644
index 000000000..156773c48
--- /dev/null
+++ b/lib/system/memory.nim
@@ -0,0 +1,55 @@
+{.push stack_trace: off.}
+
+const useLibC = not defined(nimNoLibc)
+
+when useLibC:
+  import ansi_c
+
+proc nimCopyMem*(dest, source: pointer, size: Natural) {.nonReloadable, compilerproc, inline.} =
+  when useLibC:
+    c_memcpy(dest, source, cast[csize_t](size))
+  else:
+    let d = cast[ptr UncheckedArray[byte]](dest)
+    let s = cast[ptr UncheckedArray[byte]](source)
+    var i = 0
+    while i < size:
+      d[i] = s[i]
+      inc i
+
+proc nimSetMem*(a: pointer, v: cint, size: Natural) {.nonReloadable, inline.} =
+  when useLibC:
+    c_memset(a, v, cast[csize_t](size))
+  else:
+    let a = cast[ptr UncheckedArray[byte]](a)
+    var i = 0
+    let v = cast[byte](v)
+    while i < size:
+      a[i] = v
+      inc i
+
+proc nimZeroMem*(p: pointer, size: Natural) {.compilerproc, nonReloadable, inline.} =
+  nimSetMem(p, 0, size)
+
+proc nimCmpMem*(a, b: pointer, size: Natural): cint {.compilerproc, nonReloadable, inline.} =
+  when useLibC: # delegate to the C library's memcmp
+    c_memcmp(a, b, cast[csize_t](size))
+  else: # byte-wise fallback, compiled when nimNoLibc is defined
+    let a = cast[ptr UncheckedArray[byte]](a)
+    let b = cast[ptr UncheckedArray[byte]](b)
+    var i = 0
+    while i < size:
+      let d = a[i].cint - b[i].cint
+      if d != 0: return d # difference of the first mismatching pair of bytes
+      inc i # leaving the loop keeps result at its default 0: all bytes equal
+
+proc nimCStrLen*(a: cstring): int {.compilerproc, nonReloadable, inline.} =
+  if a.isNil: return 0 # a nil cstring is reported as length 0
+  when useLibC: # delegate to the C library's strlen
+    cast[int](c_strlen(a))
+  else: # fallback when nimNoLibc is defined: count bytes up to the first 0
+    var a = cast[ptr byte](a)
+    while a[] != 0:
+      a = cast[ptr byte](cast[uint](a) + 1) # advance one byte
+      inc result
+
+{.pop.}
diff --git a/lib/system/memtracker.nim b/lib/system/memtracker.nim
new file mode 100644
index 000000000..289f4e024
--- /dev/null
+++ b/lib/system/memtracker.nim
@@ -0,0 +1,106 @@
+#
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2016 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+## Memory tracking support for Nim.
+
+when not defined(memTracker):
+  {.error: "Memory tracking support is turned off! Enable memory tracking by passing `--memtracker:on` to the compiler (see the Nim Compiler User Guide for more options).".}
+
+when defined(noSignalHandler):
+  {.error: "Memory tracking works better with the default signal handler.".}
+
+# We don't want to memtrack the tracking code ...
+{.push memtracker: off.}
+
+when declared(getThreadId):
+  template myThreadId(): untyped = getThreadId()
+else:
+  template myThreadId(): untyped = 0
+
+type
+  LogEntry* = object
+    op*: cstring
+    address*: pointer
+    size*: int
+    file*: cstring
+    line*: int
+    thread*: int
+  TrackLog* = object
+    count*: int
+    disabled: bool
+    data*: array[400, LogEntry]
+  TrackLogger* = proc (log: TrackLog) {.nimcall, tags: [], gcsafe.}
+
+var
+  gLog*: TrackLog
+  gLogger*: TrackLogger = proc (log: TrackLog) = discard
+  ilocs: array[4000, (int, int)]
+  ilocn: int
+
+proc trackLocation*(p: pointer; size: int) =
+  ## Registers `p`/`size` for tracking; duplicates and overflow are ignored.
+  let x = (cast[int](p), size)
+  for i in 0..ilocn-1:
+    if ilocs[i] == x: return # already known
+  if ilocn > high(ilocs): return # table full: never write past `ilocs`
+  ilocs[ilocn] = x; inc ilocn
+
+proc setTrackLogger*(logger: TrackLogger) =
+  gLogger = logger
+
+proc addEntry(entry: LogEntry) =
+  if not gLog.disabled: # skipped entirely while tracking is switched off
+    var interesting = false
+    for i in 0..ilocn-1:
+      let p = ilocs[i] # (start address, size) pair stored by trackLocation
+      #  X..Y and C..D overlap iff (X <= D and C <= Y)
+      let x = p[0]
+      let y = p[0]+p[1]-1
+      let c = cast[int](entry.address)
+      let d = c + entry.size-1
+      if x <= d and c <= y:
+        interesting = myThreadId() != entry.thread # was `true`; NOTE(review): visible callers set entry.thread = myThreadId(), so this looks always false — confirm
+        break
+    if interesting:
+      gLog.disabled = true # no further entries are examined while reporting
+      cprintf("interesting %s:%ld %s\n", entry.file, entry.line, entry.op)
+      let x = cast[proc() {.nimcall, tags: [], gcsafe, raises: [].}](writeStackTrace)
+      x()
+      rawQuit 1 # exits here; the buffered-logging path below is commented out
+      #if gLog.count > high(gLog.data):
+      #  gLogger(gLog)
+      #  gLog.count = 0
+      #gLog.data[gLog.count] = entry
+      #inc gLog.count
+      #gLog.disabled = false
+
+proc memTrackerWrite(address: pointer; size: int; file: cstring; line: int) {.compilerproc.} =
+  addEntry LogEntry(op: "write", address: address,
+      size: size, file: file, line: line, thread: myThreadId())
+
+proc memTrackerOp*(op: cstring; address: pointer; size: int) {.tags: [],
+         gcsafe.} =
+  addEntry LogEntry(op: op, address: address, size: size,
+      file: "", line: 0, thread: myThreadId())
+
+proc memTrackerDisable*() =
+  gLog.disabled = true
+
+proc memTrackerEnable*() =
+  gLog.disabled = false
+
+proc logPendingOps() {.noconv.} =
+  # forward declared and called from Nim's signal handler.
+  gLogger(gLog)
+  gLog.count = 0
+
+import std/exitprocs
+addExitProc logPendingOps
+
+{.pop.}
diff --git a/lib/system/mm/boehm.nim b/lib/system/mm/boehm.nim
new file mode 100644
index 000000000..362d2d470
--- /dev/null
+++ b/lib/system/mm/boehm.nim
@@ -0,0 +1,140 @@
+
+
+
+proc boehmGCinit {.importc: "GC_init", boehmGC.}
+proc boehmGC_disable {.importc: "GC_disable", boehmGC.}
+proc boehmGC_enable {.importc: "GC_enable", boehmGC.}
+proc boehmGCincremental {.
+  importc: "GC_enable_incremental", boehmGC.}
+proc boehmGCfullCollect {.importc: "GC_gcollect", boehmGC.}
+proc boehmGC_set_all_interior_pointers(flag: cint) {.
+  importc: "GC_set_all_interior_pointers", boehmGC.}
+proc boehmAlloc(size: int): pointer {.importc: "GC_malloc", boehmGC.}
+proc boehmAllocAtomic(size: int): pointer {.
+  importc: "GC_malloc_atomic", boehmGC.}
+proc boehmRealloc(p: pointer, size: int): pointer {.
+  importc: "GC_realloc", boehmGC.}
+proc boehmDealloc(p: pointer) {.importc: "GC_free", boehmGC.}
+when hasThreadSupport:
+  proc boehmGC_allow_register_threads {.
+    importc: "GC_allow_register_threads", boehmGC.}
+
+proc boehmGetHeapSize: int {.importc: "GC_get_heap_size", boehmGC.}
+  ## Return the number of bytes in the heap.  Excludes collector private
+  ## data structures. Includes empty blocks and fragmentation loss.
+  ## Includes some pages that were allocated but never written.
+
+proc boehmGetFreeBytes: int {.importc: "GC_get_free_bytes", boehmGC.}
+  ## Return a lower bound on the number of free bytes in the heap.
+
+proc boehmGetBytesSinceGC: int {.importc: "GC_get_bytes_since_gc", boehmGC.}
+  ## Return the number of bytes allocated since the last collection.
+
+proc boehmGetTotalBytes: int {.importc: "GC_get_total_bytes", boehmGC.}
+  ## Return the total number of bytes allocated in this process.
+  ## Never decreases.
+
+proc boehmRegisterFinalizer(obj, ff, cd, off, ocd: pointer) {.importc: "GC_register_finalizer", boehmGC.}
+
+proc allocAtomic(size: int): pointer =
+  result = boehmAllocAtomic(size) # GC_malloc_atomic; may hand back nil
+  if result == nil: raiseOutOfMem() # same policy as allocImpl; keeps zeroMem off nil
+  zeroMem(result, size)
+when not defined(useNimRtl):
+
+  proc allocImpl(size: Natural): pointer =
+    result = boehmAlloc(size)
+    if result == nil: raiseOutOfMem()
+  proc alloc0Impl(size: Natural): pointer =
+    result = alloc(size)
+  proc reallocImpl(p: pointer, newSize: Natural): pointer =
+    result = boehmRealloc(p, newSize)
+    if result == nil: raiseOutOfMem()
+  proc realloc0Impl(p: pointer, oldSize, newSize: Natural): pointer =
+    result = boehmRealloc(p, newSize)
+    if result == nil: raiseOutOfMem()
+    if newSize > oldSize:
+      zeroMem(cast[pointer](cast[int](result) + oldSize), newSize - oldSize)
+  proc deallocImpl(p: pointer) = boehmDealloc(p)
+
+  proc allocSharedImpl(size: Natural): pointer = allocImpl(size)
+  proc allocShared0Impl(size: Natural): pointer = alloc0Impl(size)
+  proc reallocSharedImpl(p: pointer, newSize: Natural): pointer = reallocImpl(p, newSize)
+  proc reallocShared0Impl(p: pointer, oldSize, newSize: Natural): pointer = realloc0Impl(p, oldSize, newSize)
+  proc deallocSharedImpl(p: pointer) = deallocImpl(p)
+
+  when hasThreadSupport:
+    proc getFreeSharedMem(): int =
+      boehmGetFreeBytes()
+    proc getTotalSharedMem(): int =
+      boehmGetHeapSize()
+    proc getOccupiedSharedMem(): int =
+      getTotalSharedMem() - getFreeSharedMem()
+
+  #boehmGCincremental()
+
+  proc GC_disable() = boehmGC_disable()
+  proc GC_enable() = boehmGC_enable()
+  proc GC_fullCollect() = boehmGCfullCollect()
+  proc GC_setStrategy(strategy: GC_Strategy) = discard
+  proc GC_enableMarkAndSweep() = discard
+  proc GC_disableMarkAndSweep() = discard
+  proc GC_getStatistics(): string = return ""
+
+  proc getOccupiedMem(): int = return boehmGetHeapSize()-boehmGetFreeBytes()
+  proc getFreeMem(): int = return boehmGetFreeBytes()
+  proc getTotalMem(): int = return boehmGetHeapSize()
+
+  proc nimGC_setStackBottom(theStackBottom: pointer) = discard
+
+proc initGC() =
+  when defined(boehmNoIntPtr):
+    # See #12286
+    boehmGC_set_all_interior_pointers(0)
+  boehmGCinit()
+  when hasThreadSupport:
+    boehmGC_allow_register_threads()
+
+proc boehmgc_finalizer(obj: pointer, typedFinalizer: (proc(x: pointer) {.cdecl.})) =
+  typedFinalizer(obj)
+
+
+proc newObj(typ: PNimType, size: int): pointer {.compilerproc.} =
+  if ntfNoRefs in typ.flags: result = allocAtomic(size)
+  else: result = alloc(size)
+  if typ.finalizer != nil:
+    boehmRegisterFinalizer(result, boehmgc_finalizer, typ.finalizer, nil, nil)
+{.push overflowChecks: on.}
+proc newSeq(typ: PNimType, len: int): pointer {.compilerproc.} =
+  result = newObj(typ, align(GenericSeqSize, typ.base.align) + len * typ.base.size)
+  cast[PGenericSeq](result).len = len
+  cast[PGenericSeq](result).reserved = len
+{.pop.}
+
+proc growObj(old: pointer, newsize: int): pointer =
+  result = realloc(old, newsize)
+
+proc nimGCref(p: pointer) {.compilerproc, inline.} = discard
+proc nimGCunref(p: pointer) {.compilerproc, inline.} = discard
+
+proc unsureAsgnRef(dest: PPointer, src: pointer) {.compilerproc, inline.} =
+  dest[] = src
+proc asgnRef(dest: PPointer, src: pointer) {.compilerproc, inline.} =
+  dest[] = src
+proc asgnRefNoCycle(dest: PPointer, src: pointer) {.compilerproc, inline,
+  deprecated: "old compiler compat".} = asgnRef(dest, src)
+
+type
+  MemRegion = object
+
+proc alloc(r: var MemRegion, size: int): pointer =
+  result = boehmAlloc(size)
+  if result == nil: raiseOutOfMem()
+proc alloc0(r: var MemRegion, size: int): pointer =
+  result = alloc(size)
+  zeroMem(result, size)
+proc dealloc(r: var MemRegion, p: pointer) = boehmDealloc(p)
+proc deallocOsPages(r: var MemRegion) {.inline.} = discard
+proc deallocOsPages() {.inline.} = discard
+
+include "system/cellsets"
diff --git a/lib/system/mm/go.nim b/lib/system/mm/go.nim
new file mode 100644
index 000000000..8f3aeb964
--- /dev/null
+++ b/lib/system/mm/go.nim
@@ -0,0 +1,153 @@
+
+when defined(windows):
+  const goLib = "libgo.dll"
+elif defined(macosx):
+  const goLib = "libgo.dylib"
+else:
+  const goLib = "libgo.so"
+
+proc initGC() = discard
+proc GC_disable() = discard
+proc GC_enable() = discard
+proc go_gc() {.importc: "go_gc", dynlib: goLib.}
+proc GC_fullCollect() = go_gc()
+proc GC_setStrategy(strategy: GC_Strategy) = discard
+proc GC_enableMarkAndSweep() = discard
+proc GC_disableMarkAndSweep() = discard
+
+const
+  goNumSizeClasses = 67
+
+type
+  goMStats = object
+    alloc: uint64          # bytes allocated and still in use
+    total_alloc: uint64    # bytes allocated (even if freed)
+    sys: uint64            # bytes obtained from system
+    nlookup: uint64        # number of pointer lookups
+    nmalloc: uint64        # number of mallocs
+    nfree: uint64          # number of frees
+    heap_objects: uint64   # total number of allocated objects
+    pause_total_ns: uint64 # cumulative nanoseconds in GC stop-the-world pauses since the program started
+    numgc: uint32          # number of completed GC cycles
+
+proc goMemStats(): goMStats {.importc: "go_mem_stats", dynlib: goLib.}
+proc goMalloc(size: uint): pointer {.importc: "go_malloc", dynlib: goLib.}
+proc goSetFinalizer(obj: pointer, f: pointer) {.importc: "set_finalizer", codegenDecl:"$1 $2$3 __asm__ (\"main.Set_finalizer\");\n$1 $2$3", dynlib: goLib.}
+proc writebarrierptr(dest: PPointer, src: pointer) {.importc: "writebarrierptr", codegenDecl:"$1 $2$3 __asm__ (\"main.Atomic_store_pointer\");\n$1 $2$3", dynlib: goLib.}
+
+proc GC_getStatistics(): string =
+  var mstats = goMemStats()
+  result = "[GC] total allocated memory: " & $(mstats.total_alloc) & "\n" &
+           "[GC] total memory obtained from system: " & $(mstats.sys) & "\n" &
+           "[GC] occupied memory: " & $(mstats.alloc) & "\n" &
+           "[GC] number of pointer lookups: " & $(mstats.nlookup) & "\n" &
+           "[GC] number of mallocs: " & $(mstats.nmalloc) & "\n" &
+           "[GC] number of frees: " & $(mstats.nfree) & "\n" &
+           "[GC] heap objects: " & $(mstats.heap_objects) & "\n" &
+           "[GC] number of completed GC cycles: " & $(mstats.numgc) & "\n" &
+           "[GC] total GC pause time [ms]: " & $(mstats.pause_total_ns div 1000_000)
+
+proc getOccupiedMem(): int =
+  var mstats = goMemStats()
+  result = int(mstats.alloc)
+
+proc getFreeMem(): int =
+  var mstats = goMemStats()
+  result = int(mstats.sys - mstats.alloc)
+
+proc getTotalMem(): int =
+  var mstats = goMemStats()
+  result = int(mstats.sys)
+
+proc nimGC_setStackBottom(theStackBottom: pointer) = discard
+
+proc allocImpl(size: Natural): pointer =
+  result = goMalloc(size.uint)
+
+proc alloc0Impl(size: Natural): pointer =
+  result = goMalloc(size.uint)
+
+proc reallocImpl(p: pointer, newsize: Natural): pointer =
+  doAssert false, "not implemented"
+
+proc realloc0Impl(p: pointer, oldsize, newsize: Natural): pointer =
+  doAssert false, "not implemented"
+
+proc deallocImpl(p: pointer) =
+  discard
+
+proc allocSharedImpl(size: Natural): pointer = allocImpl(size)
+proc allocShared0Impl(size: Natural): pointer = alloc0Impl(size)
+proc reallocSharedImpl(p: pointer, newsize: Natural): pointer = reallocImpl(p, newsize)
+proc reallocShared0Impl(p: pointer, oldsize, newsize: Natural): pointer = realloc0Impl(p, oldsize, newsize)
+proc deallocSharedImpl(p: pointer) = deallocImpl(p)
+
+when hasThreadSupport:
+  proc getFreeSharedMem(): int = discard
+  proc getTotalSharedMem(): int = discard
+  proc getOccupiedSharedMem(): int = discard
+
+proc newObj(typ: PNimType, size: int): pointer {.compilerproc.} =
+  writebarrierptr(addr(result), goMalloc(size.uint))
+  if typ.finalizer != nil:
+    goSetFinalizer(result, typ.finalizer)
+
+proc newObjRC1(typ: PNimType, size: int): pointer {.compilerRtl.} =
+  writebarrierptr(addr(result), newObj(typ, size))
+
+proc newObjNoInit(typ: PNimType, size: int): pointer =
+  writebarrierptr(addr(result), newObj(typ, size))
+
+proc newSeq(typ: PNimType, len: int): pointer {.compilerproc.} =
+  writebarrierptr(addr(result), newObj(typ, align(GenericSeqSize, typ.base.align) + len * typ.base.size))
+  cast[PGenericSeq](result).len = len
+  cast[PGenericSeq](result).reserved = len
+  cast[PGenericSeq](result).elemSize = typ.base.size
+  cast[PGenericSeq](result).elemAlign = typ.base.align
+
+proc newSeqRC1(typ: PNimType, len: int): pointer {.compilerRtl.} =
+  writebarrierptr(addr(result), newSeq(typ, len))
+
+proc nimNewSeqOfCap(typ: PNimType, cap: int): pointer {.compilerproc.} =
+  result = newObjNoInit(typ, align(GenericSeqSize, typ.base.align) + cap * typ.base.size)
+  cast[PGenericSeq](result).len = 0
+  cast[PGenericSeq](result).reserved = cap
+  cast[PGenericSeq](result).elemSize = typ.base.size
+  cast[PGenericSeq](result).elemAlign = typ.base.align
+
+proc typedMemMove(dest: pointer, src: pointer, size: uint) {.importc: "typedmemmove", dynlib: goLib.}
+
+proc growObj(old: pointer, newsize: int): pointer =
+  # the Go GC doesn't have a realloc, so a new block is allocated and the old seq copied into it
+  let old = cast[PGenericSeq](old)
+  var metadataOld = cast[PGenericSeq](old) # same pointer as `old`; used to patch the header in place
+  if metadataOld.elemSize == 0:
+    metadataOld.elemSize = 1 # NOTE(review): presumably for headers that never had elemSize set — confirm
+
+  let oldsize = align(GenericSeqSize, old.elemAlign) + old.len * old.elemSize # padded header + len elements
+  writebarrierptr(addr(result), goMalloc(newsize.uint))
+  typedMemMove(result, old, oldsize.uint) # copies `oldsize` bytes; `newsize` only sizes the new block
+
+proc nimGCref(p: pointer) {.compilerproc, inline.} = discard
+proc nimGCunref(p: pointer) {.compilerproc, inline.} = discard
+proc nimGCunrefNoCycle(p: pointer) {.compilerproc, inline.} = discard
+proc nimGCunrefRC1(p: pointer) {.compilerproc, inline.} = discard
+proc nimGCvisit(d: pointer, op: int) {.compilerRtl.} = discard
+
+proc unsureAsgnRef(dest: PPointer, src: pointer) {.compilerproc, inline.} =
+  writebarrierptr(dest, src)
+proc asgnRef(dest: PPointer, src: pointer) {.compilerproc, inline.} =
+  writebarrierptr(dest, src)
+proc asgnRefNoCycle(dest: PPointer, src: pointer) {.compilerproc, inline,
+  deprecated: "old compiler compat".} = asgnRef(dest, src)
+
+type
+  MemRegion = object
+
+proc alloc(r: var MemRegion, size: int): pointer =
+  result = alloc(size)
+proc alloc0(r: var MemRegion, size: int): pointer =
+  result = alloc0Impl(size)
+proc dealloc(r: var MemRegion, p: pointer) = dealloc(p)
+proc deallocOsPages(r: var MemRegion) {.inline.} = discard
+proc deallocOsPages() {.inline.} = discard
diff --git a/lib/system/mm/malloc.nim b/lib/system/mm/malloc.nim
new file mode 100644
index 000000000..47f1a95ae
--- /dev/null
+++ b/lib/system/mm/malloc.nim
@@ -0,0 +1,97 @@
+
+{.push stackTrace: off.}
+
+proc allocImpl(size: Natural): pointer =
+  result = c_malloc(size.csize_t)
+  when defined(zephyr):
+    if result == nil:
+      raiseOutOfMem()
+
+proc alloc0Impl(size: Natural): pointer =
+  result = c_calloc(size.csize_t, 1)
+  when defined(zephyr):
+    if result == nil:
+      raiseOutOfMem()
+
+proc reallocImpl(p: pointer, newSize: Natural): pointer =
+  result = c_realloc(p, newSize.csize_t)
+  when defined(zephyr):
+    if result == nil:
+      raiseOutOfMem()
+
+proc realloc0Impl(p: pointer, oldsize, newSize: Natural): pointer =
+  result = realloc(p, newSize.csize_t) # may be nil when the C allocator fails
+  if result != nil and newSize > oldSize: # zero only the grown tail of a real block
+    zeroMem(cast[pointer](cast[uint](result) + uint(oldSize)), newSize - oldSize)
+
+proc deallocImpl(p: pointer) =
+  c_free(p)
+
+
+# The shared allocators map on the regular ones
+
+proc allocSharedImpl(size: Natural): pointer =
+  allocImpl(size)
+
+proc allocShared0Impl(size: Natural): pointer =
+  alloc0Impl(size)
+
+proc reallocSharedImpl(p: pointer, newSize: Natural): pointer =
+  reallocImpl(p, newSize)
+
+proc reallocShared0Impl(p: pointer, oldsize, newSize: Natural): pointer =
+  realloc0Impl(p, oldSize, newSize)
+
+proc deallocSharedImpl(p: pointer) = deallocImpl(p)
+
+
+# Empty stubs for the GC
+
+proc GC_disable() = discard
+proc GC_enable() = discard
+
+when not defined(gcOrc):
+  proc GC_fullCollect() = discard
+  proc GC_enableMarkAndSweep() = discard
+  proc GC_disableMarkAndSweep() = discard
+
+proc GC_setStrategy(strategy: GC_Strategy) = discard
+
+proc getOccupiedMem(): int = discard
+proc getFreeMem(): int = discard
+proc getTotalMem(): int = discard
+
+proc nimGC_setStackBottom(theStackBottom: pointer) = discard
+
+proc initGC() = discard
+
+proc newObjNoInit(typ: PNimType, size: int): pointer =
+  result = alloc(size)
+
+proc growObj(old: pointer, newsize: int): pointer =
+  result = realloc(old, newsize)
+
+proc nimGCref(p: pointer) {.compilerproc, inline.} = discard
+proc nimGCunref(p: pointer) {.compilerproc, inline.} = discard
+
+when not defined(gcDestructors):
+  proc unsureAsgnRef(dest: PPointer, src: pointer) {.compilerproc, inline.} =
+    dest[] = src
+
+proc asgnRef(dest: PPointer, src: pointer) {.compilerproc, inline.} =
+  dest[] = src
+proc asgnRefNoCycle(dest: PPointer, src: pointer) {.compilerproc, inline,
+  deprecated: "old compiler compat".} = asgnRef(dest, src)
+
+type
+  MemRegion = object
+
+proc alloc(r: var MemRegion, size: int): pointer =
+  result = alloc(size)
+proc alloc0(r: var MemRegion, size: int): pointer =
+  result = alloc0Impl(size)
+proc dealloc(r: var MemRegion, p: pointer) = dealloc(p)
+proc deallocOsPages(r: var MemRegion) = discard
+proc deallocOsPages() = discard
+
+{.pop.}
diff --git a/lib/system/mm/none.nim b/lib/system/mm/none.nim
new file mode 100644
index 000000000..7818a0805
--- /dev/null
+++ b/lib/system/mm/none.nim
@@ -0,0 +1,46 @@
+
+when appType == "lib":
+  {.warning: "nogc in a library context may not work".}
+
+include "system/alloc"
+
+proc initGC() = discard
+proc GC_disable() = discard
+proc GC_enable() = discard
+proc GC_fullCollect() = discard
+proc GC_setStrategy(strategy: GC_Strategy) = discard
+proc GC_enableMarkAndSweep() = discard
+proc GC_disableMarkAndSweep() = discard
+proc GC_getStatistics(): string = return ""
+
+proc newObj(typ: PNimType, size: int): pointer {.compilerproc.} =
+  result = alloc0Impl(size)
+
+proc newObjNoInit(typ: PNimType, size: int): pointer =
+  result = alloc(size)
+
+{.push overflowChecks: on.}
+proc newSeq(typ: PNimType, len: int): pointer {.compilerproc.} =
+  # Pad the header to the *element* alignment (was `typ.align`, the seq's own).
+  result = newObj(typ, align(GenericSeqSize, typ.base.align) + len * typ.base.size)
+  cast[PGenericSeq](result).len = len; cast[PGenericSeq](result).reserved = len
+{.pop.}
+
+proc growObj(old: pointer, newsize: int): pointer =
+  result = realloc(old, newsize)
+
+proc nimGC_setStackBottom(theStackBottom: pointer) = discard
+proc nimGCref(p: pointer) {.compilerproc, inline.} = discard
+proc nimGCunref(p: pointer) {.compilerproc, inline.} = discard
+
+proc unsureAsgnRef(dest: PPointer, src: pointer) {.compilerproc, inline.} =
+  dest[] = src
+proc asgnRef(dest: PPointer, src: pointer) {.compilerproc, inline.} =
+  dest[] = src
+proc asgnRefNoCycle(dest: PPointer, src: pointer) {.compilerproc, inline,
+  deprecated: "old compiler compat".} = asgnRef(dest, src)
+
+var allocator {.rtlThreadVar.}: MemRegion
+instantiateForRegion(allocator)
+
+include "system/cellsets"
diff --git a/lib/system/mmdisp.nim b/lib/system/mmdisp.nim
index 9f37e95c1..26f2f0bbf 100755..100644
--- a/lib/system/mmdisp.nim
+++ b/lib/system/mmdisp.nim
@@ -1,14 +1,14 @@
 #
 #
-#            Nimrod's Runtime Library
-#        (c) Copyright 2013 Andreas Rumpf
+#            Nim's Runtime Library
+#        (c) Copyright 2015 Andreas Rumpf
 #
 #    See the file "copying.txt", included in this
 #    distribution, for details about the copyright.
 #
 
-# Nimrod high-level memory manager: It supports Boehm's GC, no GC and the
-# native Nimrod GC. The native Nimrod GC is the default.
+# Nim high-level memory manager: It supports Boehm's GC, Go's GC, no GC and the
+# native Nim GC. The native Nim GC is the default.
 
 #{.push checks:on, assertions:on.}
 {.push checks:off.}
@@ -17,305 +17,104 @@ const
   debugGC = false # we wish to debug the GC...
   logGC = false
   traceGC = false # extensive debugging
-  alwaysCycleGC = false
-  alwaysGC = false # collect after every memory allocation (for debugging)
-  leakDetector = false
-  overwriteFree = false
+  alwaysCycleGC = defined(nimSmokeCycles)
+  alwaysGC = defined(nimFulldebug) # collect after every memory
+                                # allocation (for debugging)
+  leakDetector = defined(nimLeakDetector)
+  overwriteFree = defined(nimBurnFree) # overwrite memory with 0xFF before free
   trackAllocationSource = leakDetector
-  
+
   cycleGC = true # (de)activate the cycle GC
   reallyDealloc = true # for debugging purposes this can be set to false
   reallyOsDealloc = true
   coalescRight = true
   coalescLeft = true
+  logAlloc = false
+  useCellIds = defined(nimCorruption)
 
 type
   PPointer = ptr pointer
-  TByteArray = array[0..1000_0000, byte]
-  PByte = ptr TByteArray
+  ByteArray = UncheckedArray[byte]
+  PByte = ptr ByteArray
   PString = ptr string
 
-# Page size of the system; in most cases 4096 bytes. For exotic OS or
-# CPU this needs to be changed:
-const
-  PageShift = 12
-  PageSize = 1 shl PageShift
-  PageMask = PageSize-1
-
-  MemAlign = 8 # also minimal allocatable memory block
-
-  BitsPerPage = PageSize div MemAlign
-  UnitsPerPage = BitsPerPage div (sizeof(int)*8)
-    # how many ints do we need to describe a page:
-    # on 32 bit systems this is only 16 (!)
-
-  TrunkShift = 9
-  BitsPerTrunk = 1 shl TrunkShift # needs to be power of 2 and divisible by 64
-  TrunkMask = BitsPerTrunk - 1
-  IntsPerTrunk = BitsPerTrunk div (sizeof(int)*8)
-  IntShift = 5 + ord(sizeof(int) == 8) # 5 or 6, depending on int width
-  IntMask = 1 shl IntShift - 1
+when declared(IntsPerTrunk):
+  discard
+else:
+  include bitmasks
 
 proc raiseOutOfMem() {.noinline.} =
   if outOfMemHook != nil: outOfMemHook()
-  echo("out of memory")
-  quit(1)
+  cstderr.rawWrite("out of memory\n")
+  rawQuit(1)
 
 when defined(boehmgc):
-  when defined(windows):
-    const boehmLib = "boehmgc.dll"
-  elif defined(macosx):
-    const boehmLib = "libgc.dylib"
-  else:
-    const boehmLib = "/usr/lib/libgc.so.1"
-    
-  proc boehmGCinit {.importc: "GC_init", dynlib: boehmLib.}
-  proc boehmGC_disable {.importc: "GC_disable", dynlib: boehmLib.} 
-  proc boehmGC_enable {.importc: "GC_enable", dynlib: boehmLib.} 
-  proc boehmGCincremental {.
-    importc: "GC_enable_incremental", dynlib: boehmLib.} 
-  proc boehmGCfullCollect {.importc: "GC_gcollect", dynlib: boehmLib.}  
-  proc boehmAlloc(size: int): pointer {.
-    importc: "GC_malloc", dynlib: boehmLib.}
-  proc boehmAllocAtomic(size: int): pointer {.
-    importc: "GC_malloc_atomic", dynlib: boehmLib.}
-  proc boehmRealloc(p: pointer, size: int): pointer {.
-    importc: "GC_realloc", dynlib: boehmLib.}
-  proc boehmDealloc(p: pointer) {.importc: "GC_free", dynlib: boehmLib.}
-  
-  proc boehmGetHeapSize: int {.importc: "GC_get_heap_size", dynlib: boehmLib.}
-    ## Return the number of bytes in the heap.  Excludes collector private
-    ## data structures. Includes empty blocks and fragmentation loss.
-    ## Includes some pages that were allocated but never written.
-
-  proc boehmGetFreeBytes: int {.importc: "GC_get_free_bytes", dynlib: boehmLib.}
-    ## Return a lower bound on the number of free bytes in the heap.
-
-  proc boehmGetBytesSinceGC: int {.importc: "GC_get_bytes_since_gc",
-    dynlib: boehmLib.}
-    ## Return the number of bytes allocated since the last collection.
-
-  proc boehmGetTotalBytes: int {.importc: "GC_get_total_bytes",
-    dynlib: boehmLib.}
-    ## Return the total number of bytes allocated in this process.
-    ## Never decreases.
-
-  proc allocAtomic(size: int): pointer =
-    result = boehmAllocAtomic(size)
-    zeroMem(result, size)
-
-  when not defined(useNimRtl):
-    
-    proc alloc(size: int): pointer =
-      result = boehmAlloc(size)
-      if result == nil: raiseOutOfMem()
-    proc alloc0(size: int): pointer =
-      result = alloc(size)
-      zeroMem(result, size)
-    proc realloc(p: Pointer, newsize: int): pointer =
-      result = boehmRealloc(p, newsize)
-      if result == nil: raiseOutOfMem()
-    proc dealloc(p: Pointer) = boehmDealloc(p)
-    
-    proc allocShared(size: int): pointer =
-      result = boehmAlloc(size)
-      if result == nil: raiseOutOfMem()
-    proc allocShared0(size: int): pointer =
-      result = alloc(size)
-      zeroMem(result, size)
-    proc reallocShared(p: Pointer, newsize: int): pointer =
-      result = boehmRealloc(p, newsize)
-      if result == nil: raiseOutOfMem()
-    proc deallocShared(p: Pointer) = boehmDealloc(p)
-
-    #boehmGCincremental()
-
-    proc GC_disable() = boehmGC_disable()
-    proc GC_enable() = boehmGC_enable()
-    proc GC_fullCollect() = boehmGCfullCollect()
-    proc GC_setStrategy(strategy: TGC_Strategy) = nil
-    proc GC_enableMarkAndSweep() = nil
-    proc GC_disableMarkAndSweep() = nil
-    proc GC_getStatistics(): string = return ""
-    
-    proc getOccupiedMem(): int = return boehmGetHeapSize()-boehmGetFreeBytes()
-    proc getFreeMem(): int = return boehmGetFreeBytes()
-    proc getTotalMem(): int = return boehmGetHeapSize()
-
-    proc setStackBottom(theStackBottom: pointer) = nil
-
-  proc initGC() = 
-    when defined(macosx): boehmGCinit()
+  include system / mm / boehm
 
-  proc newObj(typ: PNimType, size: int): pointer {.compilerproc.} =
-    if ntfNoRefs in typ.flags: result = allocAtomic(size)
-    else: result = alloc(size)
-  proc newSeq(typ: PNimType, len: int): pointer {.compilerproc.} =
-    result = newObj(typ, addInt(mulInt(len, typ.base.size), GenericSeqSize))
-    cast[PGenericSeq](result).len = len
-    cast[PGenericSeq](result).reserved = len
+elif defined(gogc):
+  include system / mm / go
 
-  proc growObj(old: pointer, newsize: int): pointer =
-    result = realloc(old, newsize)
+elif (defined(nogc) or defined(gcDestructors)) and defined(useMalloc):
+  include system / mm / malloc
 
-  proc nimGCref(p: pointer) {.compilerproc, inline.} = nil
-  proc nimGCunref(p: pointer) {.compilerproc, inline.} = nil
-  
-  proc unsureAsgnRef(dest: ppointer, src: pointer) {.compilerproc, inline.} =
-    dest[] = src
-  proc asgnRef(dest: ppointer, src: pointer) {.compilerproc, inline.} =
-    dest[] = src
-  proc asgnRefNoCycle(dest: ppointer, src: pointer) {.compilerproc, inline.} =
-    dest[] = src
+  when defined(nogc):
+    proc GC_getStatistics(): string = ""
+    proc newObj(typ: PNimType, size: int): pointer {.compilerproc.} =
+      result = alloc0(size)
 
-  type
-    TMemRegion = object {.final, pure.}
-  
-  proc Alloc(r: var TMemRegion, size: int): pointer =
-    result = boehmAlloc(size)
-    if result == nil: raiseOutOfMem()
-  proc Alloc0(r: var TMemRegion, size: int): pointer =
-    result = alloc(size)
-    zeroMem(result, size)
-  proc Dealloc(r: var TMemRegion, p: Pointer) = boehmDealloc(p)  
-  proc deallocOsPages(r: var TMemRegion) {.inline.} = nil
-  proc deallocOsPages() {.inline.} = nil
-
-  include "system/cellsets"
-elif defined(nogc) and defined(useMalloc):
-  
-  when not defined(useNimRtl):
-    proc alloc(size: int): pointer =
-      result = cmalloc(size)
-      if result == nil: raiseOutOfMem()
-    proc alloc0(size: int): pointer =
-      result = alloc(size)
-      zeroMem(result, size)
-    proc realloc(p: Pointer, newsize: int): pointer =
-      result = crealloc(p, newsize)
-      if result == nil: raiseOutOfMem()
-    proc dealloc(p: Pointer) = cfree(p)
-    
-    proc allocShared(size: int): pointer =
-      result = cmalloc(size)
-      if result == nil: raiseOutOfMem()
-    proc allocShared0(size: int): pointer =
-      result = alloc(size)
-      zeroMem(result, size)
-    proc reallocShared(p: Pointer, newsize: int): pointer =
-      result = crealloc(p, newsize)
-      if result == nil: raiseOutOfMem()
-    proc deallocShared(p: Pointer) = cfree(p)
-
-    proc GC_disable() = nil
-    proc GC_enable() = nil
-    proc GC_fullCollect() = nil
-    proc GC_setStrategy(strategy: TGC_Strategy) = nil
-    proc GC_enableMarkAndSweep() = nil
-    proc GC_disableMarkAndSweep() = nil
-    proc GC_getStatistics(): string = return ""
-    
-    proc getOccupiedMem(): int = nil
-    proc getFreeMem(): int = nil
-    proc getTotalMem(): int = nil
-    
-    proc setStackBottom(theStackBottom: pointer) = nil
-
-  proc initGC() = nil
-
-  proc newObj(typ: PNimType, size: int): pointer {.compilerproc.} =
-    result = alloc(size)
-  proc newSeq(typ: PNimType, len: int): pointer {.compilerproc.} =
-    result = newObj(typ, addInt(mulInt(len, typ.base.size), GenericSeqSize))
-    cast[PGenericSeq](result).len = len
-    cast[PGenericSeq](result).reserved = len
-
-  proc growObj(old: pointer, newsize: int): pointer =
-    result = realloc(old, newsize)
-
-  proc nimGCref(p: pointer) {.compilerproc, inline.} = nil
-  proc nimGCunref(p: pointer) {.compilerproc, inline.} = nil
-  
-  proc unsureAsgnRef(dest: ppointer, src: pointer) {.compilerproc, inline.} =
-    dest[] = src
-  proc asgnRef(dest: ppointer, src: pointer) {.compilerproc, inline.} =
-    dest[] = src
-  proc asgnRefNoCycle(dest: ppointer, src: pointer) {.compilerproc, inline.} =
-    dest[] = src
-
-  type
-    TMemRegion = object {.final, pure.}
-  
-  proc Alloc(r: var TMemRegion, size: int): pointer =
-    result = alloc(size)
-  proc Alloc0(r: var TMemRegion, size: int): pointer =
-    result = alloc0(size)
-  proc Dealloc(r: var TMemRegion, p: Pointer) = Dealloc(p)
-  proc deallocOsPages(r: var TMemRegion) {.inline.} = nil
-  proc deallocOsPages() {.inline.} = nil
+    proc newSeq(typ: PNimType, len: int): pointer {.compilerproc.} = # typ: seq RTTI
+      result = newObj(typ, align(GenericSeqSize, typ.base.align) + len * typ.base.size)
+      cast[PGenericSeq](result).len = len # header padded to element (typ.base) alignment
+      cast[PGenericSeq](result).reserved = len # capacity equals the requested length
 
 elif defined(nogc):
-  # Even though we don't want the GC, we cannot simply use C's memory manager
-  # because Nimrod's runtime wants ``realloc`` to zero out the additional
-  # space which C's ``realloc`` does not. And we cannot get the old size of an
-  # object, because C does not support this operation... Even though every
-  # possible implementation has to have a way to determine the object's size.
-  # C just sucks.
-  when appType == "lib": 
-    {.warning: "nogc in a library context may not work".}
-  
-  include "system/alloc"
-
-  proc initGC() = nil
-  proc GC_disable() = nil
-  proc GC_enable() = nil
-  proc GC_fullCollect() = nil
-  proc GC_setStrategy(strategy: TGC_Strategy) = nil
-  proc GC_enableMarkAndSweep() = nil
-  proc GC_disableMarkAndSweep() = nil
-  proc GC_getStatistics(): string = return ""
-  
-  
-  proc newObj(typ: PNimType, size: int): pointer {.compilerproc.} =
-    result = alloc0(size)
-  proc newSeq(typ: PNimType, len: int): pointer {.compilerproc.} =
-    result = newObj(typ, addInt(mulInt(len, typ.base.size), GenericSeqSize))
-    cast[PGenericSeq](result).len = len
-    cast[PGenericSeq](result).reserved = len
-  proc growObj(old: pointer, newsize: int): pointer =
-    result = realloc(old, newsize)
-
-  proc setStackBottom(theStackBottom: pointer) = nil
-  proc nimGCref(p: pointer) {.compilerproc, inline.} = nil
-  proc nimGCunref(p: pointer) {.compilerproc, inline.} = nil
-  
-  proc unsureAsgnRef(dest: ppointer, src: pointer) {.compilerproc, inline.} =
-    dest[] = src
-  proc asgnRef(dest: ppointer, src: pointer) {.compilerproc, inline.} =
-    dest[] = src
-  proc asgnRefNoCycle(dest: ppointer, src: pointer) {.compilerproc, inline.} =
-    dest[] = src
-
-  var allocator {.rtlThreadVar.}: TMemRegion
-  InstantiateForRegion(allocator)
-
-  include "system/cellsets"
+  include system / mm / none
 
 else:
-  include "system/alloc"
-
-  include "system/cellsets"
-  when not leakDetector:
-    sysAssert(sizeof(TCell) == sizeof(TFreeCell), "sizeof TFreeCell")
-  when compileOption("gc", "v2"):
-    include "system/gc2"
+  when not defined(gcRegions):
+    include "system/alloc"
+
+    when not usesDestructors:
+      include "system/cellsets"
+    when not leakDetector and not useCellIds and not defined(nimV2):
+      sysAssert(sizeof(Cell) == sizeof(FreeCell), "sizeof FreeCell")
+  when defined(gcRegions):
+    # XXX due to bootstrapping reasons, we cannot use  compileOption("gc", "stack") here
+    include "system/gc_regions"
+  elif defined(nimV2) or usesDestructors:
+    when not defined(useNimRtl):
+      var allocator {.rtlThreadVar.}: MemRegion
+      instantiateForRegion(allocator)
+    when defined(gcHooks):
+      include "system/gc_hooks"
   elif defined(gcMarkAndSweep):
     # XXX use 'compileOption' here
     include "system/gc_ms"
-  elif defined(gcGenerational):
-    include "system/gc_genms"
   else:
     include "system/gc"
-  
+
+when not declared(nimNewSeqOfCap) and not defined(nimSeqsV2): # only if not yet declared
+  {.push overflowChecks: on.} # the size arithmetic below is overflow-checked
+  proc nimNewSeqOfCap(typ: PNimType, cap: int): pointer {.compilerproc.} =
+    when defined(gcRegions):
+      let s = cap * typ.base.size  # newStr already adds GenericSeqSize
+      result = newStr(typ, s, ntfNoRefs notin typ.base.flags)
+    else:
+      let s = align(GenericSeqSize, typ.base.align) + cap * typ.base.size
+      when declared(newObjNoInit): # ntfNoRefs elements take the NoInit allocation path
+        result = if ntfNoRefs in typ.base.flags: newObjNoInit(typ, s) else: newObj(typ, s)
+      else:
+        result = newObj(typ, s)
+      cast[PGenericSeq](result).len = 0 # the new seq starts out empty
+      cast[PGenericSeq](result).reserved = cap # with room for `cap` elements
+  {.pop.}
+
 {.pop.}
 
+when not declared(ForeignCell): # only defined if nothing earlier declared ForeignCell
+  type ForeignCell* = object
+    data*: pointer
+
+  proc protect*(x: pointer): ForeignCell = ForeignCell(data: x) # merely wraps `x`
+  proc dispose*(x: ForeignCell) = discard # nothing to release in this variant
+  proc isNotForeign*(x: ForeignCell): bool = false # constant answer in this variant
diff --git a/lib/system/nimscript.nim b/lib/system/nimscript.nim
new file mode 100644
index 000000000..cf81f6d86
--- /dev/null
+++ b/lib/system/nimscript.nim
@@ -0,0 +1,452 @@
+#
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2015 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+## To learn about scripting in Nim see `NimScript<nims.html>`_
+
+# Nim's configuration system now uses Nim for scripting. This module provides
+# a few things that are required for this to work.
+
+const
+  buildOS* {.magic: "BuildOS".}: string = ""
+    ## The OS this build is running on. Can be different from `system.hostOS`
+    ## for cross compilations.
+
+  buildCPU* {.magic: "BuildCPU".}: string = ""
+    ## The CPU this build is running on. Can be different from `system.hostCPU`
+    ## for cross compilations.
+
+template builtin = discard
+
+# We know the effects better than the compiler:
+{.push hint[XDeclaredButNotUsed]: off.}
+
+proc listDirsImpl(dir: string): seq[string] {.
+  tags: [ReadIOEffect], raises: [OSError].} = builtin
+proc listFilesImpl(dir: string): seq[string] {.
+  tags: [ReadIOEffect], raises: [OSError].} = builtin
+proc removeDir(dir: string, checkDir = true) {.
+  tags: [ReadIOEffect, WriteIOEffect], raises: [OSError].} = builtin
+proc removeFile(dir: string) {.
+  tags: [ReadIOEffect, WriteIOEffect], raises: [OSError].} = builtin
+proc moveFile(src, dest: string) {.
+  tags: [ReadIOEffect, WriteIOEffect], raises: [OSError].} = builtin
+proc moveDir(src, dest: string) {.
+  tags: [ReadIOEffect, WriteIOEffect], raises: [OSError].} = builtin
+proc copyFile(src, dest: string) {.
+  tags: [ReadIOEffect, WriteIOEffect], raises: [OSError].} = builtin
+proc copyDir(src, dest: string) {.
+  tags: [ReadIOEffect, WriteIOEffect], raises: [OSError].} = builtin
+proc createDir(dir: string) {.tags: [WriteIOEffect], raises: [OSError].} =
+  builtin
+
+proc getError: string = builtin
+proc setCurrentDir(dir: string) = builtin
+proc getCurrentDir*(): string =
+  ## Retrieves the current working directory.
+  builtin
+proc rawExec(cmd: string): int {.tags: [ExecIOEffect], raises: [OSError].} =
+  builtin
+
+proc warningImpl(arg, orig: string) = discard
+proc hintImpl(arg, orig: string) = discard
+
+proc paramStr*(i: int): string =
+  ## Retrieves the `i`'th command line parameter.
+  builtin
+
+proc paramCount*(): int =
+  ## Retrieves the number of command line parameters.
+  builtin
+
+proc switch*(key: string, val="") =
+  ## Sets a Nim compiler command line switch, for
+  ## example `switch("checks", "on")`.
+  builtin
+
+proc warning*(name: string; val: bool) =
+  ## Disables or enables a specific warning.
+  let v = if val: "on" else: "off"
+  warningImpl(name & ":" & v, "warning:" & name & ":" & v)
+
+proc hint*(name: string; val: bool) =
+  ## Disables or enables a specific hint.
+  let v = if val: "on" else: "off"
+  hintImpl(name & ":" & v, "hint:" & name & ":" & v)
+
+proc patchFile*(package, filename, replacement: string) =
+  ## Overrides the location of a given file belonging to the
+  ## passed package.
+  ## If the `replacement` is not an absolute path, the path
+  ## is interpreted to be local to the Nimscript file that contains
+  ## the call to `patchFile`, Nim's `--path` is not used at all
+  ## to resolve the filename!
+  ## The compiler also performs `path substitution <nimc.html#compiler-usage-commandminusline-switches>`_ on `replacement`.
+  ##
+  ## Example:
+  ##   ```nim
+  ##   patchFile("stdlib", "asyncdispatch", "patches/replacement")
+  ##   ```
+  discard
+
+proc getCommand*(): string =
+  ## Gets the Nim command that the compiler has been invoked with, for example
+  ## "c", "js", "build", "help".
+  builtin
+
+proc setCommand*(cmd: string; project="") =
+  ## Sets the Nim command that should be continued with after this Nimscript
+  ## has finished.
+  builtin
+
+proc cmpIgnoreStyle(a, b: string): int = builtin
+proc cmpIgnoreCase(a, b: string): int = builtin
+
+proc cmpic*(a, b: string): int =
+  ## Compares `a` and `b` ignoring case.
+  cmpIgnoreCase(a, b)
+
+proc getEnv*(key: string; default = ""): string {.tags: [ReadIOEffect].} =
+  ## Retrieves the environment variable of name `key`.
+  builtin
+
+proc existsEnv*(key: string): bool {.tags: [ReadIOEffect].} =
+  ## Checks for the existence of an environment variable named `key`.
+  builtin
+
+proc putEnv*(key, val: string) {.tags: [WriteIOEffect].} =
+  ## Sets the value of the environment variable named `key` to `val`.
+  builtin
+
+proc delEnv*(key: string) {.tags: [WriteIOEffect].} =
+  ## Deletes the environment variable named `key`.
+  builtin
+
+proc fileExists*(filename: string): bool {.tags: [ReadIOEffect].} =
+  ## Checks if the file exists.
+  builtin
+
+proc dirExists*(dir: string): bool {.
+  tags: [ReadIOEffect].} =
+  ## Checks if the directory `dir` exists.
+  builtin
+
+proc selfExe*(): string {.deprecated: "Deprecated since v1.7; Use getCurrentCompilerExe".} =
+  ## Returns the currently running nim or nimble executable.
+  builtin
+
+proc toExe*(filename: string): string =
+  ## On Windows adds ".exe" to `filename`, else returns `filename` unmodified.
+  (when defined(windows): filename & ".exe" else: filename)
+
+proc toDll*(filename: string): string =
+  ## On Windows adds ".dll" to `filename`, on Posix produces "lib$filename.so".
+  (when defined(windows): filename & ".dll" else: "lib" & filename & ".so")
+
+proc strip(s: string): string = # drops leading blanks, then one enclosing "..." pair
+  var i = 0
+  while i < s.len and s[i] in {' ', '\c', '\n'}: inc i # never index past the end
+  result = s.substr(i)
+  if result.len >= 2 and result[0] == '"' and result[^1] == '"':
+    result = result[1..^2] # len >= 2 guarantees two distinct quote characters
+
+template `--`*(key, val: untyped) =
+  ## A shortcut for `switch <#switch,string,string>`_
+  ## Example:
+  ##   ```nim
+  ##   --path:somePath # same as switch("path", "somePath")
+  ##   --path:"someOtherPath" # same as switch("path", "someOtherPath")
+  ##   --hint:"[Conf]:off" # same as switch("hint", "[Conf]:off")
+  ##   ```
+  switch(strip(astToStr(key)), strip(astToStr(val)))
+
+template `--`*(key: untyped) =
+  ## A shortcut for `switch <#switch,string,string>`_
+  ## Example:
+  ##   ```nim
+  ##   --listCmd # same as switch("listCmd")
+  ##   ```
+  switch(strip(astToStr(key)))
+
+type
+  ScriptMode* {.pure.} = enum ## Controls the behaviour of the script.
+    Silent,                   ## Be silent.
+    Verbose,                  ## Be verbose.
+    Whatif                    ## Do not run commands, instead just echo what
+                              ## would have been done.
+
+var
+  mode*: ScriptMode ## Set this to influence how mkDir, rmDir, rmFile etc.
+                    ## behave
+
+template checkError(exc: untyped): untyped = # `exc`: exception type to raise
+  let err = getError()
+  if err.len > 0: raise newException(exc, err) # an empty message raises nothing
+
+template checkOsError =
+  checkError(OSError)
+
+template log(msg: string, body: untyped) = # echoes `msg` and/or runs `body` per `mode`
+  if mode in {ScriptMode.Verbose, ScriptMode.Whatif}:
+    echo "[NimScript] ", msg
+  if mode != ScriptMode.Whatif: # Whatif only reports what would have been done
+    body
+
+proc listDirs*(dir: string): seq[string] =
+  ## Lists all the subdirectories (non-recursively) in the directory `dir`.
+  result = listDirsImpl(dir)
+  checkOsError()
+
+proc listFiles*(dir: string): seq[string] =
+  ## Lists all the files (non-recursively) in the directory `dir`.
+  result = listFilesImpl(dir)
+  checkOsError()
+
+proc rmDir*(dir: string, checkDir = false) {.raises: [OSError].} =
+  ## Removes the directory `dir`.
+  log "rmDir: " & dir:
+    removeDir(dir, checkDir = checkDir)
+    checkOsError()
+
+proc rmFile*(file: string) {.raises: [OSError].} =
+  ## Removes the `file`.
+  log "rmFile: " & file:
+    removeFile file
+    checkOsError()
+
+proc mkDir*(dir: string) {.raises: [OSError].} =
+  ## Creates the directory `dir` including all necessary subdirectories. If
+  ## the directory already exists, no error is raised.
+  log "mkDir: " & dir:
+    createDir dir
+    checkOsError()
+
+proc mvFile*(`from`, to: string) {.raises: [OSError].} =
+  ## Moves the file `from` to `to`.
+  log "mvFile: " & `from` & ", " & to:
+    moveFile `from`, to
+    checkOsError()
+
+proc mvDir*(`from`, to: string) {.raises: [OSError].} =
+  ## Moves the dir `from` to `to`.
+  log "mvDir: " & `from` & ", " & to:
+    moveDir `from`, to
+    checkOsError()
+
+proc cpFile*(`from`, to: string) {.raises: [OSError].} =
+  ## Copies the file `from` to `to`.
+  log "cpFile: " & `from` & ", " & to:
+    copyFile `from`, to
+    checkOsError()
+
+proc cpDir*(`from`, to: string) {.raises: [OSError].} =
+  ## Copies the dir `from` to `to`.
+  log "cpDir: " & `from` & ", " & to:
+    copyDir `from`, to
+    checkOsError()
+
+proc exec*(command: string) {.
+  raises: [OSError], tags: [ExecIOEffect, WriteIOEffect].} =
+  ## Executes an external process. If the external process terminates with
+  ## a non-zero exit code, an OSError exception is raised. The command is
+  ## executed relative to the current source path.
+  ##
+  ## .. note:: If you need a version of `exec` that returns the exit code
+  ##   and text output of the command, you can use `system.gorgeEx
+  ##   <system.html#gorgeEx,string,string,string>`_.
+  log "exec: " & command:
+    if rawExec(command) != 0:
+      raise newException(OSError, "FAILED: " & command)
+    checkOsError()
+
+proc exec*(command: string, input: string, cache = "") {.
+  raises: [OSError], tags: [ExecIOEffect, WriteIOEffect].} =
+  ## Executes an external process. If the external process terminates with
+  ## a non-zero exit code, an OSError exception is raised.
+  ##
+  ## .. warning:: This version of `exec` is executed relative to the nimscript
+  ##   module path, which affects how the command resolves relative paths. Thus
+  ##   it is generally better to use `gorgeEx` directly when you need more
+  ##   control over the execution environment or when working with commands
+  ##   that deal with relative paths.
+  log "exec: " & command:
+    let (output, exitCode) = gorgeEx(command, input, cache)
+    echo output
+    if exitCode != 0:
+      raise newException(OSError, "FAILED: " & command)
+
+proc selfExec*(command: string) {.
+  raises: [OSError], tags: [ExecIOEffect, WriteIOEffect].} =
+  ## Executes an external command with the current nim/nimble executable.
+  ## `command` must not contain the "nim " part.
+  let c = selfExe() & " " & command # the executable path is prepended here
+  log "exec: " & c:
+    if rawExec(c) != 0: # a non-zero exit code is reported as OSError
+      raise newException(OSError, "FAILED: " & c)
+    checkOsError()
+
+proc put*(key, value: string) =
+  ## Sets a configuration 'key' like 'gcc.options.always' to its value.
+  builtin
+
+proc get*(key: string): string =
+  ## Retrieves a configuration 'key' like 'gcc.options.always'.
+  builtin
+
+proc exists*(key: string): bool =
+  ## Checks for the existence of a configuration 'key'
+  ## like 'gcc.options.always'.
+  builtin
+
+proc nimcacheDir*(): string =
+  ## Retrieves the location of 'nimcache'.
+  builtin
+
+proc projectName*(): string =
+  ## Retrieves the name of the current project
+  builtin
+
+proc projectDir*(): string =
+  ## Retrieves the absolute directory of the current project
+  builtin
+
+proc projectPath*(): string =
+  ## Retrieves the absolute path of the current project
+  builtin
+
+proc thisDir*(): string =
+  ## Retrieves the directory of the current `nims` script file. Its path is
+  ## obtained via `currentSourcePath` (although, currently,
+  ## `currentSourcePath` resolves symlinks, unlike `thisDir`).
+  builtin
+
+proc cd*(dir: string) {.raises: [OSError].} =
+  ## Changes the current directory.
+  ##
+  ## The change is permanent for the rest of the execution, since this is just
+  ## a shortcut for `os.setCurrentDir() <os.html#setCurrentDir,string>`_ . Use
+  ## the `withDir() <#withDir.t,string,untyped>`_ template if you want to
+  ## perform a temporary change only.
+  setCurrentDir(dir)
+  checkOsError()
+
+proc findExe*(bin: string): string =
+  ## Searches for bin in the current working directory and then in directories
+  ## listed in the PATH environment variable. Returns "" if the exe cannot be
+  ## found.
+  builtin
+
+template withDir*(dir: string; body: untyped): untyped =
+  ## Changes the current directory temporarily.
+  ##
+  ## If you need a permanent change, use the `cd() <#cd,string>`_ proc.
+  ## Usage example:
+  ##   ```nim
+  ##   # inside /some/path/
+  ##   withDir "foo":
+  ##     # move to /some/path/foo/
+  ##   # back in /some/path/
+  ##   ```
+  let curDir = getCurrentDir()
+  try:
+    cd(dir)
+    body
+  finally:
+    cd(curDir)
+
+proc writeTask(name, desc: string) = # prints one `name  desc` line of the task list
+  if desc.len == 0: return # tasks without a description are not listed
+  var padding = " " # always at least one separating space
+  for _ in name.len ..< 20: padding.add ' ' # pad short names up to 20 columns
+  echo name, padding, desc
+
+proc cppDefine*(define: string) =
+  ## Tells Nim that `define` is a C preprocessor `#define` and so always
+  ## needs to be mangled.
+  builtin
+
+proc stdinReadLine(): string {.
+  tags: [ReadIOEffect], raises: [IOError].} =
+  builtin
+
+proc stdinReadAll(): string {.
+  tags: [ReadIOEffect], raises: [IOError].} =
+  builtin
+
+proc readLineFromStdin*(): string {.raises: [IOError].} =
+  ## Reads a line of data from stdin; blocks until `\n` or EOF (EOF happens when stdin is closed).
+  log "readLineFromStdin": # in Whatif mode the body is skipped and "" is returned
+    result = stdinReadLine()
+    checkError(EOFError)
+
+proc readAllFromStdin*(): string {.raises: [IOError].} =
+  ## Reads all data from stdin; blocks until EOF (which happens when stdin is closed).
+  log "readAllFromStdin": # in Whatif mode the body is skipped and "" is returned
+    result = stdinReadAll()
+    checkError(EOFError)
+
+when not defined(nimble):
+  template `==?`(a, b: string): bool = cmpIgnoreStyle(a, b) == 0
+  template task*(name: untyped; description: string; body: untyped): untyped =
+    ## Defines a task. Hidden tasks are supported via an empty description.
+    ##
+    ## Example:
+    ##   ```nim
+    ##   task build, "default build is via the C backend":
+    ##     setCommand "c"
+    ##   ```
+    ##
+    ## For a task named `foo`, this template generates a `proc` named
+    ## `fooTask`.  This is useful if you need to call one task in
+    ## another in your Nimscript.
+    ##
+    ## Example:
+    ##
+    ##   ```nim
+    ##   task foo, "foo":        # > nim foo
+    ##     echo "Running foo"    # Running foo
+    ##
+    ##   task bar, "bar":        # > nim bar
+    ##     echo "Running bar"    # Running bar
+    ##     fooTask()             # Running foo
+    ##   ```
+    proc `name Task`*() =
+      setCommand "nop"
+      body
+
+    let cmd = getCommand()
+    if cmd.len == 0 or cmd ==? "help":
+      setCommand "help"
+      writeTask(astToStr(name), description)
+    elif cmd ==? astToStr(name):
+      `name Task`()
+
+  # nimble has its own implementation for these things.
+  var
+    packageName* = ""    ## Nimble support: Set this to the package name. It
+                         ## is usually not required to do that, nims' filename is
+                         ## the default.
+    version*: string     ## Nimble support: The package's version.
+    author*: string      ## Nimble support: The package's author.
+    description*: string ## Nimble support: The package's description.
+    license*: string     ## Nimble support: The package's license.
+    srcDir*: string      ## Nimble support: The package's source directory.
+    binDir*: string      ## Nimble support: The package's binary directory.
+    backend*: string     ## Nimble support: The package's backend.
+
+    skipDirs*, skipFiles*, skipExt*, installDirs*, installFiles*,
+      installExt*, bin*: seq[string] = @[] ## Nimble metadata.
+    requiresData*: seq[string] = @[] ## Exposes the list of requirements for read
+                                     ## and write accesses.
+
+  proc requires*(deps: varargs[string]) =
+    ## Nimble support: Call this to set the list of requirements of your Nimble
+    ## package.
+    for d in deps: requiresData.add(d)
+
+{.pop.}
diff --git a/lib/system/orc.nim b/lib/system/orc.nim
new file mode 100644
index 000000000..c02a24989
--- /dev/null
+++ b/lib/system/orc.nim
@@ -0,0 +1,543 @@
+#
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2020 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+# Cycle collector based on
+# https://www.cs.purdue.edu/homes/hosking/690M/Bacon01Concurrent.pdf
+# And ideas from Lins' in 2008 by the notion of "critical links", see
+# "Cyclic reference counting" by Rafael Dueire Lins
+# R.D. Lins / Information Processing Letters 109 (2008) 71–78
+#
+
+include cellseqs_v2
+
+const
+  colBlack = 0b000   # both color bits clear; `setColor` only masks for it
+  colGray = 0b001    # assigned by `markGray` while counts are trial-decremented
+  colWhite = 0b010   # assigned by `scan` to cells whose count dropped below zero
+  maybeCycle = 0b100 # possibly part of a cycle; this has to be a "sticky" bit
+  jumpStackFlag = 0b1000 # set on a cell while it is held in the jump stack
+  colorMask = 0b011  # selects the two color bits inside `rc`
+
+  logOrc = defined(nimArcIds) # compiles in the logging and `orcAssert` checks
+
+type
+  TraceProc = proc (p, env: pointer) {.nimcall, benign.}
+  DisposeProc = proc (p: pointer) {.nimcall, benign.}
+
+template color(c): untyped = (c.rc and colorMask) # just the color bits of `c.rc`
+template setColor(c, col) =
+  when col != colBlack:
+    c.rc = (c.rc and not colorMask) or col # clear old color, then set the new one
+  else:
+    c.rc = c.rc and not colorMask # black is 0, so clearing is enough
+
+const
+  optimizedOrc = false # not defined(nimOldOrc)
+# XXX Still incorrect, see tests/arc/tdestroy_in_loopcond
+
+proc nimIncRefCyclic(p: pointer; cyclic: bool) {.compilerRtl, inl.} =
+  let cell = head(p) # header cell belonging to `p`
+  inc(cell.rc, rcIncrement)
+  when optimizedOrc:
+    if cyclic:
+      cell.rc = cell.rc or maybeCycle # only tracked when `optimizedOrc` is on
+
+proc nimMarkCyclic(p: pointer) {.compilerRtl, inl.} =
+  when optimizedOrc: # otherwise this proc is a no-op
+    if p == nil: return
+    let cell = head(p)
+    cell.rc = cell.rc or maybeCycle
+
+proc unsureAsgnRef(dest: ptr pointer, src: pointer) {.inline.} =
+  # Used only by the old RTTI mechanism, where 'dest[]' is known to be nil:
+  # nothing needs destroying first, which is handy because an object of
+  # unknown compile-time type cannot be destroyed reliably.
+  dest[] = src
+  if src == nil: return
+  nimIncRefCyclic(src, cyclic = true)
+
+const
+  useJumpStack = false # for thavlak the jump stack doesn't improve the performance at all
+
+type
+  GcEnv = object
+    traceStack: CellSeq[ptr pointer] # (slot, type) pairs pushed by the trace procs
+    when useJumpStack:
+      jumpStack: CellSeq[ptr pointer]   # Lins' jump stack in order to speed up traversals
+    toFree: CellSeq[Cell] # cells queued by `collectColor`, released afterwards
+    freed, touched, edges, rcSum: int # counters accumulated during one collection
+    keepThreshold: bool # set when the `rcSum == edges` short-cut was taken
+
+proc trace(s: Cell; desc: PNimTypeV2; j: var GcEnv) {.inline.} =
+  if desc.traceImpl == nil: return # this type has no trace proc
+  let payload = s +! sizeof(RefHeader) # address right behind the header
+  cast[TraceProc](desc.traceImpl)(payload, addr(j))
+
+include threadids
+
+when logOrc or orcLeakDetector:
+  proc writeCell(msg: cstring; s: Cell; desc: PNimTypeV2) = # one diagnostic line on stderr
+    when orcLeakDetector: # leak detector: print the file/line stored in the cell
+      cfprintf(cstderr, "%s %s file: %s:%ld; color: %ld; thread: %ld\n",
+        msg, desc.name, s.filename, s.line, s.color, getThreadId())
+    else: # plain logging: print the cell id, its root index and its count
+      cfprintf(cstderr, "%s %s %ld root index: %ld; RC: %ld; color: %ld; thread: %ld\n",
+        msg, desc.name, s.refId, s.rootIdx, s.rc shr rcShift, s.color, getThreadId())
+
+proc free(s: Cell; desc: PNimTypeV2) {.inline.} =
+  when traceCollector:
+    cprintf("[From ] %p rc %ld color %ld\n", s, s.rc shr rcShift, s.color)
+  let p = s +! sizeof(RefHeader) # address right behind the header; passed to both calls below
+
+  when logOrc: writeCell("free", s, desc)
+
+  if desc.destructor != nil: # run the type's destructor first, if it has one
+    cast[DestructorProc](desc.destructor)(p)
+
+  when false: # disabled debugging aid: reports which hooks the type provides
+    cstderr.rawWrite desc.name
+    cstderr.rawWrite " "
+    if desc.destructor == nil:
+      cstderr.rawWrite "lacks dispose"
+      if desc.traceImpl != nil:
+        cstderr.rawWrite ", but has trace\n"
+      else:
+        cstderr.rawWrite ", and lacks trace\n"
+    else:
+      cstderr.rawWrite "has dispose!\n"
+
+  nimRawDispose(p, desc.align) # presumably hands the memory back -- defined elsewhere
+
+template orcAssert(cond, msg) =
+  when logOrc: # expands to nothing unless logging is compiled in
+    if not cond:
+      cfprintf(cstderr, "[Bug!] %s\n", msg)
+      rawQuit 1 # an internal invariant broke; do not continue
+
+when logOrc:
+  proc strstr(s, sub: cstring): cstring {.header: "<string.h>", importc.}
+
+proc nimTraceRef(q: pointer; desc: PNimTypeV2; env: pointer) {.compilerRtl, inl.} =
+  let slot = cast[ptr pointer](q)
+  if slot[] == nil: return # nil slots are not pushed
+
+  orcAssert strstr(desc.name, "TType") == nil, "following a TType but it's acyclic!"
+
+  # `env` is the `GcEnv` that `trace` passed along; queue the slot for later
+  cast[ptr GcEnv](env).traceStack.add(slot, desc)
+
+proc nimTraceRefDyn(q: pointer; env: pointer) {.compilerRtl, inl.} =
+  let slot = cast[ptr pointer](q)
+  if slot[] == nil: return
+  # the type descriptor is read from the first word of the referenced object:
+  cast[ptr GcEnv](env).traceStack.add(slot, cast[ptr PNimTypeV2](slot[])[])
+
+var
+  roots {.threadvar.}: CellSeq[Cell]
+
+proc unregisterCycle(s: Cell) =
+  # O(1) removal: the last root takes over the slot that `s` occupied.
+  let slot = s.rootIdx-1 # `rootIdx` is 1-based
+  when false:
+    if slot >= roots.len or slot < 0:
+      cprintf("[Bug!] %ld %ld\n", slot, roots.len)
+      rawQuit 1
+  roots.d[slot] = roots.d[roots.len-1]
+  roots.d[slot][0].rootIdx = slot+1
+  dec(roots.len)
+  s.rootIdx = 0 # done last so it also holds when `s` was the last root
+
+proc scanBlack(s: Cell; desc: PNimTypeV2; j: var GcEnv) =
+  #[
+  proc scanBlack(s: Cell) =
+    setColor(s, colBlack)
+    for t in sons(s):
+      t.rc = t.rc + rcIncrement
+      if t.color != colBlack:
+        scanBlack(t)
+  ]#
+  s.setColor colBlack
+  let until = j.traceStack.len # entries below this mark belong to the caller
+  trace(s, desc, j)
+  when logOrc: writeCell("root still alive", s, desc)
+  while j.traceStack.len > until: # iterative form of the recursion sketched above
+    let (entry, desc) = j.traceStack.pop()
+    let t = head entry[]
+    inc t.rc, rcIncrement # give back the count that `markGray` subtracted
+    if t.color != colBlack:
+      t.setColor colBlack
+      trace(t, desc, j)
+      when logOrc: writeCell("child still alive", t, desc)
+
+proc markGray(s: Cell; desc: PNimTypeV2; j: var GcEnv) =
+  #[
+  proc markGray(s: Cell) =
+    if s.color != colGray:
+      setColor(s, colGray)
+      for t in sons(s):
+        t.rc = t.rc - rcIncrement
+        if t.color != colGray:
+          markGray(t)
+  ]#
+  if s.color != colGray:
+    s.setColor colGray
+    inc j.touched
+    # keep in mind that refcounts are zero based so add 1 here:
+    inc j.rcSum, (s.rc shr rcShift) + 1
+    orcAssert(j.traceStack.len == 0, "markGray: trace stack not empty")
+    trace(s, desc, j)
+    while j.traceStack.len > 0: # iterative form of the recursion sketched above
+      let (entry, desc) = j.traceStack.pop()
+      let t = head entry[]
+      dec t.rc, rcIncrement # trial deletion: take away the count of this edge
+      inc j.edges # one more edge followed; later compared against `rcSum`
+      when useJumpStack:
+        if (t.rc shr rcShift) >= 0 and (t.rc and jumpStackFlag) == 0:
+          t.rc = t.rc or jumpStackFlag
+          when traceCollector:
+            cprintf("[Now in jumpstack] %p %ld color %ld in jumpstack %ld\n", t, t.rc shr rcShift, t.color, t.rc and jumpStackFlag)
+          j.jumpStack.add(entry, desc)
+      if t.color != colGray:
+        t.setColor colGray
+        inc j.touched
+        # we already decremented its refcount so account for that:
+        inc j.rcSum, (t.rc shr rcShift) + 2
+        trace(t, desc, j)
+
+proc scan(s: Cell; desc: PNimTypeV2; j: var GcEnv) =
+  #[
+  proc scan(s: Cell) =
+    if s.color == colGray:
+      if s.rc > 0:
+        scanBlack(s)
+      else:
+        s.setColor(colWhite)
+        for t in sons(s): scan(t)
+  ]#
+  if s.color == colGray:
+    if (s.rc shr rcShift) >= 0: # count survived the trial deletion: revive it
+      scanBlack(s, desc, j)
+      # XXX this should be done according to Lins' paper but currently breaks
+      #when useJumpStack:
+      #  s.setColor colPurple
+    else:
+      when useJumpStack:
+        # first we have to repair all the nodes we have seen
+        # that are still alive; we also need to mark what they
+        # refer to as alive:
+        while j.jumpStack.len > 0:
+          let (entry, desc) = j.jumpStack.pop
+          let t = head entry[]
+          # not in jump stack anymore!
+          t.rc = t.rc and not jumpStackFlag
+          if t.color == colGray and (t.rc shr rcShift) >= 0:
+            scanBlack(t, desc, j)
+            # XXX this should be done according to Lins' paper but currently breaks
+            #t.setColor colPurple
+            when traceCollector:
+              cprintf("[jump stack] %p %ld\n", t, t.rc shr rcShift)
+
+      orcAssert(j.traceStack.len == 0, "scan: trace stack not empty")
+      s.setColor(colWhite) # count below zero: garbage candidate
+      trace(s, desc, j)
+      while j.traceStack.len > 0: # iterative form of the recursion sketched above
+        let (entry, desc) = j.traceStack.pop()
+        let t = head entry[]
+        if t.color == colGray:
+          if (t.rc shr rcShift) >= 0:
+            scanBlack(t, desc, j)
+          else:
+            when useJumpStack:
+              # first we have to repair all the nodes we have seen
+              # that are still alive; we also need to mark what they
+              # refer to as alive:
+              while j.jumpStack.len > 0:
+                let (entry, desc) = j.jumpStack.pop
+                let t = head entry[]
+                # not in jump stack anymore!
+                t.rc = t.rc and not jumpStackFlag
+                if t.color == colGray and (t.rc shr rcShift) >= 0:
+                  scanBlack(t, desc, j)
+                  # XXX this should be done according to Lins' paper but currently breaks
+                  #t.setColor colPurple
+                  when traceCollector:
+                    cprintf("[jump stack] %p %ld\n", t, t.rc shr rcShift)
+
+            t.setColor(colWhite)
+            trace(t, desc, j)
+
+when false:
+  proc writeCell(msg: cstring; s: Cell) =
+    cfprintf(cstderr, "%s %p root index: %ld; RC: %ld; color: %ld\n",
+      msg, s, s.rootIdx, s.rc shr rcShift, s.color)
+
+proc collectColor(s: Cell; desc: PNimTypeV2; col: int; j: var GcEnv) =
+  #[
+    was: 'collectWhite'.
+
+  proc collectWhite(s: Cell) =
+    if s.color == colWhite and not buffered(s):
+      s.setColor(colBlack)
+      for t in sons(s):
+        collectWhite(t)
+      free(s) # watch out, a bug here!
+  ]#
+  if s.color == col and s.rootIdx == 0: # `rootIdx == 0` plays the role of 'not buffered'
+    orcAssert(j.traceStack.len == 0, "collectWhite: trace stack not empty")
+
+    s.setColor(colBlack)
+    j.toFree.add(s, desc) # only queued here; the caller frees the queue later
+    trace(s, desc, j)
+    while j.traceStack.len > 0:
+      let (entry, desc) = j.traceStack.pop()
+      let t = head entry[]
+      entry[] = nil # ensure that the destructor does not touch moribund objects!
+      if t.color == col and t.rootIdx == 0:
+        j.toFree.add(t, desc)
+        t.setColor(colBlack)
+        trace(t, desc, j)
+
+const
+  defaultThreshold = when defined(nimFixedOrc): 10_000 else: 128
+
+when defined(nimStressOrc):
+  const rootsThreshold = 10 # broken with -d:nimStressOrc: 10 and for havlak iterations 1..8
+else:
+  var rootsThreshold {.threadvar.}: int
+
+proc collectCyclesBacon(j: var GcEnv; lowMark: int) =
+  # pretty direct translation from
+  # https://researcher.watson.ibm.com/researcher/files/us-bacon/Bacon01Concurrent.pdf
+  # Fig. 2. Synchronous Cycle Collection
+  #[
+    for s in roots:
+      markGray(s)
+    for s in roots:
+      scan(s)
+    for s in roots:
+      remove s from roots
+      s.buffered = false
+      collectWhite(s)
+  ]#
+  let last = roots.len - 1 # the mark and scan passes cover roots[lowMark..last]
+  when logOrc:
+    for i in countdown(last, lowMark):
+      writeCell("root", roots.d[i][0], roots.d[i][1])
+
+  for i in countdown(last, lowMark):
+    markGray(roots.d[i][0], roots.d[i][1], j)
+
+  var colToCollect = colWhite
+  if j.rcSum == j.edges:
+    # short-cut: we know everything is garbage:
+    colToCollect = colGray
+    # remember the fact that we got so lucky:
+    j.keepThreshold = true
+  else:
+    for i in countdown(last, lowMark):
+      scan(roots.d[i][0], roots.d[i][1], j)
+
+  init j.toFree
+  for i in 0 ..< roots.len: # note: this pass walks all roots, not only those >= lowMark
+    let s = roots.d[i][0]
+    s.rootIdx = 0 # un-buffer the root so `collectColor` will accept it
+    collectColor(s, roots.d[i][1], colToCollect, j)
+
+  # Bug #22927: `free` calls destructors which can append to `roots`.
+  # We protect against this here by setting `roots.len` to 0 and also
+  # setting the threshold so high that no cycle collection can be triggered
+  # until we are out of this critical section:
+  when not defined(nimStressOrc):
+    let oldThreshold = rootsThreshold
+    rootsThreshold = high(int)
+  roots.len = 0
+
+  for i in 0 ..< j.toFree.len:
+    when orcLeakDetector:
+      writeCell("CYCLIC OBJECT FREED", j.toFree.d[i][0], j.toFree.d[i][1])
+    free(j.toFree.d[i][0], j.toFree.d[i][1])
+
+  when not defined(nimStressOrc):
+    rootsThreshold = oldThreshold # leave the critical section
+
+  inc j.freed, j.toFree.len
+  deinit j.toFree
+
+when defined(nimOrcStats):
+  var freedCyclicObjects {.threadvar.}: int
+
+proc partialCollect(lowMark: int) =
+  when false: # disabled heuristic: skip runs with fewer than 10 new roots
+    if roots.len < 10 + lowMark: return
+  when logOrc:
+    cfprintf(cstderr, "[partialCollect] begin\n")
+  var j: GcEnv
+  init j.traceStack
+  collectCyclesBacon(j, lowMark) # marks and scans only roots[lowMark..]
+  when logOrc:
+    cfprintf(cstderr, "[partialCollect] end; freed %ld touched: %ld work: %ld\n", j.freed, j.touched,
+      roots.len - lowMark) # NOTE(review): the call above already reset `roots.len`, so "work" looks wrong -- confirm
+  roots.len = lowMark # keep the first `lowMark` entries registered
+  deinit j.traceStack
+  when defined(nimOrcStats):
+    inc freedCyclicObjects, j.freed
+proc collectCycles() =
+  ## Collect cycles.
+  when logOrc:
+    cfprintf(cstderr, "[collectCycles] begin\n")
+
+  var j: GcEnv
+  init j.traceStack
+  when useJumpStack:
+    init j.jumpStack
+    collectCyclesBacon(j, 0)
+    while j.jumpStack.len > 0:
+      let (t, desc) = j.jumpStack.pop # NOTE(review): pops a `ptr pointer`, yet `t.rc` below needs a Cell (other pops use `head entry[]`); branch is disabled
+      # not in jump stack anymore!
+      t.rc = t.rc and not jumpStackFlag
+    deinit j.jumpStack
+  else:
+    collectCyclesBacon(j, 0)
+
+  deinit j.traceStack
+  if roots.len == 0: # nothing was re-added while freeing: drop the buffer too
+    deinit roots
+
+  when not defined(nimStressOrc):
+    # compute the threshold based on the previous history
+    # of the cycle collector's effectiveness:
+    # we're effective when we collected 50% or more of the nodes
+    # we touched. If we're effective, we can reset the threshold:
+    if j.keepThreshold:
+      discard
+    elif j.freed * 2 >= j.touched:
+      when not defined(nimFixedOrc):
+        rootsThreshold = max(rootsThreshold div 3 * 2, 16) # shrink by a third, floor of 16
+      else:
+        rootsThreshold = 0
+      #cfprintf(cstderr, "[collectCycles] freed %ld, touched %ld new threshold %ld\n", j.freed, j.touched, rootsThreshold)
+    elif rootsThreshold < high(int) div 4: # the guard keeps the growth below from overflowing
+      rootsThreshold = (if rootsThreshold <= 0: defaultThreshold else: rootsThreshold) * 3 div 2
+  when logOrc:
+    cfprintf(cstderr, "[collectCycles] end; freed %ld new threshold %ld touched: %ld mem: %ld rcSum: %ld edges: %ld\n", j.freed, rootsThreshold, j.touched,
+      getOccupiedMem(), j.rcSum, j.edges)
+  when defined(nimOrcStats):
+    inc freedCyclicObjects, j.freed
+
+when defined(nimOrcStats):
+  type
+    OrcStats* = object ## Counters exposed by the cycle collector subsystem.
+      freedCyclicObjects*: int ## How many cyclic objects have been freed.
+  proc GC_orcStats*(): OrcStats =
+    ## Returns a snapshot of the cycle collector's counters.
+    result.freedCyclicObjects = freedCyclicObjects
+
+proc registerCycle(s: Cell; desc: PNimTypeV2) =
+  s.rootIdx = roots.len+1 # 1-based position in `roots`; 0 means "not registered"
+  if roots.d == nil: init(roots) # `roots` is a threadvar that is set up lazily
+  add(roots, s, desc)
+
+  if roots.len - defaultThreshold >= rootsThreshold: # enough candidates piled up
+    collectCycles()
+  when logOrc:
+    writeCell("[added root]", s, desc)
+
+  orcAssert strstr(desc.name, "TType") == nil, "added a TType as a root!"
+
+proc GC_runOrc* =
+  ## Forces a cycle collection pass over all currently registered roots.
+  collectCycles()
+  orcAssert roots.len == 0, "roots not empty!" # only checked in `logOrc` builds
+
+proc GC_enableOrc*() =
+  ## Enables the cycle collector subsystem of `--mm:orc`. This is a `--mm:orc`
+  ## specific API. Check with `when defined(gcOrc)` for its existence.
+  ## Does nothing when compiled with `-d:nimStressOrc`.
+  when not defined(nimStressOrc): rootsThreshold = 0
+
+proc GC_disableOrc*() =
+  ## Disables the cycle collector subsystem of `--mm:orc`. This is a `--mm:orc`
+  ## specific API. Check with `when defined(gcOrc)` for its existence.
+  ## Does nothing when compiled with `-d:nimStressOrc`.
+  when not defined(nimStressOrc): rootsThreshold = high(int)
+
+proc GC_prepareOrc*(): int {.inline.} = roots.len # current number of registered roots
+
+proc GC_partialCollect*(limit: int) = # `limit` becomes the `lowMark` of `partialCollect`
+  partialCollect(limit)
+
+proc GC_fullCollect* =
+  ## Forces a full garbage collection pass. With `--mm:orc` triggers the cycle
+  ## collector. Runs the same collection as `GC_runOrc`.
+  collectCycles()
+
+proc GC_enableMarkAndSweep*() =
+  ## For `--mm:orc` an alias for `GC_enableOrc`: it simply forwards to it.
+  GC_enableOrc()
+
+proc GC_disableMarkAndSweep*() =
+  ## For `--mm:orc` an alias for `GC_disableOrc`: it simply forwards to it.
+  GC_disableOrc()
+
+const
+  acyclicFlag = 1 # see also cggtypes.nim, proc genTypeInfoV2Impl
+
+when not optimizedOrc:
+  template markedAsCyclic(s: Cell; desc: PNimTypeV2): bool =
+    (desc.flags and acyclicFlag) == 0 # the type flag alone decides
+else:
+  template markedAsCyclic(s: Cell; desc: PNimTypeV2): bool =
+    (desc.flags and acyclicFlag) == 0 and (s.rc and maybeCycle) != 0
+
+proc rememberCycle(isDestroyAction: bool; s: Cell; desc: PNimTypeV2) {.noinline.} =
+  if not isDestroyAction:
+    # do not call 'rememberCycle' again unless this cell
+    # got an 'incRef' event:
+    if s.rootIdx == 0 and markedAsCyclic(s, desc):
+      s.setColor colBlack
+      registerCycle(s, desc)
+  elif s.rootIdx > 0:
+    # the cell is being destroyed, so it must not stay in 'roots':
+    unregisterCycle(s)
+
+proc nimDecRefIsLastCyclicDyn(p: pointer): bool {.compilerRtl, inl.} =
+  if p == nil: return false
+  let cell = head(p)
+  # nothing left once the bits in 'rcMask' are masked off: last reference
+  result = (cell.rc and not rcMask) == 0
+  if not result:
+    dec cell.rc, rcIncrement
+  # the type descriptor is read from the first word of the object;
+  # 'rememberCycle' unregisters a dying cell or registers a surviving one:
+  rememberCycle(result, cell, cast[ptr PNimTypeV2](p)[])
+
+proc nimDecRefIsLastDyn(p: pointer): bool {.compilerRtl, inl.} =
+  # Variant that never registers a new root: a surviving cell is only
+  # decremented, a dying cell is merely taken out of 'roots'.
+  if p == nil: return false
+  let cell = head(p)
+  # something left once the bits in 'rcMask' are masked off: still shared
+  if (cell.rc and not rcMask) != 0:
+    dec cell.rc, rcIncrement
+    return false
+  result = true
+  if cell.rootIdx > 0:
+    unregisterCycle(cell)
+
+proc nimDecRefIsLastCyclicStatic(p: pointer; desc: PNimTypeV2): bool {.compilerRtl, inl.} =
+  if p == nil: return false
+  let cell = head(p)
+  # nothing left once the bits in 'rcMask' are masked off: last reference
+  result = (cell.rc and not rcMask) == 0
+  if not result:
+    dec cell.rc, rcIncrement
+  # here the type descriptor is supplied by the caller;
+  # 'rememberCycle' unregisters a dying cell or registers a surviving one:
+  rememberCycle(result, cell, desc)
diff --git a/lib/system/osalloc.nim b/lib/system/osalloc.nim
new file mode 100644
index 000000000..5509d0070
--- /dev/null
+++ b/lib/system/osalloc.nim
@@ -0,0 +1,218 @@
+#
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2016 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+proc roundup(x, v: int): int {.inline.} = # the mask trick needs `v` to be a power of two
+  let mask = v - 1
+  result = (x + mask) and not mask
+  sysAssert(result >= x, "roundup: result < x")
+
+sysAssert(roundup(14, PageSize) == PageSize, "invalid PageSize")
+sysAssert(roundup(15, 8) == 16, "roundup broken")
+sysAssert(roundup(65, 8) == 72, "roundup broken 2")
+
+# ------------ platform specific chunk allocation code -----------
+
+# some platforms have really weird unmap behaviour:
+# unmap(blockStart, PageSize)
+# really frees the whole block. Happens for Linux/PowerPC for example. Amd64
+# and x86 are safe though; Windows is special because MEM_RELEASE can only be
+# used with a size of 0. We also allow unmapping to be turned off with
+# -d:nimAllocNoUnmap:
+const doNotUnmap = not (defined(amd64) or defined(i386)) or
+                   defined(windows) or defined(nimAllocNoUnmap)
+
+
+when defined(nimAllocPagesViaMalloc):
+  when not defined(gcArc) and not defined(gcOrc) and not defined(gcAtomicArc):
+    {.error: "-d:nimAllocPagesViaMalloc is only supported with --mm:arc or --mm:atomicArc or --mm:orc".}
+
+  proc osTryAllocPages(size: int): pointer {.inline.} =
+    # Returns page-aligned memory carved out of a malloc'ed block, or nil.
+    let base = c_malloc(csize_t size + PageSize - 1 + sizeof(uint32))
+    if base == nil: return nil # "try" variant: report failure, do not raise
+    # memory layout: padding + offset (4 bytes) + user_data; to deallocate,
+    # read offset at user_data - 4 bytes, then free user_data - offset
+    let offset = PageSize - (cast[int](base) and (PageSize - 1))
+    cast[ptr uint32](base +! (offset - sizeof(uint32)))[] = uint32(offset)
+    result = base +! offset
+
+  proc osAllocPages(size: int): pointer {.inline.} =
+    result = osTryAllocPages(size) # same allocation, but failure is fatal here
+    if result == nil: raiseOutOfMem()
+
+  proc osDeallocPages(p: pointer, size: int) {.inline.} =
+    # read offset at p - 4 bytes, then deallocate (p - offset) pointer
+    let offset = cast[ptr uint32](p -! sizeof(uint32))[] # stored by osTryAllocPages
+    c_free(p -! offset) # `size` is not needed
+
+elif defined(emscripten) and not defined(StandaloneHeapSize):
+  const
+    PROT_READ  = 1             # page can be read
+    PROT_WRITE = 2             # page can be written
+    MAP_PRIVATE = 2'i32        # Changes are private
+
+  var MAP_ANONYMOUS {.importc: "MAP_ANONYMOUS", header: "<sys/mman.h>".}: cint
+  type
+    PEmscriptenMMapBlock = ptr EmscriptenMMapBlock
+    EmscriptenMMapBlock {.pure, inheritable.} = object
+      realSize: int        # length that was passed to mmap; needed for munmap
+      realPointer: pointer     # address that mmap returned; needed for munmap
+
+  proc mmap(adr: pointer, len: int, prot, flags, fildes: cint,
+            off: int): pointer {.header: "<sys/mman.h>".}
+
+  proc munmap(adr: pointer, len: int) {.header: "<sys/mman.h>".}
+
+  proc osAllocPages(block_size: int): pointer {.inline.} =
+    let realSize = block_size + sizeof(EmscriptenMMapBlock) + PageSize + 1 # room for alignment + descriptor
+    result = mmap(nil, realSize, PROT_READ or PROT_WRITE,
+                             MAP_PRIVATE or MAP_ANONYMOUS, -1, 0)
+    if result == nil or result == cast[pointer](-1):
+      raiseOutOfMem()
+
+    let realPointer = result
+    let pos = cast[int](result)
+
+    # Convert pointer to PageSize correct one.
+    var new_pos = cast[int](pos) +% (PageSize - (pos %% PageSize))
+    if (new_pos-pos) < sizeof(EmscriptenMMapBlock):
+      new_pos = new_pos +% PageSize
+    result = cast[pointer](new_pos)
+
+    # The descriptor lives directly in front of the returned block. It has to be
+    # written through a pointer: casting to the object type only fills a local copy.
+    let mmapDescr = cast[PEmscriptenMMapBlock](new_pos -% sizeof(EmscriptenMMapBlock))
+    mmapDescr.realSize = realSize
+    mmapDescr.realPointer = realPointer
+
+    #c_fprintf(stdout, "[Alloc] size %d %d realSize:%d realPos:%d\n", block_size, cast[int](result), realSize, cast[int](realPointer))
+
+  proc osTryAllocPages(size: int): pointer = osAllocPages(size) # note: raises on failure, never returns nil
+
+  proc osDeallocPages(p: pointer, size: int) {.inline.} =
+    # Recover the descriptor that osAllocPages stored right before `p`; `size` is unused.
+    let mmapDescr = cast[PEmscriptenMMapBlock](cast[int](p) -% sizeof(EmscriptenMMapBlock))
+    munmap(mmapDescr.realPointer, mmapDescr.realSize)
+
+elif defined(genode) and not defined(StandaloneHeapSize):
+  include genode/alloc # osAllocPages, osTryAllocPages, osDeallocPages
+
+elif defined(posix) and not defined(StandaloneHeapSize):
+  const
+    PROT_READ  = 1             # page can be read
+    PROT_WRITE = 2             # page can be written
+
+  when defined(netbsd) or defined(openbsd):
+    # OpenBSD security for setjmp/longjmp coroutines
+    var MAP_STACK {.importc: "MAP_STACK", header: "<sys/mman.h>".}: cint
+  else:
+    const MAP_STACK = 0             # avoid sideeffects
+
+  when defined(macosx) or defined(freebsd):
+    const MAP_ANONYMOUS = 0x1000
+    const MAP_PRIVATE = 0x02        # Changes are private
+  elif defined(solaris):
+    const MAP_ANONYMOUS = 0x100
+    const MAP_PRIVATE = 0x02        # Changes are private
+  elif defined(linux) and defined(amd64):
+    # actually, any architecture using asm-generic, but being conservative here,
+    # some arches like mips and alpha use different values
+    const MAP_ANONYMOUS = 0x20
+    const MAP_PRIVATE = 0x02        # Changes are private
+  elif defined(haiku):
+    const MAP_ANONYMOUS = 0x08
+    const MAP_PRIVATE = 0x02
+  else:  # posix including netbsd or openbsd
+    var
+      MAP_ANONYMOUS {.importc: "MAP_ANONYMOUS", header: "<sys/mman.h>".}: cint
+      MAP_PRIVATE {.importc: "MAP_PRIVATE", header: "<sys/mman.h>".}: cint
+
+  proc mmap(adr: pointer, len: csize_t, prot, flags, fildes: cint,
+            off: int): pointer {.header: "<sys/mman.h>".}
+
+  proc munmap(adr: pointer, len: csize_t): cint {.header: "<sys/mman.h>".}
+
+  proc osAllocPages(size: int): pointer {.inline.} =
+    # Maps anonymous private read/write memory; a failed mapping is fatal.
+    result = mmap(nil, cast[csize_t](size), PROT_READ or PROT_WRITE,
+                  MAP_ANONYMOUS or MAP_PRIVATE or MAP_STACK, -1, 0)
+    if result.isNil or result == cast[pointer](-1): raiseOutOfMem()
+
+  proc osTryAllocPages(size: int): pointer {.inline.} =
+    let mapped = mmap(nil, cast[csize_t](size), PROT_READ or PROT_WRITE,
+                      MAP_ANONYMOUS or MAP_PRIVATE or MAP_STACK, -1, 0)
+    result = if mapped == cast[pointer](-1): nil else: mapped # -1 is mmap's failure value
+
+  proc osDeallocPages(p: pointer, size: int) {.inline.} = # no-op unless `reallyOsDealloc`
+    when reallyOsDealloc: discard munmap(p, cast[csize_t](size)) # munmap's result is ignored
+
+elif defined(windows) and not defined(StandaloneHeapSize):
+  const
+    MEM_RESERVE = 0x2000
+    MEM_COMMIT = 0x1000
+    MEM_TOP_DOWN = 0x100000
+    PAGE_READWRITE = 0x04
+
+    MEM_DECOMMIT = 0x4000
+    MEM_RELEASE = 0x8000
+
+  proc virtualAlloc(lpAddress: pointer, dwSize: int, flAllocationType,
+                    flProtect: int32): pointer {.
+                    header: "<windows.h>", stdcall, importc: "VirtualAlloc".}
+
+  proc virtualFree(lpAddress: pointer, dwSize: int,
+                   dwFreeType: int32): cint {.header: "<windows.h>", stdcall,
+                   importc: "VirtualFree".}
+
+  proc osAllocPages(size: int): pointer {.inline.} =
+    # Reserves and commits in a single VirtualAlloc call; failure is fatal.
+    result = virtualAlloc(nil, size, MEM_RESERVE or MEM_COMMIT, PAGE_READWRITE)
+    if result.isNil: raiseOutOfMem()
+
+  proc osTryAllocPages(size: int): pointer {.inline.} = # hands virtualAlloc's result back unchecked
+    result = virtualAlloc(nil, size, MEM_RESERVE or MEM_COMMIT,
+                          PAGE_READWRITE)
+
+  proc osDeallocPages(p: pointer, size: int) {.inline.} =
+    # According to Microsoft, 0 is the only correct size for MEM_RELEASE, so
+    # the OS -- not `size` -- decides how big the released block is.
+    # MEM_DECOMMIT used to be the workaround, but it fragments the address
+    # space heavily, so Windows is now treated as a strange unmap target
+    # instead (see `doNotUnmap`). Nothing is released at all unless
+    # `reallyOsDealloc` is set; a failing release aborts the program.
+    when reallyOsDealloc:
+      if virtualFree(p, 0, MEM_RELEASE) == 0:
+        cprintf "virtualFree failing!"
+        rawQuit 1
+    #VirtualFree(p, size, MEM_DECOMMIT)
+
+elif hostOS == "standalone" or defined(StandaloneHeapSize):
+  const StandaloneHeapSize {.intdefine.}: int = 1024 * PageSize
+  var
+    theHeap: array[StandaloneHeapSize div sizeof(float64), float64] # 'float64' for alignment
+    bumpPointer = cast[int](addr theHeap)
+
+  proc osAllocPages(size: int): pointer {.inline.} =
+    if size+bumpPointer <= cast[int](addr theHeap) + sizeof(theHeap): # `<=`: may end exactly at the heap's end
+      result = cast[pointer](bumpPointer) # bump allocation out of the static `theHeap`
+      inc bumpPointer, size
+    else:
+      raiseOutOfMem()
+
+  proc osTryAllocPages(size: int): pointer {.inline.} =
+    if size+bumpPointer <= cast[int](addr theHeap) + sizeof(theHeap): # `<=`: may end exactly at the heap's end
+      result = cast[pointer](bumpPointer) # otherwise `result` stays nil
+      inc bumpPointer, size
+
+  proc osDeallocPages(p: pointer, size: int) {.inline.} =
+    if bumpPointer-size == cast[int](p): # only the most recent allocation can be undone
+      dec bumpPointer, size # any other block is simply left in place
+
+else:
+  {.error: "Port memory manager to your platform".}
diff --git a/lib/system/platforms.nim b/lib/system/platforms.nim
new file mode 100644
index 000000000..0619f3fca
--- /dev/null
+++ b/lib/system/platforms.nim
@@ -0,0 +1,105 @@
+#
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2015 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+## Platform detection for NimScript. This module is included by the system module!
+## Do not import it directly!
+
+# CPU architectures have alias names mapped in tools/niminst/makefile.nimf
+
+type
+  CpuPlatform* {.pure.} = enum ## the CPU this program will run on.
+    none,                      ## unknown CPU
+    i386,                      ## 32 bit x86 compatible CPU
+    m68k,                      ## M68k based processor
+    alpha,                     ## Alpha processor
+    powerpc,                   ## 32 bit PowerPC
+    powerpc64,                 ## 64 bit PowerPC
+    powerpc64el,               ## Little Endian 64 bit PowerPC
+    sparc,                     ## SPARC based processor
+    sparc64,                   ## 64-bit SPARC based processor
+    hppa,                      ## HP PA-RISC
+    ia64,                      ## Intel Itanium
+    amd64,                     ## x86_64 (AMD64); 64 bit x86 compatible CPU
+    mips,                      ## MIPS based processor
+    mipsel,                    ## Little Endian MIPS based processor
+    mips64,                    ## 64-bit MIPS processor
+    mips64el,                  ## Little Endian 64-bit MIPS processor
+    arm,                       ## ARM based processor
+    arm64,                     ## ARM64 based processor
+    vm,                        ## Some virtual machine: Nim's VM or JavaScript
+    avr,                       ## AVR based processor
+    msp430,                    ## TI MSP430 microcontroller
+    riscv32,                   ## RISC-V 32-bit processor
+    riscv64,                   ## RISC-V 64-bit processor
+    wasm32,                    ## WASM, 32-bit
+    e2k,                       ## MCST Elbrus 2000
+    loongarch64,               ## LoongArch 64-bit processor
+    s390x                      ## IBM Z
+
+  OsPlatform* {.pure.} = enum ## the OS this program will run on; `none` if unknown.
+    none, dos, windows, os2, linux, morphos, skyos, solaris,
+    irix, netbsd, freebsd, openbsd, aix, palmos, qnx, amiga,
+    atari, netware, macos, macosx, haiku, android, js, standalone, nintendoswitch
+
+const
+  targetOS* = when defined(windows): OsPlatform.windows
+              elif defined(dos): OsPlatform.dos
+              elif defined(os2): OsPlatform.os2
+              elif defined(linux): OsPlatform.linux
+              elif defined(morphos): OsPlatform.morphos
+              elif defined(skyos): OsPlatform.skyos
+              elif defined(solaris): OsPlatform.solaris
+              elif defined(irix): OsPlatform.irix
+              elif defined(netbsd): OsPlatform.netbsd
+              elif defined(freebsd): OsPlatform.freebsd
+              elif defined(openbsd): OsPlatform.openbsd
+              elif defined(aix): OsPlatform.aix
+              elif defined(palmos): OsPlatform.palmos
+              elif defined(qnx): OsPlatform.qnx
+              elif defined(amiga): OsPlatform.amiga
+              elif defined(atari): OsPlatform.atari
+              elif defined(netware): OsPlatform.netware
+              elif defined(macosx): OsPlatform.macosx
+              elif defined(macos): OsPlatform.macos
+              elif defined(haiku): OsPlatform.haiku
+              elif defined(android): OsPlatform.android
+              elif defined(js): OsPlatform.js
+              elif defined(standalone): OsPlatform.standalone
+              elif defined(nintendoswitch): OsPlatform.nintendoswitch
+              else: OsPlatform.none
+    ## the OS this program will run on.
+
+  targetCPU* = when defined(i386): CpuPlatform.i386
+               elif defined(m68k): CpuPlatform.m68k
+               elif defined(alpha): CpuPlatform.alpha
+               elif defined(powerpc): CpuPlatform.powerpc
+               elif defined(powerpc64): CpuPlatform.powerpc64
+               elif defined(powerpc64el): CpuPlatform.powerpc64el
+               elif defined(sparc): CpuPlatform.sparc
+               elif defined(sparc64): CpuPlatform.sparc64
+               elif defined(hppa): CpuPlatform.hppa
+               elif defined(ia64): CpuPlatform.ia64
+               elif defined(amd64): CpuPlatform.amd64
+               elif defined(mips): CpuPlatform.mips
+               elif defined(mipsel): CpuPlatform.mipsel
+               elif defined(mips64): CpuPlatform.mips64
+               elif defined(mips64el): CpuPlatform.mips64el
+               elif defined(arm): CpuPlatform.arm
+               elif defined(arm64): CpuPlatform.arm64
+               elif defined(vm): CpuPlatform.vm
+               elif defined(avr): CpuPlatform.avr
+               elif defined(msp430): CpuPlatform.msp430
+               elif defined(riscv32): CpuPlatform.riscv32
+               elif defined(riscv64): CpuPlatform.riscv64
+               elif defined(wasm32): CpuPlatform.wasm32
+               elif defined(e2k): CpuPlatform.e2k
+               elif defined(loongarch64): CpuPlatform.loongarch64
+               elif defined(s390x): CpuPlatform.s390x
+               else: CpuPlatform.none
+    ## the CPU this program will run on.
diff --git a/lib/system/profiler.nim b/lib/system/profiler.nim
index 8e4c51dd9..e7eb6ac82 100755..100644
--- a/lib/system/profiler.nim
+++ b/lib/system/profiler.nim
@@ -1,37 +1,45 @@
 #
 #
-#            Nimrod's Runtime Library
+#            Nim's Runtime Library
 #        (c) Copyright 2012 Andreas Rumpf
 #
 #    See the file "copying.txt", included in this
 #    distribution, for details about the copyright.
 #
 
-# This file implements the Nimrod profiler. The profiler needs support by the
+# This file implements the Nim profiler. The profiler needs support by the
 # code generator. The idea is to inject the instruction stream
 # with 'nimProfile()' calls. These calls are injected at every loop end
 # (except perhaps loops that have no side-effects). At every Nth call a
 # stack trace is taken. A stack tace is a list of cstrings.
 
+when defined(profiler) and defined(memProfiler):
+  {.error: "profiler and memProfiler cannot be defined at the same time (See Embedded Stack Trace Profiler (ESTP) User Guide) for more details".}
+
 {.push profiler: off.}
 
 const
   MaxTraceLen = 20 # tracking the last 20 calls is enough
 
 type
-  TStackTrace* = array [0..MaxTraceLen-1, cstring]
-  TProfilerHook* = proc (st: TStackTrace) {.nimcall.}
+  StackTrace* = object
+    lines*: array[0..MaxTraceLen-1, cstring]
+    files*: array[0..MaxTraceLen-1, cstring]
+  ProfilerHook* = proc (st: StackTrace) {.nimcall.}
+
+proc `[]`*(st: StackTrace, i: int): cstring = st.lines[i]
 
-proc captureStackTrace(f: PFrame, st: var TStackTrace) =
+proc captureStackTrace(f: PFrame, st: var StackTrace) =
   const
     firstCalls = 5
   var
     it = f
     i = 0
     total = 0
-  while it != nil and i <= high(st)-(firstCalls-1):
+  while it != nil and i <= high(st.lines)-(firstCalls-1):
     # the (-1) is for the "..." entry
-    st[i] = it.procname
+    st.lines[i] = it.procname
+    st.files[i] = it.filename
     inc(i)
     inc(total)
     it = it.prev
@@ -39,61 +47,57 @@ proc captureStackTrace(f: PFrame, st: var TStackTrace) =
   while it != nil:
     inc(total)
     it = it.prev
-  for j in 1..total-i-(firstCalls-1): 
+  for j in 1..total-i-(firstCalls-1):
     if b != nil: b = b.prev
   if total != i:
-    st[i] = "..."
+    st.lines[i] = "..."
+    st.files[i] = "..."
     inc(i)
-  while b != nil and i <= high(st):
-    st[i] = b.procname
+  while b != nil and i <= high(st.lines):
+    st.lines[i] = b.procname
+    st.files[i] = b.filename
     inc(i)
     b = b.prev
 
+var
+  profilingRequestedHook*: proc (): bool {.nimcall, gcsafe.}
+    ## set this variable to provide a procedure that implements a profiler in
+    ## user space. See the `nimprof` module for a reference implementation.
+
 when defined(memProfiler):
   type
-    TMemProfilerHook* = proc (st: TStackTrace, requestedSize: int) {.nimcall.}
+    MemProfilerHook* = proc (st: StackTrace, requestedSize: int) {.nimcall, gcsafe.}
+
   var
-    profilerHook*: TMemProfilerHook
+    profilerHook*: MemProfilerHook
       ## set this variable to provide a procedure that implements a profiler in
       ## user space. See the `nimprof` module for a reference implementation.
 
-  proc callProfilerHook(hook: TMemProfilerHook, requestedSize: int) =
-    var st: TStackTrace
+  proc callProfilerHook(hook: MemProfilerHook, requestedSize: int) =
+    var st: StackTrace
     captureStackTrace(framePtr, st)
     hook(st, requestedSize)
 
   proc nimProfile(requestedSize: int) =
-    if not isNil(profilerHook):
+    if not isNil(profilingRequestedHook) and profilingRequestedHook():
       callProfilerHook(profilerHook, requestedSize)
 else:
-  const
-    SamplingInterval = 50_000
-      # set this to change the default sampling interval
   var
-    profilerHook*: TProfilerHook
+    profilerHook*: ProfilerHook
       ## set this variable to provide a procedure that implements a profiler in
       ## user space. See the `nimprof` module for a reference implementation.
-    gTicker {.threadvar.}: int
 
-  proc callProfilerHook(hook: TProfilerHook) {.noinline.} =
+  proc callProfilerHook(hook: ProfilerHook) {.noinline.} =
     # 'noinline' so that 'nimProfile' does not perform the stack allocation
     # in the common case.
-    var st: TStackTrace
-    captureStackTrace(framePtr, st)
-    hook(st)
+    when not defined(nimdoc):
+      var st: StackTrace
+      captureStackTrace(framePtr, st)
+      hook(st)
 
   proc nimProfile() =
     ## This is invoked by the compiler in every loop and on every proc entry!
-    if gTicker == 0:
-      gTicker = -1
-      if not isNil(profilerHook):
-        # disable recursive calls: XXX should use try..finally,
-        # but that's too expensive!
-        let oldHook = profilerHook
-        profilerHook = nil
-        callProfilerHook(oldHook)
-        profilerHook = oldHook
-      gTicker = SamplingInterval
-    dec gTicker
+    if not isNil(profilingRequestedHook) and profilingRequestedHook():
+      callProfilerHook(profilerHook)
 
 {.pop.}
diff --git a/lib/system/rawquits.nim b/lib/system/rawquits.nim
new file mode 100644
index 000000000..f0ead10c6
--- /dev/null
+++ b/lib/system/rawquits.nim
@@ -0,0 +1,27 @@
+import system/ctypes
+
+when defined(nimNoQuit):
+  proc rawQuit(errorcode: int = QuitSuccess) = discard "ignoring quit"
+
+elif defined(genode):
+  import genode/env
+
+  var systemEnv {.exportc: runtimeEnvSym.}: GenodeEnvPtr
+
+  type GenodeEnv = GenodeEnvPtr
+    ## Opaque type representing Genode environment.
+
+  proc rawQuit(env: GenodeEnv; errorcode: int) {.magic: "Exit", noreturn,
+    importcpp: "#->parent().exit(@); Genode::sleep_forever()", header: "<base/sleep.h>".}
+
+  proc rawQuit(errorcode: int = QuitSuccess) {.inline, noreturn.} =
+    systemEnv.rawQuit(errorcode)
+
+
+elif defined(js) and defined(nodejs) and not defined(nimscript):
+  proc rawQuit(errorcode: int = QuitSuccess) {.magic: "Exit",
+    importc: "process.exit", noreturn.}
+
+else:
+  proc rawQuit(errorcode: cint) {.
+    magic: "Exit", importc: "exit", header: "<stdlib.h>", noreturn.}
\ No newline at end of file
diff --git a/lib/system/repr.nim b/lib/system/repr.nim
index 3c9ce73ac..13118e40b 100755..100644
--- a/lib/system/repr.nim
+++ b/lib/system/repr.nim
@@ -1,6 +1,6 @@
 #
 #
-#            Nimrod's Runtime Library
+#            Nim's Runtime Library
 #        (c) Copyright 2012 Andreas Rumpf
 #
 #    See the file "copying.txt", included in this
@@ -10,39 +10,38 @@
 # The generic ``repr`` procedure. It is an invaluable debugging tool.
 
 when not defined(useNimRtl):
-  proc reprAny(p: pointer, typ: PNimType): string {.compilerRtl.}
+  proc reprAny(p: pointer, typ: PNimType): string {.compilerRtl, gcsafe.}
 
 proc reprInt(x: int64): string {.compilerproc.} = return $x
 proc reprFloat(x: float): string {.compilerproc.} = return $x
 
 proc reprPointer(x: pointer): string {.compilerproc.} =
-  var buf: array [0..59, char]
-  c_sprintf(buf, "%p", x)
-  return $buf
+  result = newString(60)
+  let n = c_snprintf(cast[cstring](addr result[0]), csize_t(60), "%p", x)
+  setLen(result, n)
 
-proc `$`(x: uint64): string =
-  var buf: array [0..59, char]
-  c_sprintf(buf, "%llu", x)
-  return $buf
-
-proc reprStrAux(result: var string, s: string) =
+proc reprStrAux(result: var string, s: cstring; len: int) =
   if cast[pointer](s) == nil:
     add result, "nil"
     return
-  add result, reprPointer(cast[pointer](s)) & "\""
-  for c in items(s):
+  if len > 0:
+    add result, reprPointer(cast[pointer](s))
+  add result, "\""
+  for i in 0 .. pred(len):
+    let c = s[i]
     case c
     of '"': add result, "\\\""
     of '\\': add result, "\\\\" # BUGFIX: forgotten
     of '\10': add result, "\\10\"\n\"" # " \n " # better readability
-    of '\128' .. '\255', '\0'..'\9', '\11'..'\31':
+    of '\127' .. '\255', '\0'..'\9', '\11'..'\31':
       add result, "\\" & reprInt(ord(c))
-    else: result.add(c)
+    else:
+      result.add(c)
   add result, "\""
 
 proc reprStr(s: string): string {.compilerRtl.} =
   result = ""
-  reprStrAux(result, s)
+  reprStrAux(result, s, s.len)
 
 proc reprBool(x: bool): string {.compilerRtl.} =
   if x: result = "true"
@@ -53,28 +52,32 @@ proc reprChar(x: char): string {.compilerRtl.} =
   case x
   of '"': add result, "\\\""
   of '\\': add result, "\\\\"
-  of '\128' .. '\255', '\0'..'\31': add result, "\\" & reprInt(ord(x))
+  of '\127' .. '\255', '\0'..'\31': add result, "\\" & reprInt(ord(x))
   else: add result, x
   add result, "\'"
 
 proc reprEnum(e: int, typ: PNimType): string {.compilerRtl.} =
-  # we read an 'int' but this may have been too large, so mask the other bits:
-  let e = e and (1 shl (typ.size*8)-1)
+  ## Return string representation for enumeration values
+  var n = typ.node
   if ntfEnumHole notin typ.flags:
-    if e <% typ.node.len:
-      return $typ.node.sons[e].name
+    let o = e - n.sons[0].offset
+    if o >= 0 and o <% typ.node.len:
+      return $n.sons[o].name
   else:
     # ugh we need a slow linear search:
-    var n = typ.node
     var s = n.sons
     for i in 0 .. n.len-1:
-      if s[i].offset == e: return $s[i].name
+      if s[i].offset == e:
+        return $s[i].name
+
   result = $e & " (invalid data!)"
 
+include system/repr_impl
+
 type
-  pbyteArray = ptr array[0.. 0xffff, int8]
+  PByteArray = ptr UncheckedArray[byte] # array[0xffff, byte]
 
-proc addSetElem(result: var string, elem: int, typ: PNimType) =
+proc addSetElem(result: var string, elem: int, typ: PNimType) {.benign.} =
   case typ.kind
   of tyEnum: add result, reprEnum(elem, typ)
   of tyBool: add result, reprBool(bool(elem))
@@ -89,22 +92,23 @@ proc reprSetAux(result: var string, p: pointer, typ: PNimType) =
   var elemCounter = 0  # we need this flag for adding the comma at
                        # the right places
   add result, "{"
-  var u: int64
+  var u: uint64
   case typ.size
-  of 1: u = ze64(cast[ptr int8](p)[])
-  of 2: u = ze64(cast[ptr int16](p)[])
-  of 4: u = ze64(cast[ptr int32](p)[])
-  of 8: u = cast[ptr int64](p)[]
+  of 1: u = cast[ptr uint8](p)[]
+  of 2: u = cast[ptr uint16](p)[]
+  of 4: u = cast[ptr uint32](p)[]
+  of 8: u = cast[ptr uint64](p)[]
   else:
-    var a = cast[pbyteArray](p)
+    u = uint64(0)
+    var a = cast[PByteArray](p)
     for i in 0 .. typ.size*8-1:
-      if (ze(a[i div 8]) and (1 shl (i mod 8))) != 0:
+      if (uint(a[i shr 3]) and (1'u shl (i and 7))) != 0:
         if elemCounter > 0: add result, ", "
         addSetElem(result, i+typ.node.len, typ.base)
         inc(elemCounter)
   if typ.size <= 8:
     for i in 0..sizeof(int64)*8-1:
-      if (u and (1'i64 shl int64(i))) != 0'i64:
+      if (u and (1'u64 shl uint64(i))) != 0'u64:
         if elemCounter > 0: add result, ", "
         addSetElem(result, i+typ.node.len, typ.base)
         inc(elemCounter)
@@ -115,164 +119,209 @@ proc reprSet(p: pointer, typ: PNimType): string {.compilerRtl.} =
   reprSetAux(result, p, typ)
 
 type
-  TReprClosure {.final.} = object # we cannot use a global variable here
+  ReprClosure {.final.} = object # we cannot use a global variable here
                                   # as this wouldn't be thread-safe
-    marked: TCellSet
+    when declared(CellSet):
+      marked: CellSet
     recdepth: int       # do not recurse endlessly
     indent: int         # indentation
 
 when not defined(useNimRtl):
-  proc initReprClosure(cl: var TReprClosure) =
+  proc initReprClosure(cl: var ReprClosure) =
     # Important: cellsets does not lock the heap when doing allocations! We
     # have to do it here ...
-    when hasThreadSupport and hasSharedHeap and defined(heapLock):
+    when hasThreadSupport and hasSharedHeap and declared(heapLock):
       AcquireSys(HeapLock)
-    Init(cl.marked)
+    when declared(CellSet):
+      init(cl.marked)
     cl.recdepth = -1      # default is to display everything!
     cl.indent = 0
 
-  proc deinitReprClosure(cl: var TReprClosure) =
-    Deinit(cl.marked)
-    when hasThreadSupport and hasSharedHeap and defined(heapLock): 
+  proc deinitReprClosure(cl: var ReprClosure) =
+    when declared(CellSet): deinit(cl.marked)
+    when hasThreadSupport and hasSharedHeap and declared(heapLock):
       ReleaseSys(HeapLock)
 
-  proc reprBreak(result: var string, cl: TReprClosure) =
+  proc reprBreak(result: var string, cl: ReprClosure) =
     add result, "\n"
     for i in 0..cl.indent-1: add result, ' '
 
   proc reprAux(result: var string, p: pointer, typ: PNimType,
-               cl: var TReprClosure)
+               cl: var ReprClosure) {.benign.}
 
   proc reprArray(result: var string, p: pointer, typ: PNimType,
-                 cl: var TReprClosure) =
+                 cl: var ReprClosure) =
     add result, "["
     var bs = typ.base.size
     for i in 0..typ.size div bs - 1:
       if i > 0: add result, ", "
-      reprAux(result, cast[pointer](cast[TAddress](p) + i*bs), typ.base, cl)
+      reprAux(result, cast[pointer](cast[int](p) + i*bs), typ.base, cl)
     add result, "]"
 
+  when defined(nimSeqsV2):
+    type
+      GenericSeq = object
+        len: int
+        p: pointer
+      PGenericSeq = ptr GenericSeq
+    const payloadOffset = sizeof(int) + sizeof(pointer)
+      # see seqs.nim:    cap: int
+      #                  region: Allocator
+
+    template payloadPtr(x: untyped): untyped = cast[PGenericSeq](x).p
+  else:
+    const payloadOffset = GenericSeqSize ## the payload offset always depends on the alignment of the member type.
+    template payloadPtr(x: untyped): untyped = x
+
   proc reprSequence(result: var string, p: pointer, typ: PNimType,
-                    cl: var TReprClosure) =
+                    cl: var ReprClosure) =
     if p == nil:
-      add result, "nil"
+      add result, "[]"
       return
-    result.add(reprPointer(p) & "[")
+    result.add(reprPointer(p))
+    result.add "@["
     var bs = typ.base.size
     for i in 0..cast[PGenericSeq](p).len-1:
       if i > 0: add result, ", "
-      reprAux(result, cast[pointer](cast[TAddress](p) + GenericSeqSize + i*bs),
-              typ.Base, cl)
+      reprAux(result, cast[pointer](cast[int](payloadPtr(p)) + align(payloadOffset, typ.align) + i*bs),
+              typ.base, cl)
     add result, "]"
 
   proc reprRecordAux(result: var string, p: pointer, n: ptr TNimNode,
-                     cl: var TReprClosure) =
+                     cl: var ReprClosure) {.benign.} =
     case n.kind
     of nkNone: sysAssert(false, "reprRecordAux")
     of nkSlot:
       add result, $n.name
       add result, " = "
-      reprAux(result, cast[pointer](cast[TAddress](p) + n.offset), n.typ, cl)
+      reprAux(result, cast[pointer](cast[int](p) + n.offset), n.typ, cl)
     of nkList:
       for i in 0..n.len-1:
         if i > 0: add result, ",\n"
         reprRecordAux(result, p, n.sons[i], cl)
     of nkCase:
       var m = selectBranch(p, n)
-      reprAux(result, cast[pointer](cast[TAddress](p) + n.offset), n.typ, cl)
+      reprAux(result, cast[pointer](cast[int](p) + n.offset), n.typ, cl)
       if m != nil: reprRecordAux(result, p, m, cl)
 
   proc reprRecord(result: var string, p: pointer, typ: PNimType,
-                  cl: var TReprClosure) =
+                  cl: var ReprClosure) =
     add result, "["
-    let oldLen = result.len
-    reprRecordAux(result, p, typ.node, cl)
-    if typ.base != nil: 
-      if oldLen != result.len: add result, ",\n"
-      reprRecordAux(result, p, typ.base.node, cl)
+    var curTyp = typ
+    var first = true
+    while curTyp != nil:
+      var part = ""
+      reprRecordAux(part, p, curTyp.node, cl)
+      if part.len > 0:
+        if not first:
+          add result, ",\n"
+        add result, part
+        first = false
+      curTyp = curTyp.base
     add result, "]"
 
   proc reprRef(result: var string, p: pointer, typ: PNimType,
-               cl: var TReprClosure) =
+               cl: var ReprClosure) =
     # we know that p is not nil here:
-    when defined(boehmGC) or defined(nogc):
-      var cell = cast[PCell](p)
-    else:
-      var cell = usrToCell(p)
-    add result, "ref " & reprPointer(p)
-    if cell notin cl.marked:
-      # only the address is shown:
-      incl(cl.marked, cell)
-      add result, " --> "
-      reprAux(result, p, typ.base, cl)
+    when declared(CellSet):
+      when defined(boehmGC) or defined(gogc) or defined(nogc) or usesDestructors:
+        var cell = cast[PCell](p)
+      else:
+        var cell = usrToCell(p)
+      add result, if typ.kind == tyPtr: "ptr " else: "ref "
+      add result, reprPointer(p)
+      if cell notin cl.marked:
+        # only the address is shown:
+        incl(cl.marked, cell)
+        add result, " --> "
+        reprAux(result, p, typ.base, cl)
+
+  proc getInt(p: pointer, size: int): int =
+    case size
+    of 1: return int(cast[ptr uint8](p)[])
+    of 2: return int(cast[ptr uint16](p)[])
+    of 4: return int(cast[ptr uint32](p)[])
+    of 8: return int(cast[ptr uint64](p)[])
+    else: discard
 
   proc reprAux(result: var string, p: pointer, typ: PNimType,
-               cl: var TReprClosure) =
+               cl: var ReprClosure) =
     if cl.recdepth == 0:
       add result, "..."
       return
     dec(cl.recdepth)
     case typ.kind
     of tySet: reprSetAux(result, p, typ)
-    of tyArray: reprArray(result, p, typ, cl)
+    of tyArray, tyArrayConstr: reprArray(result, p, typ, cl)
     of tyTuple: reprRecord(result, p, typ, cl)
-    of tyObject: 
+    of tyObject:
       var t = cast[ptr PNimType](p)[]
       reprRecord(result, p, t, cl)
     of tyRef, tyPtr:
       sysAssert(p != nil, "reprAux")
-      if cast[ppointer](p)[] == nil: add result, "nil"
-      else: reprRef(result, cast[ppointer](p)[], typ, cl)
+      if cast[PPointer](p)[] == nil: add result, "nil"
+      else: reprRef(result, cast[PPointer](p)[], typ, cl)
     of tySequence:
-      reprSequence(result, cast[ppointer](p)[], typ, cl)
+      reprSequence(result, cast[PPointer](p)[], typ, cl)
     of tyInt: add result, $(cast[ptr int](p)[])
-    of tyInt8: add result, $int(cast[ptr Int8](p)[])
-    of tyInt16: add result, $int(cast[ptr Int16](p)[])
-    of tyInt32: add result, $int(cast[ptr Int32](p)[])
-    of tyInt64: add result, $(cast[ptr Int64](p)[])
-    of tyUInt8: add result, $ze(cast[ptr Int8](p)[])
-    of tyUInt16: add result, $ze(cast[ptr Int16](p)[])
-    
+    of tyInt8: add result, $int(cast[ptr int8](p)[])
+    of tyInt16: add result, $int(cast[ptr int16](p)[])
+    of tyInt32: add result, $int(cast[ptr int32](p)[])
+    of tyInt64: add result, $(cast[ptr int64](p)[])
+    of tyUInt: add result, $(cast[ptr uint](p)[])
+    of tyUInt8: add result, $(cast[ptr uint8](p)[])
+    of tyUInt16: add result, $(cast[ptr uint16](p)[])
+    of tyUInt32: add result, $(cast[ptr uint32](p)[])
+    of tyUInt64: add result, $(cast[ptr uint64](p)[])
+
     of tyFloat: add result, $(cast[ptr float](p)[])
     of tyFloat32: add result, $(cast[ptr float32](p)[])
     of tyFloat64: add result, $(cast[ptr float64](p)[])
-    of tyEnum: add result, reprEnum(cast[ptr int](p)[], typ)
+    of tyEnum: add result, reprEnum(getInt(p, typ.size), typ)
     of tyBool: add result, reprBool(cast[ptr bool](p)[])
     of tyChar: add result, reprChar(cast[ptr char](p)[])
-    of tyString: reprStrAux(result, cast[ptr string](p)[])
-    of tyCString: reprStrAux(result, $(cast[ptr cstring](p)[]))
+    of tyString:
+      let sp = cast[ptr string](p)
+      reprStrAux(result, sp[].cstring, sp[].len)
+    of tyCstring:
+      let cs = cast[ptr cstring](p)[]
+      if cs.isNil: add result, "nil"
+      else: reprStrAux(result, cs, cs.len)
     of tyRange: reprAux(result, p, typ.base, cl)
     of tyProc, tyPointer:
-      if cast[ppointer](p)[] == nil: add result, "nil"
-      else: add result, reprPointer(cast[ppointer](p)[])
+      if cast[PPointer](p)[] == nil: add result, "nil"
+      else: add result, reprPointer(cast[PPointer](p)[])
+    of tyUncheckedArray:
+      add result, "[...]"
     else:
       add result, "(invalid data!)"
     inc(cl.recdepth)
 
-proc reprOpenArray(p: pointer, length: int, elemtyp: PNimType): string {.
-                   compilerRtl.} =
-  var
-    cl: TReprClosure
-  initReprClosure(cl)
-  result = "["
-  var bs = elemtyp.size
-  for i in 0..length - 1:
-    if i > 0: add result, ", "
-    reprAux(result, cast[pointer](cast[TAddress](p) + i*bs), elemtyp, cl)
-  add result, "]"
-  deinitReprClosure(cl)
+when not defined(useNimRtl):
+  proc reprOpenArray(p: pointer, length: int, elemtyp: PNimType): string {.
+                     compilerRtl.} =
+    var
+      cl: ReprClosure
+    initReprClosure(cl)
+    result = "["
+    var bs = elemtyp.size
+    for i in 0..length - 1:
+      if i > 0: add result, ", "
+      reprAux(result, cast[pointer](cast[int](p) + i*bs), elemtyp, cl)
+    add result, "]"
+    deinitReprClosure(cl)
 
 when not defined(useNimRtl):
   proc reprAny(p: pointer, typ: PNimType): string =
     var
-      cl: TReprClosure
+      cl: ReprClosure
     initReprClosure(cl)
     result = ""
-    if typ.kind in {tyObject, tyTuple, tyArray, tySet}:
+    if typ.kind in {tyObject, tyTuple, tyArray, tyArrayConstr, tySet}:
       reprAux(result, p, typ, cl)
     else:
       var p = p
       reprAux(result, addr(p), typ, cl)
-    add result, "\n"
+    when defined(nimLegacyReprWithNewline): # see PR #16034
+      add result, "\n"
     deinitReprClosure(cl)
-
diff --git a/lib/system/repr_impl.nim b/lib/system/repr_impl.nim
new file mode 100644
index 000000000..b9ec1890f
--- /dev/null
+++ b/lib/system/repr_impl.nim
@@ -0,0 +1,15 @@
+#[
+other APIs common to system/repr and system/reprjs could be refactored here, eg:
+* reprChar
+* reprBool
+* reprStr
+
+Another possibility in future work would be to have a single include file instead
+of system/repr and system/reprjs, and use `when defined(js)` inside it.
+]#
+
+proc reprDiscriminant*(e: int, typ: PNimType): string {.compilerRtl.} =
+  case typ.kind
+  of tyEnum: reprEnum(e, typ)
+  of tyBool: $(e != 0)
+  else: $e
diff --git a/lib/system/repr_v2.nim b/lib/system/repr_v2.nim
new file mode 100644
index 000000000..d2aef536c
--- /dev/null
+++ b/lib/system/repr_v2.nim
@@ -0,0 +1,194 @@
+include system/inclrtl
+
+when defined(nimPreviewSlimSystem):
+  import std/formatfloat
+
+proc isNamedTuple(T: typedesc): bool {.magic: "TypeTrait".}
+  ## imported from typetraits
+
+proc distinctBase(T: typedesc, recursive: static bool = true): typedesc {.magic: "TypeTrait".}
+  ## imported from typetraits
+
+proc rangeBase(T: typedesc): typedesc {.magic: "TypeTrait".}
+  # skip one level of range; return the base type of a range type
+
+proc repr*(x: NimNode): string {.magic: "Repr", noSideEffect.}
+
+proc repr*(x: int): string =
+  ## Same as $x
+  $x
+
+proc repr*(x: int64): string =
+  ## Same as $x
+  $x
+
+proc repr*(x: uint64): string {.noSideEffect.} =
+  ## Same as $x
+  $x
+
+proc repr*(x: float): string =
+  ## Same as $x
+  $x
+
+proc repr*(x: bool): string {.magic: "BoolToStr", noSideEffect.}
+  ## repr for a boolean argument. Returns `x`
+  ## converted to the string "false" or "true".
+
+proc repr*(x: char): string {.noSideEffect, raises: [].} =
+  ## repr for a character argument. Returns `x` wrapped in single quotes,
+  ## with `\` and characters outside `'\32'..'\126'` escaped.
+  ##
+  ##   ```Nim
+  ##   assert repr('c') == "'c'"
+  ##   ```
+  result = "'"
+  # Elides string creations if not needed
+  if x in {'\\', '\0'..'\31', '\127'..'\255'}:
+    result.add '\\'
+  if x in {'\0'..'\31', '\127'..'\255'}:
+    result.add $x.uint8 # decimal character code after the backslash
+  else:
+    result.add x # everything else (including `'` and `\` itself) is copied verbatim
+  result.add '\''
+
+proc repr*(x: string | cstring): string {.noSideEffect, raises: [].} =
+  ## repr for a string or cstring argument. Returns `x`
+  ## converted to a double-quoted and escaped string.
+  result = "\""
+  for i in 0..<x.len:
+    if x[i] in {'"', '\\', '\0'..'\31', '\127'..'\255'}:
+      result.add '\\' # every escaped character starts with a backslash
+    case x[i]:
+    of '\n':
+      result.add "n\n" # completes the `\n` escape, then emits a real line break
+    of '\0'..'\9', '\11'..'\31', '\127'..'\255':
+      result.add $x[i].uint8 # decimal character code after the backslash
+    else:
+      result.add x[i]
+  result.add '\"'
+
+proc repr*[Enum: enum](x: Enum): string {.magic: "EnumToStr", noSideEffect, raises: [].}
+  ## repr for an enumeration argument. This works for
+  ## any enumeration type thanks to compiler magic.
+  ##
+  ## If a `repr` operator for a concrete enumeration is provided, this is
+  ## used instead. (In other words: *Overwriting* is possible.)
+
+proc reprDiscriminant*(e: int): string {.compilerproc.} =
+  # repr and reprjs can use `PNimType` to symbolize `e`; making this work here
+  # would require a way to pass the set of enum stringified values to cgen.
+  $e
+
+proc repr*(p: pointer): string =
+  ## repr of pointer as its zero-padded hexadecimal value; `nil` yields "nil".
+  if p == nil:
+    result = "nil"
+  else:
+    when nimvm:
+      result = "ptr" # in the VM a fixed placeholder is returned instead of an address
+    else:
+      const HexChars = "0123456789ABCDEF"
+      const len = sizeof(pointer) * 2 # two hex digits per byte
+      var n = cast[uint](p)
+      result = newString(len)
+      for j in countdown(len-1, 0): # fill from the least significant digit backwards
+        result[j] = HexChars[n and 0xF]
+        n = n shr 4
+
+proc repr*(p: proc | iterator {.closure.}): string =
+  ## repr of a proc as its address
+  repr(cast[ptr pointer](unsafeAddr p)[])
+
+template repr*[T: distinct|(range and not enum)](x: T): string =
+  when T is range: # add a branch to handle range
+    repr(rangeBase(typeof(x))(x))
+  elif T is distinct:
+    repr(distinctBase(typeof(x))(x))
+  else:
+    {.error: "cannot happen".}
+
+template repr*(t: typedesc): string = $t
+
+proc reprObject[T: tuple|object](res: var string, x: T) {.noSideEffect, raises: [].} =
+  res.add '('
+  var firstElement = true
+  const isNamed = T is object or isNamedTuple(T) # named fields are printed as `name: value`
+  when not isNamed:
+    var count = 0 # number of fields; only tracked for unnamed tuples
+  for name, value in fieldPairs(x):
+    if not firstElement: res.add(", ")
+    when isNamed:
+      res.add(name)
+      res.add(": ")
+    else:
+      count.inc
+    res.add repr(value)
+    firstElement = false
+  when not isNamed:
+    if count == 1:
+      res.add(',') # repr((1,)) should print as the semantically legal (1,)
+  res.add(')')
+
+
+proc repr*[T: tuple|object](x: T): string {.noSideEffect, raises: [].} =
+  ## Generic `repr` operator for tuples and objects that is lifted from the
+  ## components of `x`; objects are prefixed with their type name. Example:
+  ##   ```Nim
+  ##   repr((23, 45)) == "(23, 45)"
+  ##   repr((a: 23, b: 45)) == "(a: 23, b: 45)"
+  ##   repr(()) == "()"
+  ##   ```
+  when T is object:
+    result = $typeof(x)
+  reprObject(result, x)
+
+proc repr*[T](x: ref T | ptr T): string {.noSideEffect, raises: [].} =
+  if isNil(x): return "nil" # nothing to dereference
+  when T is object:
+    result = $typeof(x) # prefix is the name of the ref/ptr type itself, i.e. typeof(x)
+    reprObject(result, x[])
+  else:
+    result = when typeof(x) is ref: "ref " else: "ptr " # keyword prefix, then the pointee's repr
+    result.add repr(x[])
+
+proc collectionToRepr[T](x: T, prefix, separator, suffix: string): string {.noSideEffect, raises: [].} =
+  result = prefix
+  var firstElement = true
+  for value in items(x):
+    if firstElement:
+      firstElement = false
+    else:
+      result.add(separator)
+    result.add repr(value)
+  result.add(suffix)
+
+proc repr*[T](x: set[T]): string =
+  ## Generic `repr` operator for sets that is lifted from the components
+  ## of `x`. Example:
+  ##   ```Nim
+  ##   repr({23, 45}) == "{23, 45}"
+  ##   ```
+  collectionToRepr(x, "{", ", ", "}")
+
+proc repr*[T](x: seq[T]): string =
+  ## Generic `repr` operator for seqs that is lifted from the components
+  ## of `x`. Example:
+  ##   ```Nim
+  ##   repr(@[23, 45]) == "@[23, 45]"
+  ##   ```
+  collectionToRepr(x, "@[", ", ", "]")
+
+proc repr*[T, IDX](x: array[IDX, T]): string =
+  ## Generic `repr` operator for arrays that is lifted from the components.
+  collectionToRepr(x, "[", ", ", "]")
+
+proc repr*[T](x: openArray[T]): string =
+  ## Generic `repr` operator for openarrays that is lifted from the components
+  ## of `x`. Example:
+  ##   ```Nim
+  ##   repr(@[23, 45].toOpenArray(0, 1)) == "[23, 45]"
+  ##   ```
+  collectionToRepr(x, "[", ", ", "]")
+
+proc repr*[T](x: UncheckedArray[T]): string =
+  "[...]"
diff --git a/lib/system/reprjs.nim b/lib/system/reprjs.nim
index fd1cb5c8b..761d66aec 100644
--- a/lib/system/reprjs.nim
+++ b/lib/system/reprjs.nim
@@ -1,23 +1,251 @@
 #
 #
-#            Nimrod's Runtime Library
+#            Nim's Runtime Library
 #        (c) Copyright 2012 Andreas Rumpf
 #
 #    See the file "copying.txt", included in this
 #    distribution, for details about the copyright.
 #
+# The generic ``repr`` procedure for the javascript backend.
 
-proc reprInt(x: int64): string {.compilerproc.} = return $x
+when defined(nimPreviewSlimSystem):
+  import std/formatfloat
+
+proc reprInt(x: int64): string {.compilerproc.} = $x
+proc reprInt(x: uint64): string {.compilerproc.} = $x
+proc reprInt(x: int): string {.compilerproc.} = $x
+proc reprFloat(x: float): string {.compilerproc.} = $x
+
+proc reprPointer(p: pointer): string {.compilerproc.} =
+  # Do we need to generate the full 8bytes ? In js a pointer is an int anyway
+  var tmp: int
+  {.emit: "`tmp` = `p`_Idx || 0;".}
+  result = $tmp
+
+proc reprBool(x: bool): string {.compilerRtl.} =
+  # Spells the boolean out as "true" or "false".
+  result = if x: "true" else: "false"
 
 proc reprEnum(e: int, typ: PNimType): string {.compilerRtl.} =
-  if ntfEnumHole notin typ.flags:
-    if e <% typ.node.len:
-      return $typ.node.sons[e].name
+  var tmp: bool
+  let item = typ.node.sons[e]
+  {.emit: "`tmp` = `item` !== undefined;".}
+  if tmp:
+    result = makeNimstrLit(item.name)
+  else:
+    result = $e & " (invalid data!)"
+
+include system/repr_impl
+
+proc reprChar(x: char): string {.compilerRtl.} =
+  result = "\'"  # opening single quote
+  case x
+  of '"': add(result, "\\\"")  # a double quote is emitted as \"
+  of '\\': add(result, "\\\\")  # a backslash is doubled
+  of '\127'..'\255', '\0'..'\31': add(result, "\\" & reprInt(ord(x)))  # backslash followed by the decimal char code
+  else: add(result, x)
+  add(result, "\'")
+
+proc reprStrAux(result: var string, s: cstring | string, len: int) =
+  add(result, "\"")
+  for i in 0 .. len-1:
+    let c = s[i]
+    case c
+    of '"': add(result, "\\\"")
+    of '\\': add(result, "\\\\")
+    #of '\10': add(result, "\\10\"\n\"")
+    of '\127'..'\255', '\0'..'\31':
+      add(result, "\\" & reprInt(ord(c)))
+    else:
+      add(result, c)
+  add(result, "\"")
+
+proc reprStr(s: string): string {.compilerRtl.} =
+  reprStrAux(result, s, s.len)
+
+proc addSetElem(result: var string, elem: int, typ: PNimType) =
+  # Dispatch each set element to the correct repr<Type> proc
+  case typ.kind:
+  of tyEnum: add(result, reprEnum(elem, typ))
+  of tyBool: add(result, reprBool(bool(elem)))
+  of tyChar: add(result, reprChar(chr(elem)))
+  of tyRange: addSetElem(result, elem, typ.base) # Note the base to advance towards the element type
+  of tyInt..tyInt64, tyUInt8, tyUInt16: add result, reprInt(elem)
+  else: # data corrupt --> inform the user
+    add(result, " (invalid data!)")
+
+iterator setKeys(s: int): int {.inline.} =
+  # The type of s is a lie, but it's expected to be a set.
+  # Iterates over the own property names of the JS object representing the
+  # set and yields each of them parsed as a base-10 int.
+  var len: int
+  var yieldRes: int
+  var i: int = 0
+  {. emit: """
+  var setObjKeys = Object.getOwnPropertyNames(`s`);
+  `len` = setObjKeys.length;
+  """ .}
+  while i < len:
+    {. emit: "`yieldRes` = parseInt(setObjKeys[`i`],10);\n" .}
+    yield yieldRes
+    inc i
+
+proc reprSetAux(result: var string, s: int, typ: PNimType) =
+  # Appends "{elem, elem, ...}" to `result`, one entry per key of `s`;
+  # each element is rendered by `addSetElem` using the set's base type.
+  add(result, "{")
+  var emitted = 0
+  for el in setKeys(s):
+    if emitted > 0: add(result, ", ")
+    addSetElem(result, el, typ.base)
+    inc emitted
+  add(result, "}")
+
+proc reprSet(e: int, typ: PNimType): string {.compilerRtl.} =
+  reprSetAux(result, e, typ)
+
+type
+  ReprClosure {.final.} = object
+    recDepth: int       # do not recurse endlessly
+    indent: int         # indentation
+
+proc initReprClosure(cl: var ReprClosure) =
+  cl.recDepth = -1 # default is to display everything!
+  cl.indent = 0
+
+proc reprAux(result: var string, p: pointer, typ: PNimType, cl: var ReprClosure)
+
+proc reprArray(a: pointer, typ: PNimType,
+              cl: var ReprClosure): string {.compilerRtl.} =
+  # We prepend @ to seq, the C backend prepends the pointer to the seq.
+  result = if typ.kind == tySequence: "@[" else: "["
+  var len: int = 0
+
+  {. emit: "`len` = `a`.length;\n" .}
+  var dereffed: pointer = a
+  for i in 0 .. len-1:
+    if i > 0 :
+      add(result, ", ")
+    # advance pointer and point to element at index
+    {. emit: """
+    `dereffed`_Idx = `i`;
+    `dereffed` = `a`[`dereffed`_Idx];
+    """ .}
+    reprAux(result, dereffed, typ.base, cl)
+
+  add(result, "]")
+
+proc isPointedToNil(p: pointer): bool =
+  {. emit: "if (`p` === null) {`result` = true;}\n" .}
+
+proc reprRef(result: var string, p: pointer, typ: PNimType,
+          cl: var ReprClosure) =
+  if p.isPointedToNil:
+    add(result, "nil")
+    return
+  add(result, "ref " & reprPointer(p))
+  add(result, " --> ")
+  if typ.base.kind != tyArray:
+    {. emit: """
+    if (`p` != null && `p`.length > 0) {
+      `p` = `p`[`p`_Idx];
+    }
+    """ .}
+  reprAux(result, p, typ.base, cl)
+
+proc reprRecordAux(result: var string, o: pointer, typ: PNimType, cl: var ReprClosure) =
+  add(result, "[")
+
+  var first = true
+  var val = o
+  if typ.node.len == 0:
+    # if the object has only one field, len is 0  and sons is nil, the field is in node
+    let key: cstring = typ.node.name
+    add(result, $key & " = ")
+    {. emit: "`val` = `o`[`key`];\n" .}
+    reprAux(result, val, typ.node.typ, cl)
+  else:
+    # if the object has more than one field, sons is not nil and contains the fields.
+    for i in 0 .. typ.node.len-1:
+      if first: first = false
+      else: add(result, ",\n")
+
+      let key: cstring = typ.node.sons[i].name
+      add(result, $key & " = ")
+      {. emit: "`val` = `o`[`key`];\n" .} # access the field by name
+      reprAux(result, val, typ.node.sons[i].typ, cl)
+  add(result, "]")
+
+proc reprRecord(o: pointer, typ: PNimType, cl: var ReprClosure): string {.compilerRtl.} =
+  reprRecordAux(result, o, typ, cl)
+
+
+proc reprJsonStringify(p: int): string {.compilerRtl.} =
+  # As a last resort, use stringify
+  # We use this for tyOpenArray, tyVarargs while genTypeInfo is not implemented
+  var tmp: cstring
+  {. emit: "`tmp` = JSON.stringify(`p`);\n" .}
+  result = $tmp
+
+proc reprAux(result: var string, p: pointer, typ: PNimType,
+            cl: var ReprClosure) =
+  if cl.recDepth == 0:
+    add(result, "...")
+    return
+  dec(cl.recDepth)
+  case typ.kind
+  of tyInt..tyInt32, tyUInt..tyUInt32:
+    add(result, reprInt(cast[int](p)))
+  of tyInt64:
+    add(result, reprInt(cast[int64](p)))
+  of tyUInt64:
+    add(result, reprInt(cast[uint64](p)))
+  of tyChar:
+    add(result, reprChar(cast[char](p)))
+  of tyBool:
+    add(result, reprBool(cast[bool](p)))
+  of tyFloat..tyFloat128:
+    add(result, reprFloat(cast[float](p)))
+  of tyString:
+    var fp: int
+    {. emit: "`fp` = `p`;\n" .}
+    add(result, reprStr(cast[string](p)))
+  of tyCstring:
+    var fp: cstring
+    {. emit: "`fp` = `p`;\n" .}
+    if fp.isNil:
+      add(result, "nil")
+    else:
+      reprStrAux(result, fp, fp.len)
+  of tyEnum, tyOrdinal:
+    var fp: int
+    {. emit: "`fp` = `p`;\n" .}
+    add(result, reprEnum(fp, typ))
+  of tySet:
+    var fp: int
+    {. emit: "`fp` = `p`;\n" .}
+    add(result, reprSet(fp, typ))
+  of tyRange: reprAux(result, p, typ.base, cl)
+  of tyObject, tyTuple:
+    add(result, reprRecord(p, typ, cl))
+  of tyArray, tyArrayConstr, tySequence:
+    add(result, reprArray(p, typ, cl))
+  of tyPointer:
+    add(result, reprPointer(p))
+  of tyPtr, tyRef:
+    reprRef(result, p, typ, cl)
+  of tyProc:
+    if p.isPointedToNil:
+      add(result, "nil")
+    else:
+      add(result, reprPointer(p))
   else:
-    # ugh we need a slow linear search:
-    var n = typ.node
-    var s = n.sons
-    for i in 0 .. n.len-1:
-      if s[i].offset == e: return $s[i].name
-  result = $e & " (invalid data!)"
+    add(result, "(invalid data!)" & reprJsonStringify(cast[int](p)))
+  inc(cl.recDepth)
 
+proc reprAny(p: pointer, typ: PNimType): string {.compilerRtl.} =
+  var cl: ReprClosure
+  initReprClosure(cl)
+  reprAux(result, p, typ, cl)
+  when defined(nimLegacyReprWithNewline): # see PR #16034
+    add result, "\n"
diff --git a/lib/system/seqs_v2.nim b/lib/system/seqs_v2.nim
new file mode 100644
index 000000000..572e77408
--- /dev/null
+++ b/lib/system/seqs_v2.nim
@@ -0,0 +1,227 @@
+#
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2017 Nim contributors
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+
+# import std/typetraits
+# strs already imported allocateds for us.
+
+
+# Some optimizations here may skip the empty-seq initialization of some symbols, and StrictNotNil then complains.
+{.push warning[StrictNotNil]: off.}  # See https://github.com/nim-lang/Nim/issues/21401
+
+
+## Default seq implementation used by Nim's core.
+type
+  NimSeqPayloadBase = object
+    cap: int
+
+  NimSeqPayload[T] = object
+    cap: int
+    data: UncheckedArray[T]
+
+  NimSeqV2*[T] = object # \
+    # if you change this implementation, also change seqs_v2_reimpl.nim!
+    len: int
+    p: ptr NimSeqPayload[T]
+
+  NimRawSeq = object
+    len: int
+    p: pointer
+
+const nimSeqVersion {.core.} = 2
+
+# XXX make code memory safe for overflows in '*'
+
+proc newSeqPayload(cap, elemSize, elemAlign: int): pointer {.compilerRtl, raises: [].} =
+  # Type erasure is required here because Nim does not support generic
+  # compilerProcs. Oh well, this will all be inlined anyway.
+  if cap <= 0: return nil
+  let headerSize = align(sizeof(NimSeqPayloadBase), elemAlign)
+  # one block via `alignedAlloc0`: the aligned header followed by `cap` elements
+  let payload = cast[ptr NimSeqPayloadBase](alignedAlloc0(headerSize + cap * elemSize, elemAlign))
+  payload.cap = cap
+  result = payload
+
+proc newSeqPayloadUninit(cap, elemSize, elemAlign: int): pointer {.compilerRtl, raises: [].} =
+  # Used in `newSeqOfCap()`.
+  if cap > 0:
+    var p = cast[ptr NimSeqPayloadBase](alignedAlloc(align(sizeof(NimSeqPayloadBase), elemAlign) + cap * elemSize, elemAlign))
+    p.cap = cap
+    result = p
+  else:
+    result = nil
+
+template `+!`(p: pointer, s: int): pointer =
+  cast[pointer](cast[int](p) +% s)
+
+template `-!`(p: pointer, s: int): pointer =
+  cast[pointer](cast[int](p) -% s)
+
+proc prepareSeqAdd(len: int; p: pointer; addlen, elemSize, elemAlign: int): pointer {.
+    noSideEffect, tags: [], raises: [], compilerRtl.} =
+  {.noSideEffect.}:
+    let headerSize = align(sizeof(NimSeqPayloadBase), elemAlign)
+    if addlen <= 0:
+      result = p  # nothing to add: hand the payload back unchanged
+    elif p == nil:
+      result = newSeqPayload(len+addlen, elemSize, elemAlign)  # no payload yet: allocate a fresh one
+    else:
+      # Note: this means we cannot support things that have internal pointers as
+      # they get reallocated here. This needs to be documented clearly.
+      var p = cast[ptr NimSeqPayloadBase](p)
+      let oldCap = p.cap and not strlitFlag  # capacity with the `strlitFlag` bit masked out
+      let newCap = max(resize(oldCap), len+addlen)  # at least `len+addlen`, otherwise whatever `resize` proposes
+      var q: ptr NimSeqPayloadBase
+      if (p.cap and strlitFlag) == strlitFlag:  # flagged payload: copy into a new block instead of reallocating `p`
+        q = cast[ptr NimSeqPayloadBase](alignedAlloc(headerSize + elemSize * newCap, elemAlign))
+        copyMem(q +! headerSize, p +! headerSize, len * elemSize)
+      else:
+        let oldSize = headerSize + elemSize * oldCap
+        let newSize = headerSize + elemSize * newCap
+        q = cast[ptr NimSeqPayloadBase](alignedRealloc(p, oldSize, newSize, elemAlign))
+
+      zeroMem(q +! headerSize +! len * elemSize, addlen * elemSize)  # clear the `addlen` slots that follow the first `len` elements
+      q.cap = newCap
+      result = q
+
+proc zeroNewElements(len: int; q: pointer; addlen, elemSize, elemAlign: int) {.
+    noSideEffect, tags: [], raises: [], compilerRtl.} =
+  {.noSideEffect.}:
+    let headerSize = align(sizeof(NimSeqPayloadBase), elemAlign)
+    zeroMem(q +! headerSize +! len * elemSize, addlen * elemSize)
+
+proc prepareSeqAddUninit(len: int; p: pointer; addlen, elemSize, elemAlign: int): pointer {.
+    noSideEffect, tags: [], raises: [], compilerRtl.} =
+  {.noSideEffect.}:
+    let headerSize = align(sizeof(NimSeqPayloadBase), elemAlign)
+    if addlen <= 0:
+      result = p
+    elif p == nil:
+      result = newSeqPayloadUninit(len+addlen, elemSize, elemAlign)
+    else:
+      # Note: this means we cannot support things that have internal pointers as
+      # they get reallocated here. This needs to be documented clearly.
+      var p = cast[ptr NimSeqPayloadBase](p)
+      let oldCap = p.cap and not strlitFlag
+      let newCap = max(resize(oldCap), len+addlen)
+      if (p.cap and strlitFlag) == strlitFlag:
+        var q = cast[ptr NimSeqPayloadBase](alignedAlloc(headerSize + elemSize * newCap, elemAlign))
+        copyMem(q +! headerSize, p +! headerSize, len * elemSize)
+        q.cap = newCap
+        result = q
+      else:
+        let oldSize = headerSize + elemSize * oldCap
+        let newSize = headerSize + elemSize * newCap
+        var q = cast[ptr NimSeqPayloadBase](alignedRealloc(p, oldSize, newSize, elemAlign))
+        q.cap = newCap
+        result = q
+
+proc shrink*[T](x: var seq[T]; newLen: Natural) {.tags: [], raises: [].} =
+  when nimvm:
+    {.cast(tags: []).}:
+      setLen(x, newLen)
+  else:
+    #sysAssert newLen <= x.len, "invalid newLen parameter for 'shrink'"
+    when not supportsCopyMem(T):
+      for i in countdown(x.len - 1, newLen):
+        reset x[i]
+    # XXX This is wrong for const seqs that were moved into 'x'!
+    {.noSideEffect.}:
+      cast[ptr NimSeqV2[T]](addr x).len = newLen
+
+proc grow*[T](x: var seq[T]; newLen: Natural; value: T) {.nodestroy.} =
+  let oldLen = x.len
+  #sysAssert newLen >= x.len, "invalid newLen parameter for 'grow'"
+  if newLen <= oldLen: return
+  var xu = cast[ptr NimSeqV2[T]](addr x)
+  if xu.p == nil or (xu.p.cap and not strlitFlag) < newLen:
+    xu.p = cast[typeof(xu.p)](prepareSeqAddUninit(oldLen, xu.p, newLen - oldLen, sizeof(T), alignof(T)))
+  xu.len = newLen
+  for i in oldLen .. newLen-1:
+    wasMoved(xu.p.data[i])
+    `=copy`(xu.p.data[i], value)
+
+proc add*[T](x: var seq[T]; y: sink T) {.magic: "AppendSeqElem", noSideEffect, nodestroy.} =
+  ## Generic proc for adding a data item `y` to a container `x`.
+  ##
+  ## For containers that have an order, `add` means *append*. New generic
+  ## containers should also call their adding proc `add` for consistency.
+  ## Generic code becomes much easier to write if the Nim naming scheme is
+  ## respected.
+  {.cast(noSideEffect).}:
+    let oldLen = x.len
+    var xu = cast[ptr NimSeqV2[T]](addr x)
+    if xu.p == nil or (xu.p.cap and not strlitFlag) < oldLen+1:
+      xu.p = cast[typeof(xu.p)](prepareSeqAddUninit(oldLen, xu.p, 1, sizeof(T), alignof(T)))
+    xu.len = oldLen+1
+    # .nodestroy means `xu.p.data[oldLen] = y` is compiled into a
+    # copyMem(). This is fine as we know by construction that
+    # in `xu.p.data[oldLen]` there is nothing to destroy.
+    # We also save the `wasMoved + destroy` pair for the sink parameter.
+    xu.p.data[oldLen] = y
+
+proc setLen[T](s: var seq[T], newlen: Natural) {.nodestroy.} =
+  {.noSideEffect.}:
+    let oldLen = s.len
+    if newlen < oldLen:
+      shrink(s, newlen)
+    elif newlen > oldLen:
+      # growing: make room if needed, then default-initialize the new slots
+      let sv = cast[ptr NimSeqV2[T]](addr s)
+      if sv.p == nil or (sv.p.cap and not strlitFlag) < newlen:
+        sv.p = cast[typeof(sv.p)](prepareSeqAddUninit(oldLen, sv.p, newlen - oldLen, sizeof(T), alignof(T)))
+      sv.len = newlen
+      for idx in oldLen..<newlen:
+        sv.p.data[idx] = default(T)
+
+proc newSeq[T](s: var seq[T], len: Natural) =
+  shrink(s, 0)
+  setLen(s, len)
+
+proc sameSeqPayload(x: pointer, y: pointer): bool {.compilerRtl, inl.} =
+  result = cast[ptr NimRawSeq](x)[].p == cast[ptr NimRawSeq](y)[].p
+
+
+func capacity*[T](self: seq[T]): int {.inline.} =
+  ## Returns the current capacity of the seq.
+  # See https://github.com/nim-lang/RFCs/issues/460
+  runnableExamples:
+    var lst = newSeqOfCap[string](cap = 42)
+    lst.add "Nim"
+    assert lst.capacity == 42
+
+  let sek = cast[ptr NimSeqV2[T]](unsafeAddr self)
+  result = if sek.p != nil: sek.p.cap and not strlitFlag else: 0
+
+func setLenUninit*[T](s: var seq[T], newlen: Natural) {.nodestroy.} =
+  ## Sets the length of seq `s` to `newlen`. `T` may be any sequence type.
+  ## New slots will not be initialized.
+  ##
+  ## If the current length is greater than the new length,
+  ## `s` will be truncated.
+  ##   ```nim
+  ##   var x = @[10, 20]
+  ##   x.setLenUninit(5)
+  ##   x[4] = 50
+  ##   assert x[4] == 50
+  ##   x.setLenUninit(1)
+  ##   assert x == @[10]
+  ##   ```
+  {.noSideEffect.}:
+    let oldLen = s.len
+    if newlen < oldLen:
+      shrink(s, newlen)
+    elif newlen > oldLen:
+      # growing: make room if needed; the new slots are left untouched
+      let sv = cast[ptr NimSeqV2[T]](addr s)
+      if sv.p == nil or (sv.p.cap and not strlitFlag) < newlen:
+        sv.p = cast[typeof(sv.p)](prepareSeqAddUninit(oldLen, sv.p, newlen - oldLen, sizeof(T), alignof(T)))
+      sv.len = newlen
+
+{.pop.}  # See https://github.com/nim-lang/Nim/issues/21401
diff --git a/lib/system/seqs_v2_reimpl.nim b/lib/system/seqs_v2_reimpl.nim
new file mode 100644
index 000000000..09b7e7ac4
--- /dev/null
+++ b/lib/system/seqs_v2_reimpl.nim
@@ -0,0 +1,24 @@
+#
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2020 Nim contributors
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+type
+  NimSeqPayloadReimpl = object
+    cap: int
+    data: pointer
+
+  NimSeqV2Reimpl = object
+    len: int
+    p: ptr NimSeqPayloadReimpl
+
+template frees(s: NimSeqV2Reimpl) =
+  if s.p != nil and (s.p.cap and strlitFlag) != strlitFlag:
+    when compileOption("threads"):
+      deallocShared(s.p)
+    else:
+      dealloc(s.p)
\ No newline at end of file
diff --git a/lib/system/setops.nim b/lib/system/setops.nim
new file mode 100644
index 000000000..67aa3097a
--- /dev/null
+++ b/lib/system/setops.nim
@@ -0,0 +1,89 @@
+func incl*[T](x: var set[T], y: T) {.magic: "Incl".} =
+  ## Includes element `y` in the set `x`.
+  ##
+  ## This is the same as `x = x + {y}`, but it might be more efficient.
+  runnableExamples:
+    var a = {1, 3, 5}
+    a.incl(2)
+    assert a == {1, 2, 3, 5}
+    a.incl(4)
+    assert a == {1, 2, 3, 4, 5}
+
+when not defined(nimHasCallsitePragma):
+  {.pragma: callsite.}
+
+template incl*[T](x: var set[T], y: set[T]) {.callsite.} =
+  ## Includes the set `y` in the set `x`.
+  runnableExamples:
+    var a = {1, 3, 5, 7}
+    var b = {4, 5, 6}
+    a.incl(b)
+    assert a == {1, 3, 4, 5, 6, 7}
+  x = x + y
+
+func excl*[T](x: var set[T], y: T) {.magic: "Excl".} =
+  ## Excludes element `y` from the set `x`.
+  ##
+  ## This is the same as `x = x - {y}`, but it might be more efficient.
+  runnableExamples:
+    var b = {2, 3, 5, 6, 12, 54}
+    b.excl(5)
+    assert b == {2, 3, 6, 12, 54}
+
+template excl*[T](x: var set[T], y: set[T]) {.callsite.} =
+  ## Excludes the set `y` from the set `x`.
+  runnableExamples:
+    var a = {1, 3, 5, 7}
+    var b = {3, 4, 5}
+    a.excl(b) 
+    assert a == {1, 7}
+  x = x - y
+
+func card*[T](x: set[T]): int {.magic: "Card".} =
+  ## Returns the cardinality of the set `x`, i.e. the number of elements
+  ## in the set.
+  runnableExamples:
+    var a = {1, 3, 5, 7}
+    assert card(a) == 4
+    var b = {1, 3, 5, 7, 5}
+    assert card(b) == 4 # repeated 5 doesn't count
+
+func len*[T](x: set[T]): int {.magic: "Card".}
+  ## An alias for `card(x)`.
+
+
+func `*`*[T](x, y: set[T]): set[T] {.magic: "MulSet".} =
+  ## This operator computes the intersection of two sets.
+  runnableExamples:
+    assert {1, 2, 3} * {2, 3, 4} == {2, 3}
+
+func `+`*[T](x, y: set[T]): set[T] {.magic: "PlusSet".} =
+  ## This operator computes the union of two sets.
+  runnableExamples:
+    assert {1, 2, 3} + {2, 3, 4} == {1, 2, 3, 4}
+
+func `-`*[T](x, y: set[T]): set[T] {.magic: "MinusSet".} =
+  ## This operator computes the difference of two sets.
+  runnableExamples:
+    assert {1, 2, 3} - {2, 3, 4} == {1}
+
+func contains*[T](x: set[T], y: T): bool {.magic: "InSet".} =
+  ## One should overload this proc if one wants to overload the `in` operator.
+  ##
+  ## The parameters are in reverse order! `a in b` is a template for
+  ## `contains(b, a)`.
+  ## This is because the unification algorithm that Nim uses for overload
+  ## resolution works from left to right.
+  ## But for the `in` operator that would be the wrong direction for this
+  ## piece of code:
+  runnableExamples:
+    var s: set[range['a'..'z']] = {'a'..'c'}
+    assert s.contains('c')
+    assert 'b' in s
+    assert 'd' notin s
+    assert set['a'..'z'] is set[range['a'..'z']]
+  ## If `in` had been declared as `[T](elem: T, s: set[T])` then `T` would
+  ## have been bound to `char`. But `s` is not compatible to type
+  ## `set[char]`! The solution is to bind `T` to `range['a'..'z']`. This
+  ## is achieved by reversing the parameters for `contains`; `in` then
+  ## passes its arguments in reverse order.
diff --git a/lib/system/sets.nim b/lib/system/sets.nim
index 043d37533..97431c296 100755..100644
--- a/lib/system/sets.nim
+++ b/lib/system/sets.nim
@@ -1,6 +1,6 @@
 #
 #
-#            Nimrod's Runtime Library
+#            Nim's Runtime Library
 #        (c) Copyright 2012 Andreas Rumpf
 #
 #    See the file "copying.txt", included in this
@@ -9,20 +9,20 @@
 
 # set handling
 
-type
-  TNimSet = array [0..4*2048-1, int8]
 
-proc countBits32(n: int32): int {.compilerproc.} =
-  var v = n
-  v = v -% ((v shr 1'i32) and 0x55555555'i32)
-  v = (v and 0x33333333'i32) +% ((v shr 2'i32) and 0x33333333'i32)
-  result = ((v +% (v shr 4'i32) and 0xF0F0F0F'i32) *% 0x1010101'i32) shr 24'i32
+proc cardSetImpl(s: ptr UncheckedArray[uint8], len: int): int {.inline.} =
+  var i = 0
+  result = 0
+  var num = 0'u64
+  when defined(x86) or defined(amd64):
+    while i < len - 8:
+      copyMem(addr num, addr s[i], 8)
+      inc(result, countBits64(num))
+      inc(i, 8)
 
-proc countBits64(n: int64): int {.compilerproc.} = 
-  result = countBits32(toU32(n and 0xffff'i64)) +
-           countBits32(toU32(n shr 16'i64))
+  while i < len:
+    inc(result, countBits32(uint32(s[i])))
+    inc(i, 1)
 
-proc cardSet(s: TNimSet, len: int): int {.compilerproc.} =
-  result = 0
-  for i in countup(0, len-1):
-    inc(result, countBits32(int32(ze(s[i]))))
+proc cardSet(s: ptr UncheckedArray[uint8], len: int): int {.compilerproc, inline.} =
+  result = cardSetImpl(s, len)
diff --git a/lib/system/stacktraces.nim b/lib/system/stacktraces.nim
new file mode 100644
index 000000000..42be9d94f
--- /dev/null
+++ b/lib/system/stacktraces.nim
@@ -0,0 +1,83 @@
+#
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2015 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+# Additional code for customizable stack traces. Unstable API, for internal
+# usage only.
+
+const
+  reraisedFromBegin* = -10
+  reraisedFromEnd* = -100
+  maxStackTraceLines* = 128
+
+when defined(nimStackTraceOverride):
+  ## Procedure types for overriding the default stack trace.
+  type
+    cuintptr_t* {.importc: "uintptr_t", nodecl.} = uint
+      ## This is the same as the type `uintptr_t` in C.
+
+    StackTraceOverrideGetTracebackProc* = proc (): string {.
+      nimcall, gcsafe, raises: [], tags: [], noinline.}
+    StackTraceOverrideGetProgramCountersProc* = proc (maxLength: cint): seq[cuintptr_t] {.
+      nimcall, gcsafe, raises: [], tags: [], noinline.}
+    StackTraceOverrideGetDebuggingInfoProc* =
+      proc (programCounters: seq[cuintptr_t], maxLength: cint): seq[StackTraceEntry] {.
+        nimcall, gcsafe, raises: [], tags: [], noinline.}
+
+  # Default procedures (not normally used, because people opting in on this
+  # override are supposed to register their own versions).
+  var
+    stackTraceOverrideGetTraceback: StackTraceOverrideGetTracebackProc =
+      proc (): string {.nimcall, gcsafe, raises: [], tags: [], noinline.} =
+        discard
+        #result = "Stack trace override procedure not registered.\n"
+    stackTraceOverrideGetProgramCounters: StackTraceOverrideGetProgramCountersProc =
+      proc (maxLength: cint): seq[cuintptr_t] {.nimcall, gcsafe, raises: [], tags: [], noinline.} =
+        discard
+    stackTraceOverrideGetDebuggingInfo: StackTraceOverrideGetDebuggingInfoProc =
+      proc (programCounters: seq[cuintptr_t], maxLength: cint): seq[StackTraceEntry] {.
+        nimcall, gcsafe, raises: [], tags: [], noinline.} =
+          discard
+
+  # Custom procedure registration.
+  proc registerStackTraceOverride*(overrideProc: StackTraceOverrideGetTracebackProc) =
+    ## Override the default stack trace inside rawWriteStackTrace() with your
+    ## own procedure.
+    stackTraceOverrideGetTraceback = overrideProc
+  proc registerStackTraceOverrideGetProgramCounters*(overrideProc: StackTraceOverrideGetProgramCountersProc) =
+    stackTraceOverrideGetProgramCounters = overrideProc
+  proc registerStackTraceOverrideGetDebuggingInfo*(overrideProc: StackTraceOverrideGetDebuggingInfoProc) =
+    stackTraceOverrideGetDebuggingInfo = overrideProc
+
+  # Custom stack trace manipulation.
+  proc auxWriteStackTraceWithOverride*(s: var string) =
+    add(s, stackTraceOverrideGetTraceback())
+
+  proc auxWriteStackTraceWithOverride*(s: var seq[StackTraceEntry]) =
+    let programCounters = stackTraceOverrideGetProgramCounters(maxStackTraceLines)
+    if s.len == 0:
+      s = newSeqOfCap[StackTraceEntry](programCounters.len)
+    for programCounter in programCounters:
+      s.add(StackTraceEntry(programCounter: cast[uint](programCounter)))
+
+  # We may have more stack trace lines in the output, due to inlined procedures.
+  proc addDebuggingInfo*(s: seq[StackTraceEntry]): seq[StackTraceEntry] =
+    var programCounters: seq[cuintptr_t]
+    # We process program counters in groups from complete stack traces, because
+    # we have logic that keeps track of certain functions being inlined or not.
+    for entry in s:
+      if entry.procname.isNil and entry.programCounter != 0:
+        programCounters.add(cast[cuintptr_t](entry.programCounter))  # no proc name yet: queue its program counter
+      elif entry.procname.isNil and (entry.line == reraisedFromBegin or entry.line == reraisedFromEnd):
+        result.add(stackTraceOverrideGetDebuggingInfo(programCounters, maxStackTraceLines))  # expand the group queued so far
+        programCounters = @[]
+        result.add(entry)  # keep the re-raise marker entry itself
+      else:
+        result.add(entry)  # any other entry is passed through unchanged
+    if programCounters.len > 0:  # expand whatever is still queued after the last entry
+      result.add(stackTraceOverrideGetDebuggingInfo(programCounters, maxStackTraceLines))
diff --git a/lib/system/strmantle.nim b/lib/system/strmantle.nim
new file mode 100644
index 000000000..89046253b
--- /dev/null
+++ b/lib/system/strmantle.nim
@@ -0,0 +1,263 @@
+#
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2018 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+# Compilerprocs for strings that do not depend on the string implementation.
+
+import std/private/digitsutils
+
+
+proc cmpStrings(a, b: string): int {.inline, compilerproc.} =
+  # `c_memcmp` over the common prefix; a tie is broken by the length difference.
+  let
+    alen = a.len
+    blen = b.len
+    common = min(alen, blen)
+  if common > 0:
+    result = c_memcmp(unsafeAddr a[0], unsafeAddr b[0], cast[csize_t](common)).int
+  if result == 0:
+    result = alen - blen
+
+proc leStrings(a, b: string): bool {.inline, compilerproc.} =
+  # required by upcoming backends (NIR).
+  cmpStrings(a, b) <= 0
+
+proc ltStrings(a, b: string): bool {.inline, compilerproc.} =
+  # required by upcoming backends (NIR).
+  cmpStrings(a, b) < 0
+
+proc eqStrings(a, b: string): bool {.inline, compilerproc.} =
+  # Equal iff the lengths match and, for non-empty strings, `equalMem` agrees.
+  let n = a.len
+  if n != b.len: return false
+  if n == 0: return true
+  result = equalMem(unsafeAddr(a[0]), unsafeAddr(b[0]), n)
+
+proc hashString(s: string): int {.compilerproc.} =
+  # the compiler needs exactly the same hash function!
+  # this used to be used for efficient generation of string case statements
+  var acc = 0'u
+  for ch in s:
+    acc = acc + uint(ch)
+    acc = acc + acc shl 10
+    acc = acc xor (acc shr 6)
+  acc = acc + acc shl 3
+  acc = acc xor (acc shr 11)
+  acc = acc + acc shl 15
+  result = cast[int](acc)
+
+proc eqCstrings(a, b: cstring): bool {.inline, compilerproc.} =
+  if pointer(a) == pointer(b): result = true
+  elif a.isNil or b.isNil: result = false
+  else: result = c_strcmp(a, b) == 0
+
+proc hashCstring(s: cstring): int {.compilerproc.} =
+  # the compiler needs exactly the same hash function!
+  # this used to be used for efficient generation of cstring case statements
+  if s.isNil: return 0
+  var h : uint = 0
+  var i = 0
+  while true:
+    let c = s[i]
+    if c == '\0': break
+    h = h + uint(c)
+    h = h + h shl 10
+    h = h xor (h shr 6)
+    inc i
+  h = h + h shl 3
+  h = h xor (h shr 11)
+  h = h + h shl 15
+  result = cast[int](h)
+
+proc c_strtod(buf: cstring, endptr: ptr cstring): float64 {.
+  importc: "strtod", header: "<stdlib.h>", noSideEffect.}
+
+const
+  IdentChars = {'a'..'z', 'A'..'Z', '0'..'9', '_'}
+  powtens =  [1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
+              1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19,
+              1e20, 1e21, 1e22]
+
+
+{.push staticBoundChecks: off.}
+
+proc nimParseBiggestFloat(s: openArray[char], number: var BiggestFloat,
+                         ): int {.compilerproc.} =
+  # Parses a float from the start of `s` into `number`; returns the number of
+  # chars consumed, or 0 on error. Fast path: literals whose digits fit into a
+  # 53 bit integer and whose real exponent is <= 22 are computed directly.
+  # Everything else is rewritten as INTEGER * 10 ^ exponent and the work is
+  # left to the standard `strtod()`.
+  # This avoids the problems of decimal character portability.
+  # see: http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/
+  var
+    i = 0
+    sign = 1.0
+    kdigits, fdigits = 0
+    exponent = 0
+    integer = uint64(0)
+    fracExponent = 0
+    expSign = 1
+    firstDigit = -1
+    hasSign = false
+
+  # Sign?
+  if i < s.len and (s[i] == '+' or s[i] == '-'):
+    hasSign = true
+    if s[i] == '-':
+      sign = -1.0
+    inc(i)
+
+  # NaN?
+  if i+2 < s.len and (s[i] == 'N' or s[i] == 'n'):
+    if s[i+1] == 'A' or s[i+1] == 'a':
+      if s[i+2] == 'N' or s[i+2] == 'n':
+        if i+3 >= s.len or s[i+3] notin IdentChars:
+          number = NaN
+          return i+3
+    return 0
+
+  # Inf?
+  if i+2 < s.len and (s[i] == 'I' or s[i] == 'i'):
+    if s[i+1] == 'N' or s[i+1] == 'n':
+      if s[i+2] == 'F' or s[i+2] == 'f':
+        if i+3 >= s.len or s[i+3] notin IdentChars:
+          number = Inf*sign
+          return i+3
+    return 0
+
+  if i < s.len and s[i] in {'0'..'9'}:
+    firstDigit = (s[i].ord - '0'.ord)
+  # Integer part?
+  while i < s.len and s[i] in {'0'..'9'}:
+    inc(kdigits)
+    integer = integer * 10'u64 + (s[i].ord - '0'.ord).uint64
+    inc(i)
+    while i < s.len and s[i] == '_': inc(i)
+
+  # Fractional part?
+  if i < s.len and s[i] == '.':
+    inc(i)
+    # if no integer part, Skip leading zeros
+    if kdigits <= 0:
+      while i < s.len and s[i] == '0':
+        inc(fracExponent)
+        inc(i)
+        while i < s.len and s[i] == '_': inc(i)
+
+    if firstDigit == -1 and i < s.len and s[i] in {'0'..'9'}:
+      firstDigit = (s[i].ord - '0'.ord)
+    # get fractional part
+    while i < s.len and s[i] in {'0'..'9'}:
+      inc(fdigits)
+      inc(fracExponent)
+      integer = integer * 10'u64 + (s[i].ord - '0'.ord).uint64
+      inc(i)
+      while i < s.len and s[i] == '_': inc(i)
+
+  # if has no digits: return error
+  if kdigits + fdigits <= 0 and
+     (i == 0 or # no char consumed (empty string).
+     (i == 1 and hasSign)): # or only '+' or '-'
+    return 0
+
+  if i+1 < s.len and s[i] in {'e', 'E'}:
+    inc(i)
+    if s[i] == '+' or s[i] == '-':
+      if s[i] == '-':
+        expSign = -1
+
+      inc(i)
+    if i >= s.len or s[i] notin {'0'..'9'}:  # input such as "1e+" ends after the sign: error instead of reading past the end
+      return 0
+    while i < s.len and s[i] in {'0'..'9'}:
+      exponent = exponent * 10 + (ord(s[i]) - ord('0'))
+      inc(i)
+      while i < s.len and s[i] == '_': inc(i) # underscores are allowed and ignored
+
+  var realExponent = expSign*exponent - fracExponent
+  let expNegative = realExponent < 0
+  var absExponent = abs(realExponent)
+
+  # if exponent greater than can be represented: +/- zero or infinity
+  if absExponent > 999:
+    if integer == 0:
+      number = 0.0
+    elif expNegative:
+      number = 0.0*sign
+    else:
+      number = Inf*sign
+    return i
+
+  # if integer is representable in 53 bits:  fast path
+  # max fast path integer is  1<<53 - 1 or  8999999999999999 (16 digits)
+  let digits = kdigits + fdigits
+  if digits <= 15 or (digits <= 16 and firstDigit <= 8):
+    # max float power of ten with set bits above the 53th bit is 10^22
+    if absExponent <= 22:
+      if expNegative:
+        number = sign * integer.float / powtens[absExponent]
+      else:
+        number = sign * integer.float * powtens[absExponent]
+      return i
+
+    # if exponent is greater try to fit extra exponent above 22 by multiplying
+    # the integer part if there is space left.
+    let slop = 15 - kdigits - fdigits
+    if absExponent <= 22 + slop and not expNegative:
+      number = sign * integer.float * powtens[slop] * powtens[absExponent-slop]
+      return i
+
+  # if failed: slow path with strtod.
+  var t: array[500, char] # flaviu says: 325 is the longest reasonable literal
+  var ti = 0
+  let maxlen = t.high - "e+000".len # reserve enough space for exponent
+
+  let endPos = i
+  result = endPos
+  i = 0
+  # re-parse without error checking, any error should be handled by the code above.
+  if i < endPos and s[i] == '.': i.inc
+  while i < endPos and s[i] in {'0'..'9','+','-'}:
+    if ti < maxlen:
+      t[ti] = s[i]; inc(ti)
+    inc(i)
+    while i < endPos and s[i] in {'.', '_'}: # skip underscore and decimal point
+      inc(i)
+
+  # insert exponent
+  t[ti] = 'E'
+  inc(ti)
+  t[ti] = if expNegative: '-' else: '+'
+  inc(ti, 4)
+
+  # insert adjusted exponent
+  t[ti-1] = ('0'.ord + absExponent mod 10).char
+  absExponent = absExponent div 10
+  t[ti-2] = ('0'.ord + absExponent mod 10).char
+  absExponent = absExponent div 10
+  t[ti-3] = ('0'.ord + absExponent mod 10).char
+  number = c_strtod(cast[cstring](addr t), nil)
+
+{.pop.} # staticBoundChecks
+
+proc nimBoolToStr(x: bool): string {.compilerRtl.} =
+  return if x: "true" else: "false"
+
+proc nimCharToStr(x: char): string {.compilerRtl.} =
+  result = newString(1)
+  result[0] = x
+
+when defined(gcDestructors):
+  proc GC_getStatistics*(): string =
+    result = "[GC] total memory: "
+    result.addInt getTotalMem()
+    result.add "\n[GC] occupied memory: "
+    result.addInt getOccupiedMem()
+    result.add '\n'
+    #"[GC] cycle collections: " & $gch.stat.cycleCollections & "\n" &
diff --git a/lib/system/strs_v2.nim b/lib/system/strs_v2.nim
new file mode 100644
index 000000000..404b4f78d
--- /dev/null
+++ b/lib/system/strs_v2.nim
@@ -0,0 +1,224 @@
+#
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2017 Nim contributors
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+## Default new string implementation used by Nim's core.
+
+type
+  NimStrPayloadBase = object # header-only twin of NimStrPayload; contentSize takes its sizeof
+    cap: int
+
+  NimStrPayload {.core.} = object
+    cap: int ## capacity in chars; isLiteral also tests it for strlitFlag
+    data: UncheckedArray[char] ## the characters; contentSize reserves cap+1 bytes for them
+
+  NimStringV2 {.core.} = object
+    len: int ## number of characters; the procs in this file keep a \0 at data[len]
+    p: ptr NimStrPayload ## can be nil if len == 0.
+
+const nimStrVersion {.core.} = 2
+
+template isLiteral(s): bool = (s.p == nil) or (s.p.cap and strlitFlag) == strlitFlag # nil payload, or strlitFlag set in cap
+
+template contentSize(cap): int = cap + 1 + sizeof(NimStrPayloadBase) # cap chars + 1 extra byte (room for \0) + header
+
+template frees(s) =
+  if not isLiteral(s): # nil and literal payloads are never deallocated
+    when compileOption("threads"):
+      deallocShared(s.p) # the Shared variant is picked when --threads is on
+    else:
+      dealloc(s.p)
+
+template allocPayload(newLen: int): ptr NimStrPayload =
+  when compileOption("threads"): # both branches request contentSize(newLen) bytes
+    cast[ptr NimStrPayload](allocShared(contentSize(newLen)))
+  else:
+    cast[ptr NimStrPayload](alloc(contentSize(newLen))) # `cap` is not set here; callers assign it
+
+template allocPayload0(newLen: int): ptr NimStrPayload =
+  when compileOption("threads"): # alloc0 flavour of allocPayload; mnewString uses it and writes no chars itself
+    cast[ptr NimStrPayload](allocShared0(contentSize(newLen)))
+  else:
+    cast[ptr NimStrPayload](alloc0(contentSize(newLen))) # presumably zero-filled (alloc0 semantics) - confirm
+
+template reallocPayload(p: pointer, newLen: int): ptr NimStrPayload =
+  when compileOption("threads"): # resizes `p` to contentSize(newLen) bytes
+    cast[ptr NimStrPayload](reallocShared(p, contentSize(newLen)))
+  else:
+    cast[ptr NimStrPayload](realloc(p, contentSize(newLen))) # `cap` is not updated here; callers assign it
+
+template reallocPayload0(p: pointer; oldLen, newLen: int): ptr NimStrPayload =
+  when compileOption("threads"): # realloc0 flavour: old and new byte sizes are both passed on
+    cast[ptr NimStrPayload](reallocShared0(p, contentSize(oldLen), contentSize(newLen)))
+  else:
+    cast[ptr NimStrPayload](realloc0(p, contentSize(oldLen), contentSize(newLen))) # presumably zero-fills the grown part - confirm
+
+proc resize(old: int): int {.inline.} =
+  if old <= 0: 4 # nothing allocated yet: start with 4
+  elif old <= high(int16): old * 2 # small buffers double
+  else: old * 3 div 2 # for large arrays * 3/2 is better
+
+proc prepareAdd(s: var NimStringV2; addLen: int) {.compilerRtl.} = # makes `s` writable with room for addLen more chars
+  let newLen = s.len + addLen # length `s` will have once the caller has appended
+  if isLiteral(s):
+    let oldP = s.p
+    # can't mutate a literal, so we need a fresh copy here:
+    s.p = allocPayload(newLen)
+    s.p.cap = newLen
+    if s.len > 0:
+      # the old \0 is not copied; a fresh terminator is written just below:
+      copyMem(unsafeAddr s.p.data[0], unsafeAddr oldP.data[0], min(s.len, newLen))
+    # `appendString` writes no terminator when `src.len == 0` (e.g. `s.add ""`),
+    # so terminate the fresh copy here in every case; a real append simply
+    # overwrites this byte. contentSize keeps index newLen inside the buffer.
+    s.p.data[min(s.len, newLen)] = '\0'
+  else:
+    let oldCap = s.p.cap and not strlitFlag
+    if newLen > oldCap:
+      let newCap = max(newLen, resize(oldCap)) # grow by resize's factor, but at least to newLen
+      s.p = reallocPayload(s.p, newCap)
+      s.p.cap = newCap
+      if newLen < newCap:
+        zeroMem(cast[pointer](addr s.p.data[newLen+1]), newCap - newLen)
+
+proc nimAddCharV1(s: var NimStringV2; c: char) {.compilerRtl, inl.} =
+  prepareAdd(s, 1) # makes the payload writable with room for `c`
+  let at = s.len
+  s.p.data[at] = c
+  s.p.data[at + 1] = '\0' # keep the buffer NUL-terminated
+  s.len = at + 1
+
+proc toNimStr(str: cstring, len: int): NimStringV2 {.compilerproc.} = # copies the first `len` chars of `str`
+  if len <= 0: return NimStringV2(len: 0, p: nil) # empty strings carry no payload
+  # `str[len]` need not be readable, so copy exactly `len` bytes and terminate by hand
+  let p = allocPayload(len)
+  p.cap = len
+  copyMem(unsafeAddr p.data[0], str, len)
+  p.data[len] = '\0'
+  result = NimStringV2(len: len, p: p)
+
+proc cstrToNimstr(str: cstring): NimStringV2 {.compilerRtl.} =
+  # a nil cstring is converted with length 0, i.e. to the empty string
+  result = toNimStr(str, (if str == nil: 0 else: str.len))
+
+proc nimToCStringConv(s: NimStringV2): cstring {.compilerproc, nonReloadable, inline.} =
+  # an empty string may have a nil payload, so it converts to a static "" instead
+  result = (if s.len != 0: cast[cstring](unsafeAddr s.p.data) else: cstring"")
+
+proc appendString(dest: var NimStringV2; src: NimStringV2) {.compilerproc, inline.} =
+  if src.len <= 0: return # nothing to append; `dest` is left untouched
+  # src.len+1 bytes: the characters plus their \0 terminator
+  copyMem(unsafeAddr dest.p.data[dest.len], unsafeAddr src.p.data[0], src.len+1)
+  inc(dest.len, src.len)
+
+proc appendChar(dest: var NimStringV2; c: char) {.compilerproc, inline.} =
+  dest.p.data[dest.len] = c # no growth or nil check here: `dest.p` must already have room
+  inc dest.len
+  dest.p.data[dest.len] = '\0' # re-terminate after the new last char
+
+proc rawNewString(space: int): NimStringV2 {.compilerproc.} =
+  # this is also 'system.newStringOfCap'.
+  if space > 0:
+    # empty string backed by a payload with room for `space` chars
+    result = NimStringV2(len: 0, p: allocPayload(space))
+    result.p.cap = space
+    result.p.data[0] = '\0'
+  else:
+    result = NimStringV2(len: 0, p: nil)
+
+proc mnewString(len: int): NimStringV2 {.compilerproc.} =
+  # non-positive lengths give the payload-free empty string
+  if len > 0:
+    result = NimStringV2(len: len, p: allocPayload0(len))
+    result.p.cap = len
+  else:
+    result = NimStringV2(len: 0, p: nil)
+
+proc setLengthStrV2(s: var NimStringV2, newLen: int) {.compilerRtl.} = # sets s.len, reallocating when needed
+  if newLen <= 0: # negative lengths are clamped to 0, as setLengthStr in sysstr.nim does
+    discard "do not free the buffer here, pattern 's.setLen 0' is common for avoiding allocations"
+  else:
+    if isLiteral(s): # literal or nil payload: build a private copy of exactly newLen chars
+      let oldP = s.p
+      s.p = allocPayload(newLen)
+      s.p.cap = newLen
+      if s.len > 0:
+        copyMem(unsafeAddr s.p.data[0], unsafeAddr oldP.data[0], min(s.len, newLen))
+        if newLen > s.len:
+          zeroMem(cast[pointer](addr s.p.data[s.len]), newLen - s.len + 1) # new tail plus \0
+        else:
+          s.p.data[newLen] = '\0'
+      else:
+        zeroMem(cast[pointer](addr s.p.data[0]), newLen + 1)
+    elif newLen > s.len:
+      let oldCap = s.p.cap and not strlitFlag
+      if newLen > oldCap: # growing within the existing capacity needs no reallocation
+        let newCap = max(newLen, resize(oldCap))
+        s.p = reallocPayload0(s.p, oldCap, newCap)
+        s.p.cap = newCap
+
+    s.p.data[newLen] = '\0' # reached on every newLen > 0 path, so index is never negative
+  s.len = max(newLen, 0)
+
+proc nimAsgnStrV2(a: var NimStringV2, b: NimStringV2) {.compilerRtl.} =
+  if a.p == b.p and a.len == b.len: return # same payload and length: nothing to do
+  if isLiteral(b):
+    # we can shallow copy literals:
+    frees(a) # frees does nothing when `a` is itself nil or a literal
+    a.len = b.len
+    a.p = b.p
+  else:
+    if isLiteral(a) or (a.p.cap and not strlitFlag) < b.len: # `a` has no writable buffer big enough
+      # we have to allocate the 'cap' here, consider
+      # 'let y = newStringOfCap(); var x = y'
+      # on the other hand... These get turned into moves now.
+      frees(a)
+      a.p = allocPayload(b.len)
+      a.p.cap = b.len
+    a.len = b.len
+    copyMem(unsafeAddr a.p.data[0], unsafeAddr b.p.data[0], b.len+1) # characters plus the \0
+
+proc nimPrepareStrMutationImpl(s: var NimStringV2) =
+  # can't mutate a literal: copy its s.len chars plus the \0 into a fresh payload
+  let fresh = allocPayload(s.len)
+  fresh.cap = s.len
+  copyMem(unsafeAddr fresh.data[0], unsafeAddr s.p.data[0], s.len+1)
+  s.p = fresh
+
+proc nimPrepareStrMutationV2(s: var NimStringV2) {.compilerRtl, inl.} =
+  if s.p != nil and (s.p.cap and strlitFlag) == strlitFlag: # unlike isLiteral, a nil payload does not count
+    nimPrepareStrMutationImpl(s) # replaces the literal payload with a private copy
+
+proc prepareMutation*(s: var string) {.inline.} =
+  ## String literals are "copy on write": call `prepareMutation` before
+  ## modifying a string through `addr`, so that a literal-backed string
+  ## is given its own payload first.
+  {.cast(noSideEffect).}:
+    nimPrepareStrMutationV2(cast[ptr NimStringV2](unsafeAddr s)[])
+
+proc nimAddStrV1(s: var NimStringV2; src: NimStringV2) {.compilerRtl, inl.} =
+  # reserve room for `src` first: appendString itself never grows `s`
+  prepareAdd(s, src.len)
+  appendString(s, src)
+
+proc nimDestroyStrV1(s: NimStringV2) {.compilerRtl, inl.} =
+  frees(s) # releases the payload unless it is nil or a literal
+
+proc nimStrAtLe(s: string; idx: int; ch: char): bool {.compilerRtl, inl.} =
+  result = (if idx < s.len: s[idx] <= ch else: false) # out-of-range idx gives false
+
+func capacity*(self: string): int {.inline.} =
+  ## Returns the current capacity of the string.
+  # See https://github.com/nim-lang/RFCs/issues/460
+  runnableExamples:
+    var str = newStringOfCap(cap = 42)
+    str.add "Nim"
+    assert str.capacity == 42
+
+  let payload = cast[ptr NimStringV2](unsafeAddr self).p # nil when nothing is allocated
+  result = if payload == nil: 0 else: payload.cap and not strlitFlag
diff --git a/lib/system/sysio.nim b/lib/system/sysio.nim
deleted file mode 100755
index 3d5a53f03..000000000
--- a/lib/system/sysio.nim
+++ /dev/null
@@ -1,260 +0,0 @@
-#
-#
-#            Nimrod's Runtime Library
-#        (c) Copyright 2013 Andreas Rumpf
-#
-#    See the file "copying.txt", included in this
-#    distribution, for details about the copyright.
-#
-
-
-# Nimrod's standard IO library. It contains high-performance
-# routines for reading and writing data to (buffered) files or
-# TTYs.
-
-{.push debugger:off .} # the user does not want to trace a part
-                       # of the standard library!
-
-
-proc fputs(c: cstring, f: TFile) {.importc: "fputs", noDecl, tags: [FWriteIO].}
-proc fgets(c: cstring, n: int, f: TFile): cstring {.importc: "fgets", noDecl,
-                                                    tags: [FReadIO].}
-proc fgetc(stream: TFile): cint {.importc: "fgetc", nodecl, tags: [FReadIO].}
-proc ungetc(c: cint, f: TFile) {.importc: "ungetc", nodecl, tags: [].}
-proc putc(c: Char, stream: TFile) {.importc: "putc", nodecl, tags: [FWriteIO].}
-proc fprintf(f: TFile, frmt: CString) {.importc: "fprintf", nodecl, varargs,
-                                        tags: [FWriteIO].}
-proc strlen(c: cstring): int {.importc: "strlen", nodecl, tags: [].}
-
-
-# C routine that is used here:
-proc fread(buf: Pointer, size, n: int, f: TFile): int {.
-  importc: "fread", noDecl, tags: [FReadIO].}
-proc fseek(f: TFile, offset: clong, whence: int): int {.
-  importc: "fseek", noDecl, tags: [].}
-proc ftell(f: TFile): int {.importc: "ftell", noDecl, tags: [].}
-proc setvbuf(stream: TFile, buf: pointer, typ, size: cint): cint {.
-  importc, nodecl, tags: [].}
-
-{.push stackTrace:off, profiler:off.}
-proc write(f: TFile, c: cstring) = fputs(c, f)
-{.pop.}
-
-var
-  IOFBF {.importc: "_IOFBF", nodecl.}: cint
-  IONBF {.importc: "_IONBF", nodecl.}: cint
-
-const
-  buf_size = 4000
-
-proc raiseEIO(msg: string) {.noinline, noreturn.} =
-  raise newException(EIO, msg)
-
-proc readLine(f: TFile, line: var TaintedString): bool =
-  # of course this could be optimized a bit; but IO is slow anyway...
-  # and it was difficult to get this CORRECT with Ansi C's methods
-  setLen(line.string, 0) # reuse the buffer!
-  while True:
-    var c = fgetc(f)
-    if c < 0'i32:
-      if line.len > 0: break
-      else: return false
-    if c == 10'i32: break # LF
-    if c == 13'i32:  # CR
-      c = fgetc(f) # is the next char LF?
-      if c != 10'i32: ungetc(c, f) # no, put the character back
-      break
-    add line.string, chr(int(c))
-  result = true
-
-proc readLine(f: TFile): TaintedString =
-  result = TaintedString(newStringOfCap(80))
-  if not readLine(f, result): raiseEIO("EOF reached")
-
-proc write(f: TFile, i: int) = 
-  when sizeof(int) == 8:
-    fprintf(f, "%lld", i)
-  else:
-    fprintf(f, "%ld", i)
-
-proc write(f: TFile, i: biggestInt) = 
-  when sizeof(biggestint) == 8:
-    fprintf(f, "%lld", i)
-  else:
-    fprintf(f, "%ld", i)
-    
-proc write(f: TFile, b: bool) =
-  if b: write(f, "true")
-  else: write(f, "false")
-proc write(f: TFile, r: float) = fprintf(f, "%g", r)
-proc write(f: TFile, r: biggestFloat) = fprintf(f, "%g", r)
-
-proc write(f: TFile, c: Char) = putc(c, f)
-proc write(f: TFile, a: varargs[string, `$`]) =
-  for x in items(a): write(f, x)
-
-proc readAllBuffer(file: TFile): string = 
-  # This proc is for TFile we want to read but don't know how many
-  # bytes we need to read before the buffer is empty.
-  result = ""
-  var buffer = newString(buf_size)
-  var bytesRead = buf_size
-  while bytesRead == buf_size:
-    bytesRead = readBuffer(file, addr(buffer[0]), buf_size)
-    result.add(buffer)
-  
-proc rawFileSize(file: TFile): int = 
-  # this does not raise an error opposed to `getFileSize`
-  var oldPos = ftell(file)
-  discard fseek(file, 0, 2) # seek the end of the file
-  result = ftell(file)
-  discard fseek(file, clong(oldPos), 0)
-
-proc readAllFile(file: TFile, len: int): string =
-  # We aquire the filesize beforehand and hope it doesn't change.
-  # Speeds things up.
-  result = newString(int(len))
-  if readBuffer(file, addr(result[0]), int(len)) != len:
-    raiseEIO("error while reading from file")
-
-proc readAllFile(file: TFile): string =
-  var len = rawFileSize(file)
-  result = readAllFile(file, len)
-  
-proc readAll(file: TFile): TaintedString = 
-  # Separate handling needed because we need to buffer when we
-  # don't know the overall length of the TFile.
-  var len = rawFileSize(file)
-  if len >= 0:
-    result = readAllFile(file, len).TaintedString
-  else:
-    result = readAllBuffer(file).TaintedString
-  
-proc readFile(filename: string): TaintedString =
-  var f = open(filename)
-  try:
-    result = readAllFile(f).TaintedString
-  finally:
-    close(f)
-
-proc writeFile(filename, content: string) =
-  var f = open(filename, fmWrite)
-  try:
-    f.write(content)
-  finally:
-    close(f)
-
-proc EndOfFile(f: TFile): bool =
-  # do not blame me; blame the ANSI C standard this is so brain-damaged
-  var c = fgetc(f)
-  ungetc(c, f)
-  return c < 0'i32
-
-proc writeln[Ty](f: TFile, x: varargs[Ty, `$`]) =
-  for i in items(x): write(f, i)
-  write(f, "\n")
-
-proc rawEcho(x: string) {.inline, compilerproc.} = write(stdout, x)
-proc rawEchoNL() {.inline, compilerproc.} = write(stdout, "\n")
-
-# interface to the C procs:
-
-when defined(windows) and not defined(useWinAnsi):
-  include "system/widestrs"
-  
-  proc wfopen(filename, mode: widecstring): pointer {.
-    importc: "_wfopen", nodecl.}
-  proc wfreopen(filename, mode: widecstring, stream: TFile): TFile {.
-    importc: "_wfreopen", nodecl.}
-
-  proc fopen(filename, mode: CString): pointer =
-    var f = allocWideCString(filename)
-    var m = allocWideCString(mode)
-    result = wfopen(f, m)
-    dealloc m
-    dealloc f
-
-  proc freopen(filename, mode: cstring, stream: TFile): TFile =
-    var f = allocWideCString(filename)
-    var m = allocWideCString(mode)
-    result = wfreopen(f, m, stream)
-    dealloc m
-    dealloc f
-
-else:
-  proc fopen(filename, mode: CString): pointer {.importc: "fopen", noDecl.}
-  proc freopen(filename, mode: cstring, stream: TFile): TFile {.
-    importc: "freopen", nodecl.}
-
-const
-  FormatOpen: array [TFileMode, string] = ["rb", "wb", "w+b", "r+b", "ab"]
-    #"rt", "wt", "w+t", "r+t", "at"
-    # we always use binary here as for Nimrod the OS line ending
-    # should not be translated.
-
-
-proc Open(f: var TFile, filename: string,
-          mode: TFileMode = fmRead,
-          bufSize: int = -1): Bool =
-  var p: pointer = fopen(filename, FormatOpen[mode])
-  result = (p != nil)
-  f = cast[TFile](p)
-  if bufSize > 0 and bufSize <= high(cint).int:
-    if setvbuf(f, nil, IOFBF, bufSize.cint) != 0'i32:
-      raise newException(EOutOfMemory, "out of memory")
-  elif bufSize == 0:
-    discard setvbuf(f, nil, IONBF, 0)
-
-proc reopen(f: TFile, filename: string, mode: TFileMode = fmRead): bool = 
-  var p: pointer = freopen(filename, FormatOpen[mode], f)
-  result = p != nil
-
-proc fdopen(filehandle: TFileHandle, mode: cstring): TFile {.
-  importc: pccHack & "fdopen", header: "<stdio.h>".}
-
-proc open(f: var TFile, filehandle: TFileHandle, mode: TFileMode): bool =
-  f = fdopen(filehandle, FormatOpen[mode])
-  result = f != nil
-
-proc fwrite(buf: Pointer, size, n: int, f: TFile): int {.
-  importc: "fwrite", noDecl.}
-
-proc readBuffer(f: TFile, buffer: pointer, len: int): int =
-  result = fread(buffer, 1, len, f)
-
-proc ReadBytes(f: TFile, a: var openarray[int8], start, len: int): int =
-  result = readBuffer(f, addr(a[start]), len)
-
-proc ReadChars(f: TFile, a: var openarray[char], start, len: int): int =
-  result = readBuffer(f, addr(a[start]), len)
-
-{.push stackTrace:off, profiler:off.}
-proc writeBytes(f: TFile, a: openarray[int8], start, len: int): int =
-  var x = cast[ptr array[0..1000_000_000, int8]](a)
-  result = writeBuffer(f, addr(x[start]), len)
-proc writeChars(f: TFile, a: openarray[char], start, len: int): int =
-  var x = cast[ptr array[0..1000_000_000, int8]](a)
-  result = writeBuffer(f, addr(x[start]), len)
-proc writeBuffer(f: TFile, buffer: pointer, len: int): int =
-  result = fwrite(buffer, 1, len, f)
-
-proc write(f: TFile, s: string) =
-  if writeBuffer(f, cstring(s), s.len) != s.len:
-    raiseEIO("cannot write string to file")
-{.pop.}
-
-proc setFilePos(f: TFile, pos: int64) =
-  if fseek(f, clong(pos), 0) != 0:
-    raiseEIO("cannot set file position")
-
-proc getFilePos(f: TFile): int64 =
-  result = ftell(f)
-  if result < 0: raiseEIO("cannot retrieve file position")
-
-proc getFileSize(f: TFile): int64 =
-  var oldPos = getFilePos(f)
-  discard fseek(f, 0, 2) # seek the end of the file
-  result = getFilePos(f)
-  setFilePos(f, oldPos)
-
-{.pop.}
diff --git a/lib/system/syslocks.nim b/lib/system/syslocks.nim
deleted file mode 100755
index 4619eaddb..000000000
--- a/lib/system/syslocks.nim
+++ /dev/null
@@ -1,101 +0,0 @@
-#
-#
-#            Nimrod's Runtime Library
-#        (c) Copyright 2012 Andreas Rumpf
-#
-#    See the file "copying.txt", included in this
-#    distribution, for details about the copyright.
-#
-
-## Low level system locks and condition vars.
-
-when defined(Windows):
-  type
-    THandle = int
-    TSysLock {.final, pure.} = object # CRITICAL_SECTION in WinApi
-      DebugInfo: pointer
-      LockCount: int32
-      RecursionCount: int32
-      OwningThread: int
-      LockSemaphore: int
-      Reserved: int32
-          
-    TSysCond = THandle
-          
-  proc InitSysLock(L: var TSysLock) {.stdcall, noSideEffect,
-    dynlib: "kernel32", importc: "InitializeCriticalSection".}
-    ## Initializes the lock `L`.
-
-  proc TryAcquireSysAux(L: var TSysLock): int32 {.stdcall, noSideEffect,
-    dynlib: "kernel32", importc: "TryEnterCriticalSection".}
-    ## Tries to acquire the lock `L`.
-    
-  proc TryAcquireSys(L: var TSysLock): bool {.inline.} = 
-    result = TryAcquireSysAux(L) != 0'i32
-
-  proc AcquireSys(L: var TSysLock) {.stdcall, noSideEffect,
-    dynlib: "kernel32", importc: "EnterCriticalSection".}
-    ## Acquires the lock `L`.
-    
-  proc ReleaseSys(L: var TSysLock) {.stdcall, noSideEffect,
-    dynlib: "kernel32", importc: "LeaveCriticalSection".}
-    ## Releases the lock `L`.
-
-  proc DeinitSys(L: var TSysLock) {.stdcall, noSideEffect,
-    dynlib: "kernel32", importc: "DeleteCriticalSection".}
-
-  proc CreateEvent(lpEventAttributes: pointer, 
-                   bManualReset, bInitialState: int32,
-                   lpName: cstring): TSysCond {.stdcall, noSideEffect,
-    dynlib: "kernel32", importc: "CreateEventA".}
-  
-  proc CloseHandle(hObject: THandle) {.stdcall, noSideEffect,
-    dynlib: "kernel32", importc: "CloseHandle".}
-  proc WaitForSingleObject(hHandle: THandle, dwMilliseconds: int32): int32 {.
-    stdcall, dynlib: "kernel32", importc: "WaitForSingleObject".}
-
-  proc SignalSysCond(hEvent: TSysCond) {.stdcall, noSideEffect,
-    dynlib: "kernel32", importc: "SetEvent".}
-  
-  proc InitSysCond(cond: var TSysCond) {.inline.} =
-    cond = CreateEvent(nil, 0'i32, 0'i32, nil)
-  proc DeinitSysCond(cond: var TSysCond) {.inline.} =
-    CloseHandle(cond)
-  proc WaitSysCond(cond: var TSysCond, lock: var TSysLock) =
-    releaseSys(lock)
-    discard WaitForSingleObject(cond, -1'i32)
-    acquireSys(lock)
-
-else:
-  type
-    TSysLock {.importc: "pthread_mutex_t", pure, final,
-               header: "<sys/types.h>".} = object
-    TSysCond {.importc: "pthread_cond_t", pure, final,
-               header: "<sys/types.h>".} = object
-
-  proc InitSysLock(L: var TSysLock, attr: pointer = nil) {.
-    importc: "pthread_mutex_init", header: "<pthread.h>", noSideEffect.}
-
-  proc AcquireSys(L: var TSysLock) {.noSideEffect,
-    importc: "pthread_mutex_lock", header: "<pthread.h>".}
-  proc TryAcquireSysAux(L: var TSysLock): cint {.noSideEffect,
-    importc: "pthread_mutex_trylock", header: "<pthread.h>".}
-
-  proc TryAcquireSys(L: var TSysLock): bool {.inline.} = 
-    result = TryAcquireSysAux(L) == 0'i32
-
-  proc ReleaseSys(L: var TSysLock) {.noSideEffect,
-    importc: "pthread_mutex_unlock", header: "<pthread.h>".}
-  proc DeinitSys(L: var TSysLock) {.
-    importc: "pthread_mutex_destroy", header: "<pthread.h>".}
-
-  proc InitSysCond(cond: var TSysCond, cond_attr: pointer = nil) {.
-    importc: "pthread_cond_init", header: "<pthread.h>".}
-  proc WaitSysCond(cond: var TSysCond, lock: var TSysLock) {.
-    importc: "pthread_cond_wait", header: "<pthread.h>".}
-  proc SignalSysCond(cond: var TSysCond) {.
-    importc: "pthread_cond_signal", header: "<pthread.h>".}
-  
-  proc DeinitSysCond(cond: var TSysCond) {.
-    importc: "pthread_cond_destroy", header: "<pthread.h>".}
-  
diff --git a/lib/system/sysstr.nim b/lib/system/sysstr.nim
index d62a987ff..3621c4960 100755..100644
--- a/lib/system/sysstr.nim
+++ b/lib/system/sysstr.nim
@@ -1,6 +1,6 @@
 #
 #
-#            Nimrod's Runtime Library
+#            Nim's Runtime Library
 #        (c) Copyright 2012 Andreas Rumpf
 #
 #    See the file "copying.txt", included in this
@@ -15,111 +15,154 @@
 # we don't use refcounts because that's a behaviour
 # the programmer may not want
 
+
+proc dataPointer(a: PGenericSeq, elemAlign: int): pointer =
+  cast[pointer](cast[int](a) +% align(GenericSeqSize, elemAlign)) # address `a` plus the header size aligned to elemAlign
+
+proc dataPointer(a: PGenericSeq, elemAlign, elemSize, index: int): pointer =
+  cast[pointer](cast[int](a) +% align(GenericSeqSize, elemAlign) +% (index*%elemSize)) # aligned header size, then index*elemSize bytes further
+
 proc resize(old: int): int {.inline.} =
   if old <= 0: result = 4
   elif old < 65536: result = old * 2
   else: result = old * 3 div 2 # for large arrays * 3/2 is better
 
-proc cmpStrings(a, b: NimString): int {.inline, compilerProc.} =
-  if a == b: return 0
-  if a == nil: return -1
-  if b == nil: return 1
-  return c_strcmp(a.data, b.data)
+when declared(allocAtomic):
+  template allocStr(size: untyped): untyped =
+    cast[NimString](allocAtomic(size))
 
-proc eqStrings(a, b: NimString): bool {.inline, compilerProc.} =
-  if a == b: return true
-  if a == nil or b == nil: return false
-  return a.len == b.len and
-    c_memcmp(a.data, b.data, a.len * sizeof(char)) == 0'i32
+  template allocStrNoInit(size: untyped): untyped =
+    cast[NimString](boehmAllocAtomic(size))
+elif defined(gcRegions):
+  template allocStr(size: untyped): untyped =
+    cast[NimString](newStr(addr(strDesc), size, true))
+
+  template allocStrNoInit(size: untyped): untyped =
+    cast[NimString](newStr(addr(strDesc), size, false))
 
-when defined(allocAtomic):
-  template allocStr(size: expr): expr =
-    cast[NimString](allocAtomic(size))
 else:
-  template allocStr(size: expr): expr =
+  template allocStr(size: untyped): untyped =
     cast[NimString](newObj(addr(strDesc), size))
 
-proc rawNewString(space: int): NimString {.compilerProc.} =
-  var s = space
-  if s < 8: s = 7
-  result = allocStr(sizeof(TGenericSeq) + s + 1)
+  template allocStrNoInit(size: untyped): untyped =
+    cast[NimString](newObjNoInit(addr(strDesc), size))
+
+proc rawNewStringNoInit(space: int): NimString =
+  let s = max(space, 7)
+  result = allocStrNoInit(sizeof(TGenericSeq) + s + 1)
   result.reserved = s
+  when defined(gogc):
+    result.elemSize = 1
+
+proc rawNewString(space: int): NimString {.compilerproc.} =
+  result = rawNewStringNoInit(space) # which reserves max(space, 7) chars
+  result.len = 0
+  result.data[0] = '\0' # empty, but properly terminated
 
-proc mnewString(len: int): NimString {.compilerProc.} =
-  result = rawNewString(len)
+proc mnewString(len: int): NimString {.compilerproc.} =
+  result = rawNewStringNoInit(len)
   result.len = len
+  zeroMem(addr result.data[0], len + 1)
 
-proc copyStrLast(s: NimString, start, last: int): NimString {.compilerProc.} =
-  var start = max(start, 0)
-  var len = min(last, s.len-1) - start + 1
-  if len > 0:
-    result = rawNewString(len)
-    result.len = len
-    c_memcpy(result.data, addr(s.data[start]), len * sizeof(Char))
-    #result.data[len] = '\0'
-  else:
-    result = rawNewString(len)
+proc copyStrLast(s: NimString, start, last: int): NimString {.compilerproc.} =
+  # This is not used by most recent versions of the compiler anymore, but
+  # required for bootstrapping purposes.
+  let start = max(start, 0)
+  if s == nil: return nil
+  let len = max(min(last, s.len-1) - start + 1, 0) # empty range => 0, never negative
+  result = rawNewStringNoInit(len)
+  result.len = len
+  if len > 0: copyMem(addr(result.data), addr(s.data[start]), len)
+  result.data[len] = '\0'
 
-proc copyStr(s: NimString, start: int): NimString {.compilerProc.} =
+proc copyStr(s: NimString, start: int): NimString {.compilerproc.} =
+  # This is not used by most recent versions of the compiler anymore, but
+  # required for bootstrapping purposes.
+  if s == nil: return nil
   result = copyStrLast(s, start, s.len-1)
 
-proc toNimStr(str: CString, len: int): NimString {.compilerProc.} =
-  result = rawNewString(len)
+proc nimToCStringConv(s: NimString): cstring {.compilerproc, nonReloadable, inline.} =
+  # nil and empty strings both convert to a static "" rather than to their buffer
+  result = (if s != nil and s.len != 0: cast[cstring](addr s.data) else: cstring"")
+
+proc toNimStr(str: cstring, len: int): NimString {.compilerproc.} =
+  result = rawNewStringNoInit(len)
   result.len = len
-  c_memcpy(result.data, str, (len+1) * sizeof(Char))
-  #result.data[len] = '\0' # readline relies on this!
+  copyMem(addr(result.data), str, len)
+  result.data[len] = '\0'
 
-proc cstrToNimstr(str: CString): NimString {.compilerRtl.} =
-  result = toNimstr(str, c_strlen(str))
+proc cstrToNimstr(str: cstring): NimString {.compilerRtl.} =
+  # a nil cstring stays nil instead of becoming an empty NimString
+  result = (if str != nil: toNimStr(str, str.len) else: NimString(nil))
 
 proc copyString(src: NimString): NimString {.compilerRtl.} =
   if src != nil:
     if (src.reserved and seqShallowFlag) != 0:
       result = src
     else:
-      result = rawNewString(src.space)
+      result = rawNewStringNoInit(src.len)
       result.len = src.len
-      c_memcpy(result.data, src.data, (src.len + 1) * sizeof(Char))
+      copyMem(addr(result.data), addr(src.data), src.len + 1)
+      sysAssert((seqShallowFlag and result.reserved) == 0, "copyString")
+      when defined(nimShallowStrings):
+        if (src.reserved and strlitFlag) != 0:
+          result.reserved = (result.reserved and not strlitFlag) or seqShallowFlag
+
+proc newOwnedString(src: NimString; n: int): NimString =
+  result = rawNewStringNoInit(n) # fresh allocation, separate from `src`
+  result.len = n
+  copyMem(addr(result.data), addr(src.data), n) # first n chars only; `src` is not nil-checked here
+  result.data[n] = '\0'
 
 proc copyStringRC1(src: NimString): NimString {.compilerRtl.} =
   if src != nil:
-    var s = src.space
-    if s < 8: s = 7
-    when defined(newObjRC1):
-      result = cast[NimString](newObjRC1(addr(strDesc), sizeof(TGenericSeq) +
-                               s+1))
+    if (src.reserved and seqShallowFlag) != 0:
+      result = src
+      when declared(incRef):
+        incRef(usrToCell(result))
     else:
-      result = allocStr(sizeof(TGenericSeq) + s + 1)
-    result.reserved = s
+      when declared(newObjRC1) and not defined(gcRegions):
+        var s = src.len
+        if s < 7: s = 7
+        result = cast[NimString](newObjRC1(addr(strDesc), sizeof(TGenericSeq) +
+                                s+1))
+        result.reserved = s
+        when defined(gogc):
+          result.elemSize = 1
+      else:
+        result = rawNewStringNoInit(src.len)
+      result.len = src.len
+      copyMem(addr(result.data), addr(src.data), src.len + 1)
+      sysAssert((seqShallowFlag and result.reserved) == 0, "copyStringRC1")
+      when defined(nimShallowStrings):
+        if (src.reserved and strlitFlag) != 0:
+          result.reserved = (result.reserved and not strlitFlag) or seqShallowFlag
+
+proc copyDeepString(src: NimString): NimString {.inline.} =
+  if src != nil:
+    result = rawNewStringNoInit(src.len)
     result.len = src.len
-    c_memcpy(result.data, src.data, src.len + 1)
-
-proc hashString(s: string): int {.compilerproc.} =
-  # the compiler needs exactly the same hash function!
-  # this used to be used for efficient generation of string case statements
-  var h = 0
-  for i in 0..Len(s)-1:
-    h = h +% Ord(s[i])
-    h = h +% h shl 10
-    h = h xor (h shr 6)
-  h = h +% h shl 3
-  h = h xor (h shr 11)
-  h = h +% h shl 15
-  result = h
+    copyMem(addr(result.data), addr(src.data), src.len + 1)
 
 proc addChar(s: NimString, c: char): NimString =
   # is compilerproc!
-  result = s
-  if result.len >= result.space:
-    result.reserved = resize(result.space)
-    result = cast[NimString](growObj(result,
-      sizeof(TGenericSeq) + (result.reserved+1) * sizeof(char)))
+  if s == nil:
+    result = rawNewStringNoInit(1)
+    result.len = 0
+  else:
+    result = s
+    if result.len >= result.space:
+      let r = resize(result.space)
+      result = rawNewStringNoInit(r)
+      result.len = s.len
+      copyMem(addr result.data[0], unsafeAddr(s.data[0]), s.len+1)
+      result.reserved = r
   result.data[result.len] = c
   result.data[result.len+1] = '\0'
   inc(result.len)
 
 # These routines should be used like following:
-#   <Nimrod code>
+#   <Nim code>
 #   s &= "Hello " & name & ", how do you feel?"
 #
 #   <generated C code>
@@ -130,7 +173,7 @@ proc addChar(s: NimString, c: char): NimString =
 #     appendString(s, strLit3);
 #   }
 #
-#   <Nimrod code>
+#   <Nim code>
 #   s = "Hello " & name & ", how do you feel?"
 #
 #   <generated C code>
@@ -143,26 +186,31 @@ proc addChar(s: NimString, c: char): NimString =
 #     s = tmp0;
 #   }
 #
-#   <Nimrod code>
+#   <Nim code>
 #   s = ""
 #
 #   <generated C code>
 #   s = rawNewString(0);
 
 proc resizeString(dest: NimString, addlen: int): NimString {.compilerRtl.} =
-  if dest.len + addLen <= dest.space:
+  if dest == nil:
+    result = rawNewString(addlen)
+  elif dest.len + addlen <= dest.space:
     result = dest
   else: # slow path:
-    var sp = max(resize(dest.space), dest.len + addLen)
-    result = cast[NimString](growObj(dest, sizeof(TGenericSeq) + sp + 1))
+    let sp = max(resize(dest.space), dest.len + addlen)
+    result = rawNewStringNoInit(sp)
+    result.len = dest.len
+    copyMem(addr result.data[0], unsafeAddr(dest.data[0]), dest.len+1)
     result.reserved = sp
     #result = rawNewString(sp)
-    #copyMem(result, dest, dest.len * sizeof(char) + sizeof(TGenericSeq))
+    #copyMem(result, dest, dest.len + sizeof(TGenericSeq))
     # DO NOT UPDATE LEN YET: dest.len = newLen
 
 proc appendString(dest, src: NimString) {.compilerproc, inline.} =
-  c_memcpy(addr(dest.data[dest.len]), src.data, src.len + 1)
-  inc(dest.len, src.len)
+  if src != nil:
+    copyMem(addr(dest.data[dest.len]), addr(src.data), src.len + 1)
+    inc(dest.len, src.len)
 
 proc appendChar(dest: NimString, c: char) {.compilerproc, inline.} =
   dest.data[dest.len] = c
@@ -170,17 +218,27 @@ proc appendChar(dest: NimString, c: char) {.compilerproc, inline.} =
   inc(dest.len)
 
 proc setLengthStr(s: NimString, newLen: int): NimString {.compilerRtl.} =
-  var n = max(newLen, 0)
-  if n <= s.space:
+  let n = max(newLen, 0)
+  if s == nil:
+    if n == 0:
+      return s
+    else:
+      result = mnewString(n)
+  elif n <= s.space:
     result = s
   else:
-    result = resizeString(s, n)
+    let sp = max(resize(s.space), n)
+    result = rawNewStringNoInit(sp)
+    result.len = s.len
+    copyMem(addr result.data[0], unsafeAddr(s.data[0]), s.len)
+    zeroMem(addr result.data[s.len], n - s.len)
+    result.reserved = sp
   result.len = n
   result.data[n] = '\0'
 
 # ----------------- sequences ----------------------------------------------
 
-proc incrSeq(seq: PGenericSeq, elemSize: int): PGenericSeq {.compilerProc.} =
+proc incrSeq(seq: PGenericSeq, elemSize, elemAlign: int): PGenericSeq {.compilerproc.} =
   # increments the length by one:
   # this is needed for supporting ``add``;
   #
@@ -189,108 +247,117 @@ proc incrSeq(seq: PGenericSeq, elemSize: int): PGenericSeq {.compilerProc.} =
   #  seq[seq->len-1] = x;
   result = seq
   if result.len >= result.space:
-    result.reserved = resize(result.space)
-    result = cast[PGenericSeq](growObj(result, elemSize * result.reserved +
-                               GenericSeqSize))
+    let r = resize(result.space)
+    result = cast[PGenericSeq](growObj(result, align(GenericSeqSize, elemAlign) + elemSize * r))
+    result.reserved = r
   inc(result.len)
 
-proc setLengthSeq(seq: PGenericSeq, elemSize, newLen: int): PGenericSeq {.
-    compilerRtl.} =
+proc incrSeqV2(seq: PGenericSeq, elemSize, elemAlign: int): PGenericSeq {.compilerproc.} =
+  # incrSeq version 2: grows the buffer when it is full but does not change `len`
+  result = seq
+  if result.len >= result.space:
+    let r = resize(result.space)
+    result = cast[PGenericSeq](growObj(result, align(GenericSeqSize, elemAlign) + elemSize * r))
+    result.reserved = r
+
+proc incrSeqV3(s: PGenericSeq, typ: PNimType): PGenericSeq {.compilerproc.} =
+  if s == nil: # no buffer yet: allocate one via newSeq(typ, 1) and report it as empty
+    result = cast[PGenericSeq](newSeq(typ, 1))
+    result.len = 0
+  else:
+    result = s
+    if result.len >= result.space: # full: copy the elements into a larger seq
+      let r = resize(result.space)
+      result = cast[PGenericSeq](newSeq(typ, r))
+      result.len = s.len
+      copyMem(dataPointer(result, typ.base.align), dataPointer(s, typ.base.align), s.len * typ.base.size)
+      # since we steal the content from 's', it's crucial to set s's len to 0.
+      s.len = 0
+
+proc setLengthSeq(seq: PGenericSeq, elemSize, elemAlign, newLen: int): PGenericSeq {.
+    compilerRtl, inl.} =
   result = seq
   if result.space < newLen:
-    result.reserved = max(resize(result.space), newLen)
-    result = cast[PGenericSeq](growObj(result, elemSize * result.reserved +
-                               GenericSeqSize))
+    let r = max(resize(result.space), newLen)
+    result = cast[PGenericSeq](growObj(result, align(GenericSeqSize, elemAlign) + elemSize * r))
+    result.reserved = r
   elif newLen < result.len:
     # we need to decref here, otherwise the GC leaks!
-    when not defined(boehmGC) and not defined(nogc) and 
-         not defined(gcMarkAndSweep):
-      when compileOption("gc", "v2"):
+    when not defined(boehmGC) and not defined(nogc) and
+         not defined(gcMarkAndSweep) and not defined(gogc) and
+         not defined(gcRegions):
+      if ntfNoRefs notin extGetCellType(result).base.flags:
         for i in newLen..result.len-1:
-          let len0 = gch.tempStack.len
-          forAllChildrenAux(cast[pointer](cast[TAddress](result) +%
-                            GenericSeqSize +% (i*%elemSize)),
-                            extGetCellType(result).base, waPush)
-          let len1 = gch.tempStack.len
-          for i in len0 .. <len1:
-            doDecRef(gch.tempStack.d[i], LocalHeap, MaybeCyclic)
-          gch.tempStack.len = len0
-      else:
-        for i in newLen..result.len-1:
-          forAllChildrenAux(cast[pointer](cast[TAddress](result) +%
-                            GenericSeqSize +% (i*%elemSize)),
+          forAllChildrenAux(dataPointer(result, elemAlign, elemSize, i),
                             extGetCellType(result).base, waZctDecRef)
-      
+
     # XXX: zeroing out the memory can still result in crashes if a wiped-out
-    # cell is aliased by another pointer (ie proc paramter or a let variable).
-    # This is a tought problem, because even if we don't zeroMem here, in the
-    # presense of user defined destructors, the user will expect the cell to be
-    # "destroyed" thus creating the same problem. We can destoy the cell in the
+    # cell is aliased by another pointer (ie proc parameter or a let variable).
+    # This is a tough problem, because even if we don't zeroMem here, in the
+    # presence of user defined destructors, the user will expect the cell to be
+    # "destroyed" thus creating the same problem. We can destroy the cell in the
     # finalizer of the sequence, but this makes destruction non-deterministic.
-    zeroMem(cast[pointer](cast[TAddress](result) +% GenericSeqSize +%
-           (newLen*%elemSize)), (result.len-%newLen) *% elemSize)
+    zeroMem(dataPointer(result, elemAlign, elemSize, newLen), (result.len-%newLen) *% elemSize)
   result.len = newLen
 
-# --------------- other string routines ----------------------------------
-proc nimIntToStr(x: int): string {.compilerRtl.} =
-  result = newString(sizeof(x)*4)
-  var i = 0
-  var y = x
-  while True:
-    var d = y div 10
-    result[i] = chr(abs(int(y - d*10)) + ord('0'))
-    inc(i)
-    y = d
-    if y == 0: break
-  if x < 0:
-    result[i] = '-'
-    inc(i)
-  setLen(result, i)
-  # mirror the string:
-  for j in 0..i div 2 - 1:
-    swap(result[j], result[i-j-1])
-
-proc nimFloatToStr(x: float): string {.compilerproc.} =
-  var buf: array [0..59, char]
-  c_sprintf(buf, "%#.16e", x)
-  return $buf
-
-proc nimInt64ToStr(x: int64): string {.compilerRtl.} =
-  result = newString(sizeof(x)*4)
-  var i = 0
-  var y = x
-  while True:
-    var d = y div 10
-    result[i] = chr(abs(int(y - d*10)) + ord('0'))
-    inc(i)
-    y = d
-    if y == 0: break
-  if x < 0:
-    result[i] = '-'
-    inc(i)
-  setLen(result, i)
-  # mirror the string:
-  for j in 0..i div 2 - 1:
-    swap(result[j], result[i-j-1])
-
-proc nimBoolToStr(x: bool): string {.compilerRtl.} =
-  return if x: "true" else: "false"
-
-proc nimCharToStr(x: char): string {.compilerRtl.} =
-  result = newString(1)
-  result[0] = x
-
-proc binaryStrSearch(x: openarray[string], y: string): int {.compilerproc.} =
-  var
-    a = 0
-    b = len(x)
-  while a < b:
-    var mid = (a + b) div 2
-    if x[mid] < y:
-      a = mid + 1
+proc setLengthSeqV2(s: PGenericSeq, typ: PNimType, newLen: int): PGenericSeq {.
+    compilerRtl.} =
+  sysAssert typ.kind == tySequence, "setLengthSeqV2: type is not a seq"
+  if s == nil: # no buffer yet
+    if newLen == 0:
+      result = s # nil stays nil for length 0
     else:
-      b = mid
-  if a < len(x) and x[a] == y:
-    result = a
+      result = cast[PGenericSeq](newSeq(typ, newLen))
   else:
-    result = -1
+    let elemSize = typ.base.size
+    let elemAlign = typ.base.align
+    if s.space < newLen: # does not fit: copy the elements into a new, larger seq
+      let r = max(resize(s.space), newLen)
+      result = cast[PGenericSeq](newSeq(typ, r))
+      copyMem(dataPointer(result, elemAlign), dataPointer(s, elemAlign), s.len * elemSize)
+      # since we steal the content from 's', it's crucial to set s's len to 0.
+      s.len = 0
+    elif newLen < s.len: # shrinking in place
+      result = s
+      # we need to decref here, otherwise the GC leaks!
+      when not defined(boehmGC) and not defined(nogc) and
+          not defined(gcMarkAndSweep) and not defined(gogc) and
+          not defined(gcRegions):
+        if ntfNoRefs notin typ.base.flags: # walk the dropped elements unless the element type is flagged ntfNoRefs
+          for i in newLen..result.len-1:
+            forAllChildrenAux(dataPointer(result, elemAlign, elemSize, i),
+                              extGetCellType(result).base, waZctDecRef)
+
+      # XXX: zeroing out the memory can still result in crashes if a wiped-out
+      # cell is aliased by another pointer (ie proc parameter or a let variable).
+      # This is a tough problem, because even if we don't zeroMem here, in the
+      # presence of user defined destructors, the user will expect the cell to be
+      # "destroyed" thus creating the same problem. We can destroy the cell in the
+      # finalizer of the sequence, but this makes destruction non-deterministic.
+      zeroMem(dataPointer(result, elemAlign, elemSize, newLen), (result.len-%newLen) *% elemSize)
+    else: # growing within the existing capacity: zero the newly exposed elements
+      result = s
+      zeroMem(dataPointer(result, elemAlign, elemSize, result.len), (newLen-%result.len) *% elemSize)
+    result.len = newLen
+
+func capacity*(self: string): int {.inline.} =
+  ## Returns the current capacity of the string; a nil string header
+  ## reports 0. See https://github.com/nim-lang/RFCs/issues/460
+  runnableExamples:
+    var str = newStringOfCap(cap = 42)
+    str.add "Nim"
+    assert str.capacity == 42
+
+  let header = cast[NimString](self)
+  if header != nil: result = header.space
+
+func capacity*[T](self: seq[T]): int {.inline.} =
+  ## Returns the current capacity of the seq; a nil seq header
+  ## reports 0. See https://github.com/nim-lang/RFCs/issues/460
+  runnableExamples:
+    var lst = newSeqOfCap[string](cap = 42)
+    lst.add "Nim"
+    assert lst.capacity == 42
+
+  let header = cast[PGenericSeq](self)
+  if header != nil: result = header.space
diff --git a/lib/system/threadids.nim b/lib/system/threadids.nim
new file mode 100644
index 000000000..3a6eadcbb
--- /dev/null
+++ b/lib/system/threadids.nim
@@ -0,0 +1,103 @@
+#
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2020 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+# we need to cache current threadId to not perform syscall all the time
+var threadId {.threadvar.}: int
+
+when defined(windows):
+  proc getCurrentThreadId(): int32 {.
+    importc: "GetCurrentThreadId", stdcall, dynlib: "kernel32".}
+
+  proc getThreadId*(): int =
+    ## Gets the ID of the currently running thread; the OS is only queried
+    ## while the thread-local `threadId` cache still holds 0.
+    if threadId == 0: threadId = int(getCurrentThreadId())
+    return threadId
+
+elif defined(linux):
+  proc syscall(arg: clong): clong {.varargs, importc: "syscall", header: "<unistd.h>".}
+  when defined(amd64):
+    const NR_gettid = clong(186) # hard-coded on amd64 instead of importing __NR_gettid
+  else:
+    var NR_gettid {.importc: "__NR_gettid", header: "<sys/syscall.h>".}: clong
+
+  proc getThreadId*(): int =
+    ## Gets the ID of the currently running thread; the syscall is only made
+    ## while the thread-local `threadId` cache still holds 0.
+    if threadId == 0: threadId = int(syscall(NR_gettid))
+    return threadId
+
+elif defined(dragonfly):
+  proc lwp_gettid(): int32 {.importc, header: "unistd.h".}
+
+  proc getThreadId*(): int =
+    ## Gets the ID of the currently running thread; `lwp_gettid` is only
+    ## called while the thread-local `threadId` cache still holds 0.
+    if threadId == 0: threadId = int(lwp_gettid())
+    return threadId
+
+elif defined(openbsd):
+  proc getthrid(): int32 {.importc: "getthrid", header: "<unistd.h>".}
+
+  proc getThreadId*(): int =
+    ## Gets the ID of the currently running thread; `getthrid` is only
+    ## called while the thread-local `threadId` cache still holds 0.
+    if threadId == 0: threadId = int(getthrid())
+    return threadId
+
+elif defined(netbsd):
+  proc lwp_self(): int32 {.importc: "_lwp_self", header: "<lwp.h>".}
+
+  proc getThreadId*(): int =
+    ## Gets the ID of the currently running thread; `_lwp_self` is only
+    ## called while the thread-local `threadId` cache still holds 0.
+    if threadId == 0: threadId = int(lwp_self())
+    return threadId
+
+elif defined(freebsd):
+  proc syscall(arg: cint, arg0: ptr clong): cint {.varargs, importc: "syscall", header: "<unistd.h>".}
+  var SYS_thr_self {.importc:"SYS_thr_self", header:"<sys/syscall.h>".}: cint
+
+  proc getThreadId*(): int =
+    ## Gets the ID of the currently running thread (cached in `threadId` after the first query).
+    var tid = 0.clong # thr_self(2) stores the ID through a `long *`; a cint buffer would be overrun on 64-bit targets
+    if threadId == 0:
+      discard syscall(SYS_thr_self, addr tid)
+      threadId = int(tid)
+    result = threadId
+
+elif defined(macosx):
+  proc syscall(arg: cint): cint {.varargs, importc: "syscall", header: "<unistd.h>".}
+  var SYS_thread_selfid {.importc:"SYS_thread_selfid", header:"<sys/syscall.h>".}: cint
+
+  proc getThreadId*(): int =
+    ## Gets the ID of the currently running thread; the syscall is only made
+    ## while the thread-local `threadId` cache still holds 0.
+    if threadId == 0: threadId = int(syscall(SYS_thread_selfid))
+    return threadId
+
+elif defined(solaris):
+  type thread_t {.importc: "thread_t", header: "<thread.h>".} = distinct int
+  proc thr_self(): thread_t {.importc, header: "<thread.h>".}
+
+  proc getThreadId*(): int =
+    ## Gets the ID of the currently running thread; `thr_self` is only
+    ## called while the thread-local `threadId` cache still holds 0.
+    if threadId == 0: threadId = int(thr_self())
+    return threadId
+
+elif defined(haiku):
+  type thr_id {.importc: "thread_id", header: "<OS.h>".} = distinct int32
+  proc find_thread(name: cstring): thr_id {.importc, header: "<OS.h>".}
+
+  proc getThreadId*(): int =
+    ## Gets the ID of the currently running thread; `find_thread(nil)` is only
+    ## called while the thread-local `threadId` cache still holds 0.
+    if threadId == 0: threadId = int(find_thread(nil))
+    return threadId
diff --git a/lib/system/threadimpl.nim b/lib/system/threadimpl.nim
new file mode 100644
index 000000000..285b8f5e7
--- /dev/null
+++ b/lib/system/threadimpl.nim
@@ -0,0 +1,111 @@
+var
+  nimThreadDestructionHandlers* {.rtlThreadVar.}: seq[proc () {.closure, gcsafe, raises: [].}] # filled by onThreadDestruction, run by afterThreadRuns
+when not defined(boehmgc) and not hasSharedHeap and not defined(gogc) and not defined(gcRegions):
+  proc deallocOsPages() {.rtl, raises: [].} # forward declaration; called after a thread proc finished
+proc threadTrouble() {.raises: [], gcsafe.} # forward declaration; called when a thread proc raised
+# create for the main thread. Note: do not insert this data into the list
+# of all threads; it's not to be stopped etc.
+when not defined(useNimRtl):
+  #when not defined(createNimRtl): initStackBottom()
+  when declared(initGC):
+    initGC()
+    when not emulatedThreadVars:
+      type ThreadType {.pure.} = enum
+        None = 0,
+        NimThread = 1,
+        ForeignThread = 2
+      var
+        threadType {.rtlThreadVar.}: ThreadType
+
+      threadType = ThreadType.NimThread # module-level code; per the note above this is the main thread
+
+when defined(gcDestructors): # thread storage is released with c_free under gcDestructors ...
+  proc deallocThreadStorage(p: pointer) = c_free(p)
+else: # ... and with deallocShared otherwise
+  template deallocThreadStorage(p: pointer) = deallocShared(p)
+
+template afterThreadRuns() = # calls every registered destruction handler, last registered first
+  for i in countdown(nimThreadDestructionHandlers.len-1, 0):
+    nimThreadDestructionHandlers[i]()
+
+proc onThreadDestruction*(handler: proc () {.closure, gcsafe, raises: [].}) =
+  ## Registers a *thread local* handler that is called at the thread's
+  ## destruction. Handlers are appended to a per-thread list.
+  ##
+  ## A thread is destructed when the `.thread` proc returns
+  ## normally or when it raises an exception. Note that unhandled exceptions
+  ## in a thread nevertheless cause the whole process to die.
+  nimThreadDestructionHandlers.add handler
+
+when defined(boehmgc):
+  type GCStackBaseProc = proc(sb: pointer, t: pointer) {.noconv.}
+  proc boehmGC_call_with_stack_base(sbp: GCStackBaseProc, p: pointer)
+    {.importc: "GC_call_with_stack_base", boehmGC.}
+  proc boehmGC_register_my_thread(sb: pointer)
+    {.importc: "GC_register_my_thread", boehmGC.}
+  proc boehmGC_unregister_my_thread()
+    {.importc: "GC_unregister_my_thread", boehmGC.}
+
+  proc threadProcWrapDispatch[TArg](sb: pointer, thrd: pointer) {.noconv, raises: [].} =
+    boehmGC_register_my_thread(sb) # register this thread with the Boehm GC before running the thread proc
+    try:
+      let thrd = cast[ptr Thread[TArg]](thrd)
+      when TArg is void:
+        thrd.dataFn()
+      else:
+        thrd.dataFn(thrd.data)
+    except: # nothing may escape ({.raises: [].}); the failure is handed to threadTrouble
+      threadTrouble()
+    finally:
+      afterThreadRuns() # destruction handlers run whether or not the thread proc raised
+    boehmGC_unregister_my_thread()
+else:
+  proc threadProcWrapDispatch[TArg](thrd: ptr Thread[TArg]) {.raises: [].} =
+    try:
+      when TArg is void:
+        thrd.dataFn()
+      else:
+        when defined(nimV2):
+          thrd.dataFn(thrd.data)
+        else:
+          var x: TArg
+          deepCopy(x, thrd.data) # the thread proc receives its own deep copy of the argument
+          thrd.dataFn(x)
+    except: # nothing may escape ({.raises: [].}); the failure is handed to threadTrouble
+      threadTrouble()
+    finally:
+      afterThreadRuns() # destruction handlers run whether or not the thread proc raised
+      when hasAllocStack:
+        deallocThreadStorage(thrd.rawStack)
+
+proc threadProcWrapStackFrame[TArg](thrd: ptr Thread[TArg]) {.raises: [].} =
+  when defined(boehmgc): # Boehm: let the GC supply the stack base to the dispatcher
+    boehmGC_call_with_stack_base(threadProcWrapDispatch[TArg], thrd)
+  elif not defined(nogc) and not defined(gogc) and not defined(gcRegions) and not usesDestructors:
+    var p {.volatile.}: pointer # only its address is used, as the stack bottom handed to the GC
+    # init the GC for refc/markandsweep
+    nimGC_setStackBottom(addr(p))
+    when declared(initGC):
+      initGC()
+    when declared(threadType):
+      threadType = ThreadType.NimThread
+    threadProcWrapDispatch[TArg](thrd)
+    when declared(deallocOsPages): deallocOsPages() # called once the thread proc and its handlers are done
+  else: # no per-thread GC setup in the remaining configurations
+    threadProcWrapDispatch(thrd)
+
+template nimThreadProcWrapperBody*(closure: untyped): untyped =
+  var thrd = cast[ptr Thread[TArg]](closure) # `TArg` must be in scope at the instantiation site
+  var core = thrd.core # saved: thrd.core is cleared below before the storage is freed
+  when declared(globalsSlot): threadVarSetValue(globalsSlot, thrd.core)
+  threadProcWrapStackFrame(thrd)
+  # Since an unhandled exception terminates the whole process (!), there is
+  # no need for a ``try finally`` here, nor would it be correct: The current
+  # exception is tried to be re-raised by the code-gen after the ``finally``!
+  # However this is doomed to fail, because we already unmapped every heap
+  # page!
+
+  # mark as not running anymore:
+  thrd.core = nil
+  thrd.dataFn = nil
+  deallocThreadStorage(cast[pointer](core))
diff --git a/lib/system/threadlocalstorage.nim b/lib/system/threadlocalstorage.nim
new file mode 100644
index 000000000..e6ad9dca5
--- /dev/null
+++ b/lib/system/threadlocalstorage.nim
@@ -0,0 +1,125 @@
+import std/private/threadtypes
+
+when defined(windows):
+  type
+    ThreadVarSlot = distinct int32
+
+  proc threadVarAlloc(): ThreadVarSlot {.
+    importc: "TlsAlloc", stdcall, header: "<windows.h>".}
+  proc threadVarSetValue(dwTlsIndex: ThreadVarSlot, lpTlsValue: pointer) {.
+    importc: "TlsSetValue", stdcall, header: "<windows.h>".}
+  proc tlsGetValue(dwTlsIndex: ThreadVarSlot): pointer {.
+    importc: "TlsGetValue", stdcall, header: "<windows.h>".}
+
+  proc getLastError(): uint32 {.
+    importc: "GetLastError", stdcall, header: "<windows.h>".}
+  proc setLastError(x: uint32) {.
+    importc: "SetLastError", stdcall, header: "<windows.h>".}
+
+  proc threadVarGetValue(dwTlsIndex: ThreadVarSlot): pointer =
+    let realLastError = getLastError() # remember the caller's last-error value
+    result = tlsGetValue(dwTlsIndex)
+    setLastError(realLastError) # restore it so the TLS lookup leaves the last-error value unchanged
+
+elif defined(genode):
+  const
+    GenodeHeader = "genode_cpp/threads.h"
+
+  type
+    ThreadVarSlot = int
+
+  proc threadVarAlloc(): ThreadVarSlot = 0 # the slot value is ignored by the accessors below
+
+  proc offMainThread(): bool {.
+    importcpp: "Nim::SysThread::offMainThread",
+    header: GenodeHeader.}
+
+  proc threadVarSetValue(value: pointer) {.
+    importcpp: "Nim::SysThread::threadVarSetValue(@)",
+    header: GenodeHeader.}
+
+  proc threadVarGetValue(): pointer {.
+    importcpp: "Nim::SysThread::threadVarGetValue()",
+    header: GenodeHeader.}
+
+  var mainTls: pointer # holds the value whenever offMainThread() is false
+
+  proc threadVarSetValue(s: ThreadVarSlot, value: pointer) {.inline.} =
+    if offMainThread(): # `s` is unused: other threads go through Nim::SysThread
+      threadVarSetValue(value);
+    else:
+      mainTls = value
+
+  proc threadVarGetValue(s: ThreadVarSlot): pointer {.inline.} =
+    if offMainThread(): # `s` is unused: other threads go through Nim::SysThread
+      threadVarGetValue();
+    else:
+      mainTls
+
+else:
+  when not (defined(macosx) or defined(haiku)):
+    {.passl: "-pthread".}
+
+  when not defined(haiku):
+    {.passc: "-pthread".}
+
+  when (defined(linux) or defined(nintendoswitch)) and defined(amd64):
+    type
+      ThreadVarSlot {.importc: "pthread_key_t",
+                    header: "<sys/types.h>".} = distinct cuint
+  elif defined(openbsd) and defined(amd64):
+    type
+      ThreadVarSlot {.importc: "pthread_key_t",
+                     header: "<pthread.h>".} = cint
+  else: # all other targets: pthread_key_t is imported as an opaque object
+    type
+      ThreadVarSlot {.importc: "pthread_key_t",
+                     header: "<sys/types.h>".} = object
+
+  proc pthread_getspecific(a1: ThreadVarSlot): pointer {.
+    importc: "pthread_getspecific", header: pthreadh.}
+  proc pthread_key_create(a1: ptr ThreadVarSlot,
+                          destruct: proc (x: pointer) {.noconv.}): int32 {.
+    importc: "pthread_key_create", header: pthreadh.}
+  proc pthread_key_delete(a1: ThreadVarSlot): int32 {.
+    importc: "pthread_key_delete", header: pthreadh.}
+
+  proc pthread_setspecific(a1: ThreadVarSlot, a2: pointer): int32 {.
+    importc: "pthread_setspecific", header: pthreadh.}
+
+  proc threadVarAlloc(): ThreadVarSlot {.inline.} =
+    discard pthread_key_create(addr(result), nil) # no destructor is registered; the return code is ignored
+  proc threadVarSetValue(s: ThreadVarSlot, value: pointer) {.inline.} =
+    discard pthread_setspecific(s, value) # the return code is ignored
+  proc threadVarGetValue(s: ThreadVarSlot): pointer {.inline.} =
+    result = pthread_getspecific(s)
+
+
+when emulatedThreadVars:
+  # the compiler generates this proc for us, so that we can get the size of
+  # the thread local var block; we use this only for sanity checking though
+  proc nimThreadVarsSize(): int {.noconv, importc: "NimThreadVarsSize".}
+
+
+
+when emulatedThreadVars:
+  var globalsSlot: ThreadVarSlot # TLS slot whose value is read back as a PGcThread
+
+  when not defined(useNimRtl):
+    var mainThread: GcThread # its address is what initThreadVarsEmulation stores in the slot
+
+  proc GetThreadLocalVars(): pointer {.compilerRtl, inl.} =
+    result = addr(cast[PGcThread](threadVarGetValue(globalsSlot)).tls) # address of the `tls` field of the slot's GcThread
+
+  proc initThreadVarsEmulation() {.compilerproc, inline.} =
+    when not defined(useNimRtl): # does nothing when useNimRtl is defined
+      globalsSlot = threadVarAlloc()
+      when declared(mainThread):
+        threadVarSetValue(globalsSlot, addr(mainThread))
+
+when not defined(useNimRtl):
+  when emulatedThreadVars:
+    if nimThreadVarsSize() > sizeof(ThreadLocalStorage): # more TLS requested than ThreadLocalStorage can hold
+      c_fprintf(cstderr, """too large thread local storage size requested,
+use -d:"nimTlsSize=X" to setup even more or stop using unittest.nim""")
+      rawQuit 1 # note: triple-quoted literals process no escapes, hence the bare quotes in the message
diff --git a/lib/system/threads.nim b/lib/system/threads.nim
deleted file mode 100755
index aba3bb275..000000000
--- a/lib/system/threads.nim
+++ /dev/null
@@ -1,376 +0,0 @@
-#
-#
-#            Nimrod's Runtime Library
-#        (c) Copyright 2012 Andreas Rumpf
-#
-#    See the file "copying.txt", included in this
-#    distribution, for details about the copyright.
-#
-
-## Thread support for Nimrod. **Note**: This is part of the system module.
-## Do not import it directly. To activate thread support you need to compile
-## with the ``--threads:on`` command line switch.
-##
-## Nimrod's memory model for threads is quite different from other common 
-## programming languages (C, Pascal): Each thread has its own
-## (garbage collected) heap and sharing of memory is restricted. This helps
-## to prevent race conditions and improves efficiency. See the manual for
-## details of this memory model.
-##
-## Example:
-##
-## .. code-block:: nimrod
-##
-##  import locks
-##
-##  var
-##    thr: array [0..4, TThread[tuple[a,b: int]]]
-##    L: TLock
-##  
-##  proc threadFunc(interval: tuple[a,b: int]) {.thread.} =
-##    for i in interval.a..interval.b:
-##      Acquire(L) # lock stdout
-##      echo i
-##      Release(L)
-##
-##  InitLock(L)
-##
-##  for i in 0..high(thr):
-##    createThread(thr[i], threadFunc, (i*10, i*10+5))
-##  joinThreads(thr)
-  
-const
-  maxRegisters = 256 # don't think there is an arch with more registers
-  useStackMaskHack = false ## use the stack mask hack for better performance
-  StackGuardSize = 4096
-  ThreadStackMask = 1024*256*sizeof(int)-1
-  ThreadStackSize = ThreadStackMask+1 - StackGuardSize
-
-when defined(windows):
-  type
-    TSysThread = THandle
-    TWinThreadProc = proc (x: pointer): int32 {.stdcall.}
-
-  proc CreateThread(lpThreadAttributes: Pointer, dwStackSize: int32,
-                     lpStartAddress: TWinThreadProc, 
-                     lpParameter: Pointer,
-                     dwCreationFlags: int32, 
-                     lpThreadId: var int32): TSysThread {.
-    stdcall, dynlib: "kernel32", importc: "CreateThread".}
-
-  proc winSuspendThread(hThread: TSysThread): int32 {.
-    stdcall, dynlib: "kernel32", importc: "SuspendThread".}
-      
-  proc winResumeThread(hThread: TSysThread): int32 {.
-    stdcall, dynlib: "kernel32", importc: "ResumeThread".}
-
-  proc WaitForMultipleObjects(nCount: int32,
-                              lpHandles: ptr TSysThread,
-                              bWaitAll: int32,
-                              dwMilliseconds: int32): int32 {.
-    stdcall, dynlib: "kernel32", importc: "WaitForMultipleObjects".}
-
-  proc TerminateThread(hThread: TSysThread, dwExitCode: int32): int32 {.
-    stdcall, dynlib: "kernel32", importc: "TerminateThread".}
-    
-  type
-    TThreadVarSlot = distinct int32
-
-  proc ThreadVarAlloc(): TThreadVarSlot {.
-    importc: "TlsAlloc", stdcall, dynlib: "kernel32".}
-  proc ThreadVarSetValue(dwTlsIndex: TThreadVarSlot, lpTlsValue: pointer) {.
-    importc: "TlsSetValue", stdcall, dynlib: "kernel32".}
-  proc ThreadVarGetValue(dwTlsIndex: TThreadVarSlot): pointer {.
-    importc: "TlsGetValue", stdcall, dynlib: "kernel32".}
-  
-else:
-  {.passL: "-pthread".}
-  {.passC: "-pthread".}
-
-  type
-    TSysThread {.importc: "pthread_t", header: "<sys/types.h>",
-                 final, pure.} = object
-    Tpthread_attr {.importc: "pthread_attr_t",
-                     header: "<sys/types.h>", final, pure.} = object
-                 
-    Ttimespec {.importc: "struct timespec",
-                header: "<time.h>", final, pure.} = object
-      tv_sec: int
-      tv_nsec: int
-
-  proc pthread_attr_init(a1: var TPthread_attr) {.
-    importc, header: "<pthread.h>".}
-  proc pthread_attr_setstacksize(a1: var TPthread_attr, a2: int) {.
-    importc, header: "<pthread.h>".}
-
-  proc pthread_create(a1: var TSysThread, a2: var TPthread_attr,
-            a3: proc (x: pointer) {.noconv.}, 
-            a4: pointer): cint {.importc: "pthread_create", 
-            header: "<pthread.h>".}
-  proc pthread_join(a1: TSysThread, a2: ptr pointer): cint {.
-    importc, header: "<pthread.h>".}
-
-  proc pthread_cancel(a1: TSysThread): cint {.
-    importc: "pthread_cancel", header: "<pthread.h>".}
-
-  proc AcquireSysTimeoutAux(L: var TSysLock, timeout: var Ttimespec): cint {.
-    importc: "pthread_mutex_timedlock", header: "<time.h>".}
-
-  proc AcquireSysTimeout(L: var TSysLock, msTimeout: int) {.inline.} =
-    var a: Ttimespec
-    a.tv_sec = msTimeout div 1000
-    a.tv_nsec = (msTimeout mod 1000) * 1000
-    var res = AcquireSysTimeoutAux(L, a)
-    if res != 0'i32: raise newException(EResourceExhausted, $strerror(res))
-
-  type
-    TThreadVarSlot {.importc: "pthread_key_t", pure, final,
-                   header: "<sys/types.h>".} = object
-
-  proc pthread_getspecific(a1: TThreadVarSlot): pointer {.
-    importc: "pthread_getspecific", header: "<pthread.h>".}
-  proc pthread_key_create(a1: ptr TThreadVarSlot, 
-                          destruct: proc (x: pointer) {.noconv.}): int32 {.
-    importc: "pthread_key_create", header: "<pthread.h>".}
-  proc pthread_key_delete(a1: TThreadVarSlot): int32 {.
-    importc: "pthread_key_delete", header: "<pthread.h>".}
-
-  proc pthread_setspecific(a1: TThreadVarSlot, a2: pointer): int32 {.
-    importc: "pthread_setspecific", header: "<pthread.h>".}
-  
-  proc ThreadVarAlloc(): TThreadVarSlot {.inline.} =
-    discard pthread_key_create(addr(result), nil)
-  proc ThreadVarSetValue(s: TThreadVarSlot, value: pointer) {.inline.} =
-    discard pthread_setspecific(s, value)
-  proc ThreadVarGetValue(s: TThreadVarSlot): pointer {.inline.} =
-    result = pthread_getspecific(s)
-
-  when useStackMaskHack:
-    proc pthread_attr_setstack(attr: var TPthread_attr, stackaddr: pointer,
-                               size: int): cint {.
-      importc: "pthread_attr_setstack", header: "<pthread.h>".}
-
-const
-  emulatedThreadVars = compileOption("tlsEmulation")
-
-when emulatedThreadVars:
-  # the compiler generates this proc for us, so that we can get the size of
-  # the thread local var block; we use this only for sanity checking though
-  proc NimThreadVarsSize(): int {.noconv, importc: "NimThreadVarsSize".}
-
-# we preallocate a fixed size for thread local storage, so that no heap
-# allocations are needed. Currently less than 7K are used on a 64bit machine.
-# We use ``float`` for proper alignment:
-type
-  TThreadLocalStorage = array [0..1_000, float]
-
-  PGcThread = ptr TGcThread
-  TGcThread {.pure, inheritable.} = object
-    sys: TSysThread
-    when emulatedThreadVars and not useStackMaskHack:
-      tls: TThreadLocalStorage
-    else:
-      nil
-    when hasSharedHeap:
-      next, prev: PGcThread
-      stackBottom, stackTop: pointer
-      stackSize: int
-    else:
-      nil
-
-# XXX it'd be more efficient to not use a global variable for the 
-# thread storage slot, but to rely on the implementation to assign slot X
-# for us... ;-)
-var globalsSlot = ThreadVarAlloc()
-#const globalsSlot = TThreadVarSlot(0)
-#sysAssert checkSlot.int == globalsSlot.int
-
-when emulatedThreadVars:
-  proc GetThreadLocalVars(): pointer {.compilerRtl, inl.} =
-    result = addr(cast[PGcThread](ThreadVarGetValue(globalsSlot)).tls)
-
-when useStackMaskHack:
-  proc MaskStackPointer(offset: int): pointer {.compilerRtl, inl.} =
-    var x {.volatile.}: pointer
-    x = addr(x)
-    result = cast[pointer]((cast[int](x) and not ThreadStackMask) +% 
-      (0) +% offset)
-
-# create for the main thread. Note: do not insert this data into the list
-# of all threads; it's not to be stopped etc.
-when not defined(useNimRtl):
-  
-  when not useStackMaskHack:
-    var mainThread: TGcThread
-    ThreadVarSetValue(globalsSlot, addr(mainThread))
-    when not defined(createNimRtl): initStackBottom()
-    initGC()
-    
-  when emulatedThreadVars:
-    if NimThreadVarsSize() > sizeof(TThreadLocalStorage):
-      echo "too large thread local storage size requested"
-      quit 1
-  
-  when hasSharedHeap and not defined(boehmgc) and not defined(nogc):
-    var
-      threadList: PGcThread
-      
-    proc registerThread(t: PGcThread) = 
-      # we need to use the GC global lock here!
-      AcquireSys(HeapLock)
-      t.prev = nil
-      t.next = threadList
-      if threadList != nil: 
-        sysAssert(threadList.prev == nil, "threadList.prev == nil")
-        threadList.prev = t
-      threadList = t
-      ReleaseSys(HeapLock)
-    
-    proc unregisterThread(t: PGcThread) =
-      # we need to use the GC global lock here!
-      AcquireSys(HeapLock)
-      if t == threadList: threadList = t.next
-      if t.next != nil: t.next.prev = t.prev
-      if t.prev != nil: t.prev.next = t.next
-      # so that a thread can be unregistered twice which might happen if the
-      # code executes `destroyThread`:
-      t.next = nil
-      t.prev = nil
-      ReleaseSys(HeapLock)
-      
-    # on UNIX, the GC uses ``SIGFREEZE`` to tell every thread to stop so that
-    # the GC can examine the stacks?
-    proc stopTheWord() = nil
-    
-# We jump through some hops here to ensure that Nimrod thread procs can have
-# the Nimrod calling convention. This is needed because thread procs are 
-# ``stdcall`` on Windows and ``noconv`` on UNIX. Alternative would be to just
-# use ``stdcall`` since it is mapped to ``noconv`` on UNIX anyway.
-
-type
-  TThread* {.pure, final.}[TArg] =
-      object of TGcThread ## Nimrod thread. A thread is a heavy object (~14K)
-                          ## that **must not** be part of a message! Use
-                          ## a ``TThreadId`` for that.
-    when TArg is void:
-      dataFn: proc () {.nimcall.}
-    else:
-      dataFn: proc (m: TArg) {.nimcall.}
-      data: TArg
-  TThreadId*[TArg] = ptr TThread[TArg] ## the current implementation uses
-                                       ## a pointer as a thread ID.
-
-when not defined(boehmgc) and not hasSharedHeap:
-  proc deallocOsPages()
-
-template ThreadProcWrapperBody(closure: expr) {.immediate.} =
-  when defined(globalsSlot): ThreadVarSetValue(globalsSlot, closure)
-  var t = cast[ptr TThread[TArg]](closure)
-  when useStackMaskHack:
-    var tls: TThreadLocalStorage
-  when not defined(boehmgc) and not defined(nogc) and not hasSharedHeap:
-    # init the GC for this thread:
-    setStackBottom(addr(t))
-    initGC()
-  when defined(registerThread):
-    t.stackBottom = addr(t)
-    registerThread(t)
-  when TArg is void: t.dataFn()
-  else: t.dataFn(t.data)
-  when defined(registerThread): unregisterThread(t)
-  when defined(deallocOsPages): deallocOsPages()
-  # Since an unhandled exception terminates the whole process (!), there is
-  # no need for a ``try finally`` here, nor would it be correct: The current
-  # exception is tried to be re-raised by the code-gen after the ``finally``!
-  # However this is doomed to fail, because we already unmapped every heap
-  # page!
-  
-  # mark as not running anymore:
-  t.dataFn = nil
-  
-{.push stack_trace:off.}
-when defined(windows):
-  proc threadProcWrapper[TArg](closure: pointer): int32 {.stdcall.} = 
-    ThreadProcWrapperBody(closure)
-    # implicitely return 0
-else:
-  proc threadProcWrapper[TArg](closure: pointer) {.noconv.} = 
-    ThreadProcWrapperBody(closure)
-{.pop.}
-
-proc running*[TArg](t: TThread[TArg]): bool {.inline.} = 
-  ## returns true if `t` is running.
-  result = t.dataFn != nil
-
-proc joinThread*[TArg](t: TThread[TArg]) {.inline.} = 
-  ## waits for the thread `t` to finish.
-  when hostOS == "windows":
-    discard WaitForSingleObject(t.sys, -1'i32)
-  else:
-    discard pthread_join(t.sys, nil)
-
-proc joinThreads*[TArg](t: varargs[TThread[TArg]]) = 
-  ## waits for every thread in `t` to finish.
-  when hostOS == "windows":
-    var a: array[0..255, TSysThread]
-    sysAssert a.len >= t.len, "a.len >= t.len"
-    for i in 0..t.high: a[i] = t[i].sys
-    discard WaitForMultipleObjects(t.len.int32, 
-                                   cast[ptr TSysThread](addr(a)), 1, -1)
-  else:
-    for i in 0..t.high: joinThread(t[i])
-
-when false:
-  # XXX a thread should really release its heap here somehow:
-  proc destroyThread*[TArg](t: var TThread[TArg]) =
-    ## forces the thread `t` to terminate. This is potentially dangerous if
-    ## you don't have full control over `t` and its acquired resources.
-    when hostOS == "windows":
-      discard TerminateThread(t.sys, 1'i32)
-    else:
-      discard pthread_cancel(t.sys)
-    when defined(registerThread): unregisterThread(addr(t))
-    t.dataFn = nil
-
-proc createThread*[TArg](t: var TThread[TArg], 
-                         tp: proc (arg: TArg) {.thread.}, 
-                         param: TArg) =
-  ## creates a new thread `t` and starts its execution. Entry point is the
-  ## proc `tp`. `param` is passed to `tp`. `TArg` can be ``void`` if you
-  ## don't need to pass any data to the thread.
-  when TArg isnot void: t.data = param
-  t.dataFn = tp
-  when hasSharedHeap: t.stackSize = ThreadStackSize
-  when hostOS == "windows":
-    var dummyThreadId: int32
-    t.sys = CreateThread(nil, ThreadStackSize, threadProcWrapper[TArg],
-                         addr(t), 0'i32, dummyThreadId)
-    if t.sys <= 0:
-      raise newException(EResourceExhausted, "cannot create thread")
-  else:
-    var a {.noinit.}: Tpthread_attr
-    pthread_attr_init(a)
-    pthread_attr_setstacksize(a, ThreadStackSize)
-    if pthread_create(t.sys, a, threadProcWrapper[TArg], addr(t)) != 0:
-      raise newException(EResourceExhausted, "cannot create thread")
-
-proc threadId*[TArg](t: var TThread[TArg]): TThreadId[TArg] {.inline.} =
-  ## returns the thread ID of `t`.
-  result = addr(t)
-
-proc myThreadId*[TArg](): TThreadId[TArg] =
-  ## returns the thread ID of the thread that calls this proc. This is unsafe
-  ## because the type ``TArg`` is not checked for consistency!
-  result = cast[TThreadId[TArg]](ThreadVarGetValue(globalsSlot))
-
-when false:
-  proc mainThreadId*[TArg](): TThreadId[TArg] =
-    ## returns the thread ID of the main thread.
-    result = cast[TThreadId[TArg]](addr(mainThread))
-
-when useStackMaskHack:
-  proc runMain(tp: proc () {.thread.}) {.compilerproc.} =
-    var mainThread: TThread[pointer]
-    createThread(mainThread, tp)
-    joinThread(mainThread)
-
diff --git a/lib/system/timers.nim b/lib/system/timers.nim
index fa1a13a5f..ffb0f7716 100644
--- a/lib/system/timers.nim
+++ b/lib/system/timers.nim
@@ -1,6 +1,6 @@
 #
 #
-#            Nimrod's Runtime Library
+#            Nim's Runtime Library
 #        (c) Copyright 2012 Andreas Rumpf
 #
 #    See the file "copying.txt", included in this
@@ -8,86 +8,92 @@
 #
 
 ## Timer support for the realtime GC. Based on
-## `<https://github.com/jckarter/clay/blob/master/compiler/src/hirestimer.cpp>`_
+## `<https://github.com/jckarter/clay/blob/master/compiler/hirestimer.cpp>`_
 
 type
-  TTicks = distinct int64
-  TNanos = int64
+  Ticks = distinct int64 ## raw `getTicks` reading; its unit is platform-specific
+  Nanos = int64 ## a time span in nanoseconds, as returned by `-` on `Ticks`
 
 when defined(windows):
 
-  proc QueryPerformanceCounter(res: var TTicks) {.
+  proc QueryPerformanceCounter(res: var Ticks) {.
     importc: "QueryPerformanceCounter", stdcall, dynlib: "kernel32".}
   proc QueryPerformanceFrequency(res: var int64) {.
     importc: "QueryPerformanceFrequency", stdcall, dynlib: "kernel32".}
 
-  proc getTicks(): TTicks {.inline.} =
+  proc getTicks(): Ticks {.inline.} = # current performance-counter reading
     QueryPerformanceCounter(result)
 
-  proc `-`(a, b: TTicks): TNanos =
+  proc `-`(a, b: Ticks): Nanos = # elapsed time between two readings, in ns
     var frequency: int64
     QueryPerformanceFrequency(frequency)
-    var performanceCounterRate = 1000000000.0 / toFloat(frequency.int)
+    var performanceCounterRate = 1e+9'f64 / float64(frequency) # ns per tick
 
-    result = ((a.int64 - b.int64).int.toFloat * performanceCounterRate).TNanos
+    result = Nanos(float64(a.int64 - b.int64) * performanceCounterRate)
 
-elif defined(macosx):
+elif defined(macosx) and not defined(emscripten):
   type
-    TMachTimebaseInfoData {.pure, final, 
-        importc: "mach_timebase_info_data_t", 
+    MachTimebaseInfoData {.pure, final,
+        importc: "mach_timebase_info_data_t",
         header: "<mach/mach_time.h>".} = object
-      numer, denom: int32
+      numer, denom: int32 # note: both are `uint32_t` in <mach/mach_time.h>
 
-  proc mach_absolute_time(): int64 {.importc, header: "<mach/mach.h>".}
-  proc mach_timebase_info(info: var TMachTimebaseInfoData) {.importc,
+  proc mach_absolute_time(): uint64 {.importc, header: "<mach/mach_time.h>".}
+  proc mach_timebase_info(info: var MachTimebaseInfoData) {.importc,
     header: "<mach/mach_time.h>".}
 
-  proc getTicks(): TTicks {.inline.} =
-    result = TTicks(mach_absolute_time())
-  
-  var timeBaseInfo: TMachTimebaseInfoData
+  proc getTicks(): Ticks {.inline.} = # current Mach absolute time, unscaled
+    result = Ticks(mach_absolute_time())
+
+  var timeBaseInfo: MachTimebaseInfoData
   mach_timebase_info(timeBaseInfo)
-    
-  proc `-`(a, b: TTicks): TNanos =
-    result = (a.int64 - b.int64)  * timeBaseInfo.numer div timeBaseInfo.denom
+
+  proc `-`(a, b: Ticks): Nanos = # tick delta scaled by timebase numer/denom
+    result = (a.int64 - b.int64) * timeBaseInfo.numer div timeBaseInfo.denom
 
 elif defined(posixRealtime):
   type
-    TClockid {.importc: "clockid_t", header: "<time.h>", final.} = object
+    Clockid {.importc: "clockid_t", header: "<time.h>", final.} = object
 
-    TTimeSpec {.importc: "struct timespec", header: "<time.h>", 
+    TimeSpec {.importc: "struct timespec", header: "<time.h>",
                final, pure.} = object ## struct timespec
-      tv_sec: int  ## Seconds. 
-      tv_nsec: int ## Nanoseconds. 
+      tv_sec: int  ## Seconds.
+      tv_nsec: int ## Nanoseconds.
 
   var
-    CLOCK_REALTIME {.importc: "CLOCK_REALTIME", header: "<time.h>".}: TClockid
+    CLOCK_REALTIME {.importc: "CLOCK_REALTIME", header: "<time.h>".}: Clockid
 
-  proc clock_gettime(clkId: TClockid, tp: var TTimespec) {.
+  proc clock_gettime(clkId: Clockid, tp: var Timespec) {.
     importc: "clock_gettime", header: "<time.h>".}
 
-  proc getTicks(): TTicks =
-    var t: TTimespec
+  proc getTicks(): Ticks = # CLOCK_REALTIME reading as one nanosecond count
+    var t: Timespec
     clock_gettime(CLOCK_REALTIME, t)
-    result = TTicks(int64(t.tv_sec) * 1000000000'i64 + int64(t.tv_nsec))
+    result = Ticks(int64(t.tv_sec) * 1000000000'i64 + int64(t.tv_nsec))
 
-  proc `-`(a, b: TTicks): TNanos {.borrow.}
+  proc `-`(a, b: Ticks): Nanos {.borrow.} # ticks are nanoseconds in this branch
 
 else:
-  # fallback Posix implementation:  
+  # fallback Posix implementation:
+  when not declared(Time): # define `Time` here only if it is not declared yet
+    when defined(linux):
+      type Time = clong # type of `Timeval.tv_sec` in this branch
+    else:
+      type Time = int
+
   type
-    Ttimeval {.importc: "struct timeval", header: "<sys/select.h>", 
+    Timeval {.importc: "struct timeval", header: "<sys/select.h>",
                final, pure.} = object ## struct timeval
-      tv_sec: int  ## Seconds. 
-      tv_usec: int ## Microseconds. 
-        
-  proc posix_gettimeofday(tp: var Ttimeval, unused: pointer = nil) {.
+      tv_sec: Time  ## Seconds.
+      tv_usec: clong ## Microseconds.
+
+  proc posix_gettimeofday(tp: var Timeval, unused: pointer = nil) {.
     importc: "gettimeofday", header: "<sys/time.h>".}
 
-  proc getTicks(): TTicks =
-    var t: Ttimeval
+  proc getTicks(): Ticks = # gettimeofday reading converted to nanoseconds
+    var t: Timeval
     posix_gettimeofday(t)
-    result = TTicks(int64(t.tv_sec) * 1000_000_000'i64 + 
+    result = Ticks(int64(t.tv_sec) * 1000_000_000'i64 +
                     int64(t.tv_usec) * 1000'i64)
 
-  proc `-`(a, b: TTicks): TNanos {.borrow.}
+  proc `-`(a, b: Ticks): Nanos {.borrow.} # ticks are nanoseconds in this branch
diff --git a/lib/system/widestrs.nim b/lib/system/widestrs.nim
deleted file mode 100644
index 588093d10..000000000
--- a/lib/system/widestrs.nim
+++ /dev/null
@@ -1,260 +0,0 @@
-#
-#
-#            Nimrod's Runtime Library
-#        (c) Copyright 2012 Andreas Rumpf
-#
-#    See the file "copying.txt", included in this
-#    distribution, for details about the copyright.
-#
-
-## Nimrod support for C/C++'s `wide strings`:idx:. This is part of the system
-## module! Do not import it directly!
-
-type
-  TUtf16Char* = distinct int16
-  WideCString* = ptr array[0.. 1_000_000, TUtf16Char]
-
-proc len*(w: WideCString): int =
-  ## returns the length of a widestring. This traverses the whole string to
-  ## find the binary zero end marker!
-  while int16(w[result]) != 0'i16: inc result
-
-when true:
-  const
-    UNI_REPLACEMENT_CHAR = TUtf16Char(0xFFFD'i16)
-    UNI_MAX_BMP = 0x0000FFFF
-    UNI_MAX_UTF16 = 0x0010FFFF
-    UNI_MAX_UTF32 = 0x7FFFFFFF
-    UNI_MAX_LEGAL_UTF32 = 0x0010FFFF
-
-    halfShift = 10
-    halfBase = 0x0010000
-    halfMask = 0x3FF
-
-    UNI_SUR_HIGH_START = 0xD800
-    UNI_SUR_HIGH_END = 0xDBFF
-    UNI_SUR_LOW_START = 0xDC00
-    UNI_SUR_LOW_END = 0xDFFF
-
-  template ones(n: expr): expr = ((1 shl n)-1)
-
-  template fastRuneAt(s: cstring, i: int, result: expr, doInc = true) =
-    ## Returns the unicode character ``s[i]`` in `result`. If ``doInc == true``
-    ## `i` is incremented by the number of bytes that have been processed.
-    bind ones
-
-    if ord(s[i]) <=% 127:
-      result = ord(s[i])
-      when doInc: inc(i)
-    elif ord(s[i]) shr 5 == 0b110:
-      #assert(ord(s[i+1]) shr 6 == 0b10)
-      result = (ord(s[i]) and (ones(5))) shl 6 or (ord(s[i+1]) and ones(6))
-      when doInc: inc(i, 2)
-    elif ord(s[i]) shr 4 == 0b1110:
-      #assert(ord(s[i+1]) shr 6 == 0b10)
-      #assert(ord(s[i+2]) shr 6 == 0b10)
-      result = (ord(s[i]) and ones(4)) shl 12 or
-               (ord(s[i+1]) and ones(6)) shl 6 or
-               (ord(s[i+2]) and ones(6))
-      when doInc: inc(i, 3)
-    elif ord(s[i]) shr 3 == 0b11110:
-      #assert(ord(s[i+1]) shr 6 == 0b10)
-      #assert(ord(s[i+2]) shr 6 == 0b10)
-      #assert(ord(s[i+3]) shr 6 == 0b10)
-      result = (ord(s[i]) and ones(3)) shl 18 or
-               (ord(s[i+1]) and ones(6)) shl 12 or
-               (ord(s[i+2]) and ones(6)) shl 6 or
-               (ord(s[i+3]) and ones(6))
-      when doInc: inc(i, 4)
-    else:
-      result = 0xFFFD
-      when doInc: inc(i)
-
-  iterator runes(s: cstring): int =
-    var
-      i = 0
-      result: int
-    while s[i] != '\0':
-      fastRuneAt(s, i, result, true)
-      yield result
-
-  proc allocWideCString*(source: cstring, L: int): WideCString =
-    ## free after usage with `dealloc`.
-    result = cast[wideCString](alloc(L * 4 + 2))
-    var d = 0
-    for ch in runes(source):
-      if ch <=% UNI_MAX_BMP:
-        if ch >=% UNI_SUR_HIGH_START and ch <=% UNI_SUR_LOW_END:
-          result[d] = UNI_REPLACEMENT_CHAR
-        else:
-          result[d] = TUtf16Char(toU16(ch))
-      elif ch >% UNI_MAX_UTF16:
-        result[d] = UNI_REPLACEMENT_CHAR
-      else:
-        let ch = ch -% halfBase
-        result[d] = TUtf16Char(toU16((ch shr halfShift) +% UNI_SUR_HIGH_START))
-        inc d
-        result[d] = TUtf16Char(toU16((ch and halfMask) +% UNI_SUR_LOW_START))
-      inc d
-    result[d] = TUtf16Char(0'i16)
-
-  proc allocWideCString*(s: cstring): WideCString =
-    ## free after usage with `dealloc`.
-    if s.isNil: return nil
-
-    when not defined(c_strlen):
-      proc c_strlen(a: CString): int {.nodecl, noSideEffect, importc: "strlen".}
-
-    let L = cstrlen(s)
-    result = allocWideCString(s, L)
-
-  proc allocWideCString*(s: string): WideCString =
-    ## free after usage with `dealloc`.
-    result = allocWideCString(s, s.len)
-
-  proc `$`*(w: wideCString, estimate: int): string =
-    result = newStringOfCap(estimate + estimate shr 2)
-
-    var i = 0
-    while w[i].int16 != 0'i16:
-      var ch = w[i].int
-      inc i
-      if ch >=% UNI_SUR_HIGH_START and ch <=% UNI_SUR_HIGH_END:
-        # If the 16 bits following the high surrogate are in the source buffer...
-        let ch2 = w[i].int
-        # If it's a low surrogate, convert to UTF32:
-        if ch2 >=% UNI_SUR_LOW_START and ch2 <=% UNI_SUR_LOW_END:
-          ch = ((ch -% UNI_SUR_HIGH_START) shr halfShift) +%
-                (ch2 -% UNI_SUR_LOW_START) +% halfBase
-          inc i
-          
-      if ch <=% 127:
-        result.add chr(ch)
-      elif ch <=% 0x07FF:
-        result.add chr((ch shr 6) or 0b110_00000)
-        result.add chr((ch and ones(6)) or 0b10_000000)
-      elif ch <=% 0xFFFF:
-        result.add chr(ch shr 12 or 0b1110_0000)
-        result.add chr(ch shr 6 and ones(6) or 0b10_0000_00)
-        result.add chr(ch and ones(6) or 0b10_0000_00)
-      elif ch <=% 0x0010FFFF:
-        result.add chr(ch shr 18 or 0b1111_0000)
-        result.add chr(ch shr 12 and ones(6) or 0b10_0000_00)
-        result.add chr(ch shr 6 and ones(6) or 0b10_0000_00)
-        result.add chr(ch and ones(6) or 0b10_0000_00)
-      else:
-        # replacement char:
-        result.add chr(0xFFFD shr 12 or 0b1110_0000)
-        result.add chr(0xFFFD shr 6 and ones(6) or 0b10_0000_00)
-        result.add chr(0xFFFD and ones(6) or 0b10_0000_00)
-
-  proc `$`*(s: WideCString): string =
-    result = s $ 80
-
-else:
-  const
-    utf8Encoding = 65001
-    
-  proc MultiByteToWideChar*(
-    CodePage: int32,
-    dwFlags: int32,
-    lpMultiByteStr: cstring,
-    cbMultiByte: cint,
-    lpWideCharStr: WideCString,
-    cchWideChar: cint): cint {.
-      stdcall, importc: "MultiByteToWideChar", dynlib: "kernel32".}
-
-  proc WideCharToMultiByte*(
-    CodePage: int32,
-    dwFlags: int32,
-    lpWideCharStr: WideCString,
-    cchWideChar: cint,
-    lpMultiByteStr: cstring,
-    cbMultiByte: cint,
-    lpDefaultChar: cstring=nil,
-    lpUsedDefaultChar: pointer=nil): cint {.
-      stdcall, importc: "WideCharToMultiByte", dynlib: "kernel32".}
-
-  proc raiseEncodingError() {.noinline, noreturn.} =
-    raise newException(EOS, "error in unicode conversion")
-
-  proc `$`*(s: WideCString, len: int): string =
-    # special case: empty string: needed because MultiByteToWideChar
-    # returns 0 in case of error:
-    if len == 0: return ""
-
-    # educated guess of capacity:
-    var cap = len + len shr 2
-    result = newStringOfCap(cap)
-    
-    let m = WideCharToMultiByte(
-      CodePage = utf8Encoding,
-      dwFlags = 0'i32,
-      lpWideCharStr = s,
-      cchWideChar = cint(len),
-      lpMultiByteStr = cstring(result),
-      cbMultiByte = cap)
-    if m == 0:
-      # try again; ask for capacity:
-      cap = WideCharToMultiByte(
-        CodePage = utf8Encoding,
-        dwFlags = 0'i32,
-        lpWideCharStr = s,
-        cchWideChar = cint(len),
-        lpMultiByteStr = nil,
-        cbMultiByte = cint(0))
-      # and do the conversion properly:
-      result = newStringOfCap(cap)
-      let m = WideCharToMultiByte(
-        CodePage = utf8Encoding,
-        dwFlags = 0'i32,
-        lpWideCharStr = s,
-        cchWideChar = cint(len),
-        lpMultiByteStr = cstring(result),
-        cbMultiByte = cap)
-      if m == 0: raiseEncodingError()
-      setLen(result, m)
-    elif m <= cap:
-      setLen(result, m)
-    else:
-      sysAssert(false, "") # cannot happen
-    
-  proc `$`*(s: WideCString): string =
-    result = s $ s.len
-    
-  proc allocWideCString*(s: string): WideCString =
-    ## free after usage with `dealloc`.
-    let cap = s.len+1
-    result = cast[wideCString](alloc0(cap * 2))
-    # special case: empty string: needed because MultiByteToWideChar
-    # return 0 in case of error:
-    if s.len == 0: return
-    # convert to utf-16 LE
-    let m = MultiByteToWideChar(CodePage = utf8Encoding, dwFlags = 0'i32, 
-                                lpMultiByteStr = cstring(s),
-                                cbMultiByte = cint(s.len),
-                                lpWideCharStr = result,
-                                cchWideChar = cint(cap))
-    if m == 0: raiseEncodingError()
-
-  proc allocWideCString*(s: cstring): WideCString =
-    ## free after usage with `dealloc`.
-    if s.isNil: return nil
-
-    when not defined(c_strlen):
-      proc c_strlen(a: CString): int {.nodecl, noSideEffect, importc: "strlen".}
-
-    let len = cstrlen(s)
-    let cap = len+1
-    result = cast[wideCString](alloc0(cap * 2))
-    # special case: empty string: needed because MultiByteToWideChar
-    # return 0 in case of error:
-    if s.len == 0: return
-    # convert to utf-16 LE
-    let m = MultiByteToWideChar(CodePage = utf8Encoding, dwFlags = 0'i32, 
-                                lpMultiByteStr = s,
-                                cbMultiByte = cint(len),
-                                lpWideCharStr = result,
-                                cchWideChar = cint(cap))
-    if m == 0: raiseEncodingError()
-